diff --git a/.editorconfig b/.editorconfig index 8740487fd..0a171c17a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -10,6 +10,9 @@ charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true +[*.rs] +max_line_length = 100 + [*.yml] ident_size = 2 diff --git a/.gitignore b/.gitignore index c663bcca4..be40d7fec 100644 --- a/.gitignore +++ b/.gitignore @@ -17,9 +17,12 @@ env/ *.out node_modules/ *DS_Store +._* *.iml book/ # Ignore Cargo.lock in test projects examples/**/Cargo.lock tests/**/Cargo.lock + +*.lit_test_times.txt* diff --git a/Cargo.lock b/Cargo.lock index 6b0970215..479f42b0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3107,6 +3107,7 @@ dependencies = [ "miden-assembly-syntax", "miden-core", "miden-mast-package", + "miden-thiserror", ] [[package]] @@ -3528,6 +3529,7 @@ name = "midenc-dialect-scf" version = "0.8.1" dependencies = [ "bitvec", + "env_logger", "log", "midenc-dialect-arith", "midenc-dialect-cf", @@ -3627,6 +3629,7 @@ dependencies = [ "memchr", "miden-core", "miden-thiserror", + "midenc-expect-test", "midenc-hir-macros", "midenc-hir-symbol", "midenc-hir-type", diff --git a/Cargo.toml b/Cargo.toml index 98e3eff15..1a5aa8c49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -76,6 +76,7 @@ inventory = "0.3" litcheck = { package = "litcheck-core", version = "0.4" } litcheck-filecheck = "0.4" log = { version = "0.4", features = ["kv"] } +env_logger = "0.11" # Miden Dependencies miden-assembly = { version = "0.22", default-features = false } @@ -174,9 +175,9 @@ miden-field = { version = "^0.25" } #miden-processor = { path = "../miden-vm/processor" } #miden-mast-package = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } #miden-mast-package = { path = "../miden-vm/package" } -# miden-protocol = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } -# miden-standards = { git = "https://github.com/0xMiden/protocol", rev = 
"a53bbe2209f506df87876c8b9c9a1730214f456b" } -# miden-tx = { tag = "v0.14.0-beta.4", git = "https://github.com/0xMiden/miden-base" } +#miden-protocol = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } +#miden-standards = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } +#miden-tx = { tag = "v0.14.0-beta.4", git = "https://github.com/0xMiden/miden-base" } [profile.dev] lto = false diff --git a/Makefile.toml b/Makefile.toml index c5128742d..732a050ed 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -436,6 +436,7 @@ dependencies = ["cargo-miden"] category = "Test" description = "Runs the lit/filecheck test suite" command = "litcheck" +env = { MIDENC_BIN_DIR = "${MIDENC_BIN_DIR}" } args = [ "lit", "run", @@ -444,7 +445,7 @@ args = [ "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/bin", "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit", ] -dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt"] +dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt", "miden-objtool"] [tasks.lit] category = "Test" @@ -455,7 +456,7 @@ args = [ "lit", "${@}", ] -dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt"] +dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt", "miden-objtool"] [tasks.litcheck] diff --git a/codegen/masm/src/emitter.rs b/codegen/masm/src/emitter.rs index 12db31481..cf05b7239 100644 --- a/codegen/masm/src/emitter.rs +++ b/codegen/masm/src/emitter.rs @@ -115,9 +115,9 @@ impl BlockEmitter<'_> { // operand stack space on operands that will never be used. 
//self.drop_unused_operands_at(op); - let lowering = op.as_trait::().unwrap_or_else(|| { - panic!("illegal operation: no lowering has been defined for '{}'", op.name()) - }); + let Some(lowering) = op.as_trait::() else { + panic!("illegal operation: no lowering has been defined for '{}'", op.name()); + }; // Schedule operands for this instruction lowering diff --git a/codegen/masm/src/lib.rs b/codegen/masm/src/lib.rs index 3de5df068..e07144a6f 100644 --- a/codegen/masm/src/lib.rs +++ b/codegen/masm/src/lib.rs @@ -31,7 +31,10 @@ use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; use midenc_dialect_wasm as wasm; -use midenc_hir::{dialects::builtin, inventory}; +use midenc_hir::{ + dialects::{builtin, debuginfo}, + inventory, +}; pub(crate) use self::lower::HirLowering; pub use self::{ @@ -62,6 +65,9 @@ inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( lower_wasm_ops )); +inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( + lower_debuginfo_ops +)); fn lower_builtin_ops(info: &mut midenc_hir::DialectInfo) { info.register_operation_trait::(); @@ -172,3 +178,9 @@ fn lower_wasm_ops(info: &mut midenc_hir::DialectInfo) { info.register_operation_trait::(); info.register_operation_trait::(); } + +fn lower_debuginfo_ops(info: &mut midenc_hir::DialectInfo) { + info.register_operation_trait::(); + info.register_operation_trait::(); + info.register_operation_trait::(); +} diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index c2d5566d7..341ad045b 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -1,10 +1,16 @@ -use alloc::{collections::BTreeSet, sync::Arc}; +use alloc::{collections::BTreeSet, sync::Arc, vec::Vec}; use miden_assembly::{PathBuf as LibraryPath, ast::InvocationTarget}; use miden_assembly_syntax::{ast::Attribute, parser::WordValue}; +use miden_core::operations::DebugVarLocation; use midenc_hir::{ FunctionIdent, 
Op, OpExt, SourceSpan, Span, Symbol, TraceTarget, ValueRef, - diagnostics::IntoDiagnostic, dialects::builtin, pass::AnalysisManager, + diagnostics::IntoDiagnostic, + dialects::{ + builtin, + debuginfo::attributes::{decode_frame_base_local_index, encode_frame_base_local_offset}, + }, + pass::AnalysisManager, }; use midenc_hir_analysis::analyses::LivenessAnalysis; use midenc_session::{ @@ -646,6 +652,32 @@ impl MasmFunctionBuilder { num_locals, } = self; + // Align num_locals to WORD_SIZE, matching the assembler's FMP frame sizing. + // num_locals already counts all HIR locals (including those allocated for params). + // The assembler rounds up to next_multiple_of(WORD_SIZE) when advancing FMP + // (see fmp.rs fmp_start_frame_sequence and mem_ops.rs locaddr), so we must use + // the same alignment for debug var offset computation. + let aligned_num_locals = num_locals.next_multiple_of(miden_core::WORD_SIZE as u16); + + // Resolve FrameBase global_index → Miden memory address. + // Use the stack pointer offset from the linker's global layout. + let stack_pointer_addr = link_info.globals_layout().stack_pointer_offset(); + + // Patch DebugVar Local locations to compute FMP offset. + // During lowering, Local(idx) stores the raw WASM local index. + // Now convert to FMP offset: idx - aligned_num_locals + // This matches locaddr.N which computes -(aligned_num_locals - N). + patch_debug_var_locals_in_block(&mut body, aligned_num_locals, stack_pointer_addr); + + // If a function body after lowering produces a MASM procedure with an empty body aside + // from debug decorators, then we must emit a `nop` at the end of the block which will + // act as the anchor for those decorators. Such a procedure is basically useless, as it is + // just passing through arguments as results - but the assembler currently rejects empty + // procedures (not counting decorators), so we must handle this edge case. 
+ if !block_has_real_instructions(&body) { + body.push(masm::Op::Inst(Span::unknown(masm::Instruction::Nop))); + } + let mut procedure = masm::Procedure::new(span, visibility, name, num_locals, body); procedure.set_signature(signature); for attribute in ["auth_script", "note_script"] { @@ -660,3 +692,96 @@ impl MasmFunctionBuilder { Ok(procedure) } } + +/// Returns true if the block contains at least one real (non-decorator) instruction. +/// +/// DebugVar instructions are decorator-only and don't produce MAST nodes. If a procedure +/// body contains only DebugVar ops, the assembler will reject it. +fn block_has_real_instructions(block: &masm::Block) -> bool { + block.iter().any(|op| match op { + masm::Op::Inst(inst) => !matches!( + inst.inner(), + masm::Instruction::Debug(_) + | masm::Instruction::DebugVar(_) + | masm::Instruction::Trace(_) + ), + masm::Op::If { + then_blk, else_blk, .. + } => block_has_real_instructions(then_blk) || block_has_real_instructions(else_blk), + masm::Op::While { body, .. } => block_has_real_instructions(body), + masm::Op::Repeat { body, .. } => block_has_real_instructions(body), + }) +} + +/// Recursively patch DebugVar locations in a block. +/// +/// Converts `Local(idx)` where idx is the raw WASM local index to `Local(offset)` where +/// `offset = idx - aligned_num_locals` (the FMP-relative offset, typically negative). This matches +/// the assembler's `locaddr.N` formula, i.e. `FMP - aligned_num_locals + N`. +/// +/// Also resolves `FrameBase { global_index, byte_offset }` by replacing the WASM global index with +/// the resolved Miden memory address of the stack pointer. 
+fn patch_debug_var_locals_in_block( + block: &mut masm::Block, + aligned_num_locals: u16, + stack_pointer_addr: Option, +) { + for op in block.iter_mut() { + match op { + masm::Op::Inst(span_inst) => { + // Use DerefMut to get mutable access to the inner Instruction + if let masm::Instruction::DebugVar(info) = &mut **span_inst { + if let DebugVarLocation::Local(idx) = info.value_location() { + // Convert raw WASM local index to FMP offset + let fmp_offset = *idx - (aligned_num_locals as i16); + info.set_value_location(DebugVarLocation::Local(fmp_offset)); + } else if let DebugVarLocation::FrameBase { + global_index, + byte_offset, + } = info.value_location() + { + let byte_offset = *byte_offset; + if let Some(local_index) = decode_frame_base_local_index(*global_index) { + if let Ok(local_index) = i16::try_from(local_index) { + let local_offset = local_index - (aligned_num_locals as i16); + info.set_value_location(DebugVarLocation::FrameBase { + global_index: encode_frame_base_local_offset(local_offset), + byte_offset, + }); + } + } else { + // Resolve FrameBase: replace WASM global index with + // the Miden memory address of the stack pointer global. + if let Some(resolved_addr) = stack_pointer_addr { + info.set_value_location(DebugVarLocation::FrameBase { + global_index: resolved_addr, + byte_offset, + }); + } + } + } + } + } + masm::Op::If { + then_blk, else_blk, .. + } => { + patch_debug_var_locals_in_block(then_blk, aligned_num_locals, stack_pointer_addr); + patch_debug_var_locals_in_block(else_blk, aligned_num_locals, stack_pointer_addr); + } + masm::Op::While { + body: while_body, .. + } => { + patch_debug_var_locals_in_block(while_body, aligned_num_locals, stack_pointer_addr); + } + masm::Op::Repeat { + body: repeat_body, .. 
+ } => { + patch_debug_var_locals_in_block( + repeat_body, + aligned_num_locals, + stack_pointer_addr, + ); + } + } + } +} diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 3202ce2ec..abcbc6b40 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -6,7 +6,7 @@ use midenc_dialect_ub as ub; use midenc_dialect_wasm as wasm; use midenc_hir::{ Op, OpExt, Span, SymbolTable, Type, Value, ValueRange, ValueRef, - dialects::builtin, + dialects::{builtin, debuginfo}, traits::{BinaryOp, Commutative}, }; use midenc_session::diagnostics::{Report, Severity, Spanned}; @@ -1269,6 +1269,185 @@ impl HirLowering for arith::Split { } } +fn debug_var_location_from_expression( + expr: &midenc_hir::dialects::debuginfo::attributes::Expression, + value: Option, + emitter: &BlockEmitter<'_>, +) -> Option { + use miden_core::{Felt, operations::DebugVarLocation, serde::Serializable}; + use midenc_hir::dialects::debuginfo::attributes::ExpressionOp; + + match expr.operations.as_slice() { + [] => value + .as_ref() + .and_then(|value| emitter.stack.find(value)) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + [first] => match first { + ExpressionOp::WasmStack(offset) => Some(DebugVarLocation::Stack(*offset as u8)), + ExpressionOp::WasmLocal(idx) => { + // WASM locals are always stored in memory via FMP in Miden. + // Store raw WASM local index; the FMP offset will be computed later in + // MasmFunctionBuilder::build() when num_locals is known. 
+ i16::try_from(*idx).ok().map(DebugVarLocation::Local) + } + ExpressionOp::WasmGlobal(_) | ExpressionOp::Deref => value + .as_ref() + .and_then(|value| emitter.stack.find(value)) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + ExpressionOp::ConstU64(val) => Some(DebugVarLocation::Const(Felt::new(*val))), + ExpressionOp::ConstS64(val) => Some(DebugVarLocation::Const(Felt::new(*val as u64))), + ExpressionOp::FrameBase { + global_index, + byte_offset, + } => Some(DebugVarLocation::FrameBase { + global_index: *global_index, + byte_offset: *byte_offset, + }), + _ => value + .as_ref() + .and_then(|value| emitter.stack.find(value)) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + }, + _ => Some(DebugVarLocation::Expression(expr.to_bytes())), + } +} + +fn apply_debug_var_metadata( + debug_var: &mut miden_core::operations::DebugVarInfo, + var: &midenc_hir::dialects::debuginfo::attributes::Variable, +) { + // Set arg_index if this is a parameter + if let Some(arg_index) = var.arg_index { + debug_var.set_arg_index(arg_index + 1); // Convert to 1-based + } + + // Set source location + if let Some(line) = core::num::NonZeroU32::new(var.line) { + use miden_assembly::debuginfo::{ColumnNumber, FileLineCol, LineNumber, Uri}; + let uri = Uri::new(var.file.as_str()); + let file_line_col = FileLineCol::new( + uri, + LineNumber::new(line.get()).unwrap_or_default(), + var.column.and_then(ColumnNumber::new).unwrap_or_default(), + ); + debug_var.set_location(file_line_col); + } +} + +impl HirLowering for debuginfo::DebugValue { + fn schedule_operands(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + // Debug value operations are purely observational — they do not consume their + // operand from the stack. Skip operand scheduling entirely; the emit() method + // will look up the value's current stack position (if any) on its own. 
+ Ok(()) + } + + fn required_operands(&self) -> ValueRange<'_, 4> { + // No operands need to be scheduled on the stack for debug ops. + ValueRange::Empty + } + + fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + use miden_core::operations::DebugVarInfo; + use midenc_hir::dialects::debuginfo::attributes::ExpressionOp; + + // Get the variable info + let var = self.variable(); + + // Build the DebugVarLocation from DIExpression + let expr = self.expression(); + let value = self.value().as_value_ref(); + + // If the value is not on the stack and there's no expression info, + // skip emitting this debug info (the value has been optimized away) + let has_location_expr = expr.operations.first().is_some_and(|op| { + matches!( + op, + ExpressionOp::WasmStack(_) + | ExpressionOp::WasmLocal(_) + | ExpressionOp::ConstU64(_) + | ExpressionOp::ConstS64(_) + | ExpressionOp::FrameBase { .. } + ) + }); + if !has_location_expr && emitter.stack.find(&value).is_none() { + // Value has been dropped and we have no other location info, skip + return Ok(()); + } + // Resolve the runtime location. Returns None when the location cannot be determined, in + // which case we skip the decorator rather than emitting a placeholder. 
+ let value_location = + debug_var_location_from_expression(expr.as_value(), Some(value), emitter); + + let Some(value_location) = value_location else { + return Ok(()); + }; + + let mut debug_var = DebugVarInfo::new(var.name.to_string(), value_location); + apply_debug_var_metadata(&mut debug_var, var.as_value()); + + // Emit the instruction + let inst = masm::Instruction::DebugVar(debug_var); + emitter.emit_op(masm::Op::Inst(Span::new(self.span(), inst))); + + Ok(()) + } +} + +impl HirLowering for debuginfo::DebugDeclare { + fn schedule_operands(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + Ok(()) + } + + fn required_operands(&self) -> ValueRange<'_, 4> { + ValueRange::Empty + } + + fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + use miden_core::operations::DebugVarInfo; + + let var = self.variable(); + let expr = self.expression(); + + let Some(value_location) = + debug_var_location_from_expression(expr.as_value(), None, emitter) + else { + return Ok(()); + }; + + let mut debug_var = DebugVarInfo::new(var.name.to_string(), value_location); + apply_debug_var_metadata(&mut debug_var, var.as_value()); + + let inst = masm::Instruction::DebugVar(debug_var); + emitter.emit_op(masm::Op::Inst(Span::new(self.span(), inst))); + + Ok(()) + } +} + +impl HirLowering for debuginfo::DebugKill { + fn schedule_operands(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + // Debug value operations are purely observational — they do not consume their + // operand from the stack. Skip operand scheduling entirely; the emit() method + // will look up the value's current stack position (if any) on its own. + Ok(()) + } + + fn required_operands(&self) -> ValueRange<'_, 4> { + // No operands need to be scheduled on the stack for debug ops. 
+ ValueRange::Empty + } + + fn emit(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + // TODO(pauls): Either add new decorator, or emit a special trace event for kills, and + // map debug variable name to the event out of band + Ok(()) + } +} + impl HirLowering for builtin::GlobalSymbol { fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { let context = self.as_operation().context(); diff --git a/dialects/scf/Cargo.toml b/dialects/scf/Cargo.toml index 1adab716c..e96f131e0 100644 --- a/dialects/scf/Cargo.toml +++ b/dialects/scf/Cargo.toml @@ -28,3 +28,4 @@ bitvec.workspace = true # NOTE: Use local paths for dev-only dependency to avoid relying on crates.io during packaging midenc-expect-test = { path = "../../tools/expect-test" } midenc-hir = { path = "../../hir", features = ["logging"] } +env_logger.workspace = true diff --git a/dialects/scf/src/canonicalization/if_remove_unused_results.rs b/dialects/scf/src/canonicalization/if_remove_unused_results.rs index eaec0cc43..a1a802182 100644 --- a/dialects/scf/src/canonicalization/if_remove_unused_results.rs +++ b/dialects/scf/src/canonicalization/if_remove_unused_results.rs @@ -74,7 +74,7 @@ impl RewritePattern for IfRemoveUnusedResults { .results() .iter() .copied() - .filter(|result| result.borrow().is_used()) + .filter(|result| result.borrow().has_real_uses()) .collect::>(); // Replace the operation if only a subset of its results have uses. 
diff --git a/dialects/scf/src/canonicalization/while_remove_unused_args.rs b/dialects/scf/src/canonicalization/while_remove_unused_args.rs index 52da21c80..cb6766c02 100644 --- a/dialects/scf/src/canonicalization/while_remove_unused_args.rs +++ b/dialects/scf/src/canonicalization/while_remove_unused_args.rs @@ -46,7 +46,13 @@ impl RewritePattern for WhileRemoveUnusedArgs { return Ok(false); }; - if while_op.before().entry().arguments().iter().all(|arg| arg.borrow().is_used()) { + if while_op + .before() + .entry() + .arguments() + .iter() + .all(|arg| arg.borrow().has_real_uses()) + { // All the arguments are used (nothing to remove) return Ok(false); } @@ -67,7 +73,7 @@ impl RewritePattern for WhileRemoveUnusedArgs { let before_arg = before_arg.borrow(); let yield_value = yield_op.yielded()[i]; let init_value = while_op.inits()[i]; - if before_arg.is_used() { + if before_arg.has_real_uses() { args_to_erase.push(false); new_yields.push(yield_value.borrow().as_value_ref()); new_inits.push(init_value.borrow().as_value_ref()); diff --git a/dialects/scf/src/canonicalization/while_unused_result.rs b/dialects/scf/src/canonicalization/while_unused_result.rs index 4892bc334..f4eee480e 100644 --- a/dialects/scf/src/canonicalization/while_unused_result.rs +++ b/dialects/scf/src/canonicalization/while_unused_result.rs @@ -107,7 +107,7 @@ impl RewritePattern for WhileUnusedResult { let after_arg = after_args[i]; let term_arg = forwarded[i]; - if !result.is_used() && !after_arg.borrow().is_used() { + if !result.has_real_uses() && !after_arg.borrow().has_real_uses() { need_update = true; } else { new_results_indices.push(i); diff --git a/dialects/scf/src/transforms/cfg_to_scf.rs b/dialects/scf/src/transforms/cfg_to_scf.rs index d029ad68c..db1a32629 100644 --- a/dialects/scf/src/transforms/cfg_to_scf.rs +++ b/dialects/scf/src/transforms/cfg_to_scf.rs @@ -837,4 +837,69 @@ mod tests { Ok(()) } + + /// This test verifies that `debuginfo.debug_value` operations are preserved through 
the + /// CF-to-SCF transformation. The key behavior being tested is that `replace_all_uses_with` + /// (used internally by the transform to replace block arguments with `scf.if` results) + /// automatically updates the SSA operands of debug value ops. + #[test] + fn cfg_to_scf_debug_value_preservation() -> Result<(), Report> { + use midenc_hir::{ + dialects::debuginfo::{DIBuilder, DebugInfoDialect, attributes::Variable}, + interner::Symbol, + }; + + let mut test = Test::new("cfg_to_scf_debug_value_preservation", &[Type::U32], &[Type::U32]); + test.context().get_or_register_dialect::(); + + let span = SourceSpan::default(); + let mut builder = test.function_builder(); + + let if_is_zero = builder.create_block(); + let if_is_nonzero = builder.create_block(); + let exit_block = builder.create_block(); + let return_val = builder.append_block_param(exit_block, Type::U32, span); + + let block = builder.current_block(); + let input = block.borrow().arguments()[0].upcast(); + + let input_var = + Variable::new(Symbol::intern("input"), Symbol::intern("test.rs"), 1, Some(1)); + let result_var = + Variable::new(Symbol::intern("result"), Symbol::intern("test.rs"), 2, Some(1)); + + let zero = builder.u32(0, span); + let is_zero = builder.eq(input, zero, span)?; + builder.builder_mut().debug_value(input, input_var.clone(), span)?; + builder.cond_br(is_zero, if_is_zero, [], if_is_nonzero, [], span)?; + + builder.switch_to_block(if_is_zero); + let a = builder.incr(input, span)?; + builder.builder_mut().debug_value(a, result_var.clone(), span)?; + builder.br(exit_block, [a], span)?; + + builder.switch_to_block(if_is_nonzero); + let b = builder.mul(input, input, span)?; + builder.builder_mut().debug_value(b, result_var.clone(), span)?; + builder.br(exit_block, [b], span)?; + + builder.switch_to_block(exit_block); + // KEY: this debug_value uses the block argument `return_val`, which will be + // replaced by the scf.if result via replace_all_uses_with + 
builder.builder_mut().debug_value(return_val, result_var.clone(), span)?; + builder.ret(Some(return_val), span)?; + + let operation = test.function().as_operation_ref(); + + let input_ir = format!("{}", &operation.borrow()); + expect_file!["expected/cfg_to_scf_debug_value_preservation_before.hir"] + .assert_eq(&input_ir); + + test.apply_pass::(true)?; + + let output = format!("{}", &operation.borrow()); + expect_file!["expected/cfg_to_scf_debug_value_preservation_after.hir"].assert_eq(&output); + + Ok(()) + } } diff --git a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir new file mode 100644 index 000000000..be95193fb --- /dev/null +++ b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir @@ -0,0 +1,16 @@ +builtin.function public extern("C") @cfg_to_scf_debug_value_preservation(%0: u32) -> u32 { + %2 = arith.constant 0 : u32; + %3 = arith.eq %0, %2; + di.debug_value %0 <{ variable = #di.variable<{ name = "input", file = "test.rs", line = 1, column = 1 }>, expression = #di.expression<[]> }> : (u32); + %8 = scf.if %3 then { + %4 = arith.incr %0; + di.debug_value %4 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); + scf.yield %4 : (u32); + } else { + %5 = arith.mul %0, %0 <{ overflow = #builtin.overflow }>; + di.debug_value %5 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); + scf.yield %5 : (u32); + } : (i1) -> (u32); + di.debug_value %8 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); + builtin.ret %8 : (u32); +}; \ No newline at end of file diff --git a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir 
b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir new file mode 100644 index 000000000..fd85982ed --- /dev/null +++ b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir @@ -0,0 +1,17 @@ +builtin.function public extern("C") @cfg_to_scf_debug_value_preservation(%0: u32) -> u32 { + %2 = arith.constant 0 : u32; + %3 = arith.eq %0, %2; + di.debug_value %0 <{ variable = #di.variable<{ name = "input", file = "test.rs", line = 1, column = 1 }>, expression = #di.expression<[]> }> : (u32); + cf.cond_br %3 ^block1, ^block2 : (i1); +^block1: + %4 = arith.incr %0; + di.debug_value %4 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); + cf.br ^block3:(%4); +^block2: + %5 = arith.mul %0, %0 <{ overflow = #builtin.overflow }>; + di.debug_value %5 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); + cf.br ^block3:(%5); +^block3(%1: u32): + di.debug_value %1 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); + builtin.ret %1 : (u32); +}; \ No newline at end of file diff --git a/eval/src/eval.rs b/eval/src/eval.rs index f4d745505..9b5f16d8e 100644 --- a/eval/src/eval.rs +++ b/eval/src/eval.rs @@ -12,7 +12,8 @@ use midenc_dialect_wasm::{self as wasm}; use midenc_hir::{ AttributeRef, Felt, Immediate, ImmediateAttr, Op, OperationRef, Overflow, RegionBranchPoint, RegionBranchTerminatorOpInterface, Report, SmallVec, SourceSpan, Spanned, SuccessorInfo, Type, - Value as _, ValueRange, dialects::builtin, + Value as _, ValueRange, + dialects::{builtin, debuginfo}, }; use midenc_session::diagnostics::Severity; @@ -106,6 +107,13 @@ impl Eval for ub::Unreachable { } } +// Debug info operations are purely observational and have no runtime semantics. 
+impl Eval for debuginfo::DebugValue { + fn eval(&self, _evaluator: &mut HirEvaluator) -> Result { + Ok(ControlFlowEffect::None) + } +} + impl Eval for ub::Poison { fn eval(&self, evaluator: &mut HirEvaluator) -> Result { let value = match self.value().as_immediate() { diff --git a/eval/src/lib.rs b/eval/src/lib.rs index 990b01f30..f7eeda3ea 100644 --- a/eval/src/lib.rs +++ b/eval/src/lib.rs @@ -19,7 +19,10 @@ use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; use midenc_dialect_wasm as wasm; -use midenc_hir::{dialects::builtin, inventory}; +use midenc_hir::{ + dialects::{builtin, debuginfo}, + inventory, +}; pub use self::{ eval::{ControlFlowEffect, Eval, Initialize}, @@ -48,6 +51,9 @@ inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( eval_wasm_dialect )); +inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( + eval_debuginfo_dialect +)); fn eval_builtin_dialect(info: &mut ::midenc_hir::DialectInfo) { info.register_operation_trait::(); @@ -155,3 +161,7 @@ fn eval_wasm_dialect(info: &mut ::midenc_hir::DialectInfo) { info.register_operation_trait::(); info.register_operation_trait::(); } + +fn eval_debuginfo_dialect(info: &mut ::midenc_hir::DialectInfo) { + info.register_operation_trait::(); +} diff --git a/frontend/wasm/src/code_translator/mod.rs b/frontend/wasm/src/code_translator/mod.rs index 4cef29cbc..acbb4d0cd 100644 --- a/frontend/wasm/src/code_translator/mod.rs +++ b/frontend/wasm/src/code_translator/mod.rs @@ -57,6 +57,8 @@ pub fn translate_operator( diagnostics: &DiagnosticsHandler, span: SourceSpan, ) -> WasmResult<()> { + builder.record_debug_span(span); + if !state.reachable { translate_unreachable_operator(op, builder, state, mod_types, diagnostics, span)?; return Ok(()); @@ -94,6 +96,7 @@ pub fn translate_operator( val }; builder.store_local(local, val, span)?; + builder.emit_dbg_value_for_var(var, val, span); } Operator::LocalTee { local_index } => { let var = 
Variable::from_u32(*local_index); @@ -114,6 +117,7 @@ pub fn translate_operator( val }; builder.store_local(local, val, span)?; + builder.emit_dbg_value_for_var(var, val, span); } /********************************** Globals ****************************************/ Operator::GlobalGet { global_index } => { diff --git a/frontend/wasm/src/component/lift_exports.rs b/frontend/wasm/src/component/lift_exports.rs index 60ac3cb10..2ddb961fc 100644 --- a/frontend/wasm/src/component/lift_exports.rs +++ b/frontend/wasm/src/component/lift_exports.rs @@ -7,9 +7,12 @@ use midenc_frontend_wasm_metadata::ProtocolExportKind; use midenc_hir::{ FunctionType, Ident, Op, OpExt, SmallVec, SourceSpan, SymbolPath, ValueRange, ValueRef, Visibility, - dialects::builtin::{ - BuiltinOpBuilder, ComponentBuilder, ModuleBuilder, - attributes::{Signature, UnitAttr}, + dialects::{ + builtin::{ + BuiltinOpBuilder, ComponentBuilder, ModuleBuilder, + attributes::{Signature, UnitAttr}, + }, + debuginfo::attributes::{CompileUnit, CompileUnitAttr, Subprogram, SubprogramAttr}, }, }; use midenc_session::{DiagnosticsHandler, diagnostics::Severity}; @@ -25,11 +28,18 @@ use crate::{ }, }; +struct ComponentExportMetadata<'a> { + ty: &'a FunctionType, + param_names: &'a [String], + protocol_export_kind: Option, +} + /// Generates a lifted component export wrapper around a lowered core Wasm export. 
pub fn generate_export_lifting_function( component_builder: &mut ComponentBuilder, export_func_name: &str, export_func_ty: FunctionType, + export_param_names: &[String], core_export_func_path: SymbolPath, protocol_export_kind: Option, diagnostics: &DiagnosticsHandler, @@ -55,6 +65,11 @@ pub fn generate_export_lifting_function( } let export_func_ident = Ident::new(export_func_name.to_string().into(), SourceSpan::default()); + let export_metadata = ComponentExportMetadata { + ty: &export_func_ty, + param_names: export_param_names, + protocol_export_kind, + }; let core_export_module_path = core_export_func_path.without_leaf(); let core_module_ref = component_builder @@ -77,22 +92,21 @@ pub fn generate_export_lifting_function( generate_lifting_with_transformation( component_builder, export_func_ident, - &export_func_ty, + &export_metadata, cross_ctx_export_sig_flat, core_export_func_ref, core_export_func_sig, &core_export_func_path, - protocol_export_kind, diagnostics, )?; } else { generate_direct_lifting( component_builder, export_func_ident, + &export_metadata, core_export_func_ref, core_export_func_sig, cross_ctx_export_sig_flat, - protocol_export_kind, )?; } @@ -132,12 +146,11 @@ pub fn generate_export_lifting_function( fn generate_lifting_with_transformation( component_builder: &mut ComponentBuilder, export_func_ident: Ident, - export_func_ty: &FunctionType, + export_metadata: &ComponentExportMetadata<'_>, cross_ctx_export_sig_flat: Signature, core_export_func_ref: midenc_hir::dialects::builtin::FunctionRef, core_export_func_sig: Signature, core_export_func_path: &SymbolPath, - protocol_export_kind: Option, diagnostics: &DiagnosticsHandler, ) -> WasmResult<()> { assert_eq!( @@ -154,7 +167,7 @@ fn generate_lifting_with_transformation( // Extract flattened result types from the exported component-level function type let context = { core_export_func_ref.borrow().as_operation().context_rc() }; - let flattened_results = flatten_types(&context, 
&export_func_ty.results).map_err(|e| { + let flattened_results = flatten_types(&context, &export_metadata.ty.results).map_err(|e| { let message = format!( "Failed to flatten result types for exported function {core_export_func_path}: {e}" ); @@ -176,7 +189,13 @@ fn generate_lifting_with_transformation( }; let export_func_ref = component_builder.define_function(export_func_ident, Visibility::Public, new_func_sig)?; - annotate_protocol_export(export_func_ref, protocol_export_kind); + annotate_protocol_export(export_func_ref, export_metadata.protocol_export_kind); + annotate_component_export_debug_signature( + export_func_ref, + export_func_ident.name.as_str(), + export_metadata.ty, + export_metadata.param_names, + ); let (span, context) = { let export_func = export_func_ref.borrow(); @@ -216,11 +235,11 @@ fn generate_lifting_with_transformation( // Load results using the recursive function from canon_abi_utils assert_eq!( - export_func_ty.results.len(), + export_metadata.ty.results.len(), 1, "expected a single result in the component-level export function" ); - let result_type = &export_func_ty.results[0]; + let result_type = &export_metadata.ty.results[0]; load(&mut fb, result_ptr, result_type, &mut return_values, span)?; @@ -273,17 +292,23 @@ fn generate_lifting_with_transformation( fn generate_direct_lifting( component_builder: &mut ComponentBuilder, export_func_ident: Ident, + export_metadata: &ComponentExportMetadata<'_>, core_export_func_ref: midenc_hir::dialects::builtin::FunctionRef, core_export_func_sig: Signature, cross_ctx_export_sig_flat: Signature, - protocol_export_kind: Option, ) -> WasmResult<()> { let export_func_ref = component_builder.define_function( export_func_ident, Visibility::Public, cross_ctx_export_sig_flat.clone(), )?; - annotate_protocol_export(export_func_ref, protocol_export_kind); + annotate_protocol_export(export_func_ref, export_metadata.protocol_export_kind); + annotate_component_export_debug_signature( + export_func_ref, + 
export_func_ident.name.as_str(), + export_metadata.ty, + export_metadata.param_names, + ); let (span, context) = { let export_func = export_func_ref.borrow(); @@ -349,3 +374,39 @@ fn annotate_protocol_export( None => {} } } + +fn annotate_component_export_debug_signature( + mut export_func_ref: midenc_hir::dialects::builtin::FunctionRef, + export_func_name: &str, + export_func_ty: &FunctionType, + export_param_names: &[String], +) { + let context = { + let export_func = export_func_ref.borrow(); + export_func.as_operation().context_rc() + }; + + let file = midenc_hir::interner::Symbol::intern(""); + let mut compile_unit = CompileUnit::new(midenc_hir::interner::Symbol::intern("wit"), file); + compile_unit.producer = Some(midenc_hir::interner::Symbol::intern("midenc-frontend-wasm")); + + let param_names = export_param_names + .iter() + .map(|name| midenc_hir::interner::Symbol::intern(name.as_str())); + let subprogram = + Subprogram::new(midenc_hir::interner::Symbol::intern(export_func_name), file, 1, Some(1)) + .with_function_type(FunctionType { + abi: export_func_ty.abi, + params: export_func_ty.params.clone(), + results: export_func_ty.results.clone(), + }) + .with_param_names(param_names); + + let cu_attr = context.create_attribute::(compile_unit).as_attribute_ref(); + let sp_attr = context.create_attribute::(subprogram).as_attribute_ref(); + + let mut export_func = export_func_ref.borrow_mut(); + let op = export_func.as_operation_mut(); + op.set_attribute("di.compile_unit", cu_attr); + op.set_attribute("di.subprogram", sp_attr); +} diff --git a/frontend/wasm/src/component/translator.rs b/frontend/wasm/src/component/translator.rs index c65110229..82739fd60 100644 --- a/frontend/wasm/src/component/translator.rs +++ b/frontend/wasm/src/component/translator.rs @@ -479,6 +479,7 @@ impl<'a> ComponentTranslator<'a> { let type_func_idx = types.convert_component_func_type(frame.types, canon_lift.ty).unwrap(); let component_types = types.resources_mut_and_types().1; + let 
type_func = component_types[type_func_idx].clone(); let func_ty = convert_lifted_func_ty(CanonicalAbiMode::Export, &type_func_idx, component_types); let core_export_func_path = self.core_module_export_func_path(frame, canon_lift); @@ -491,6 +492,7 @@ impl<'a> ComponentTranslator<'a> { &mut self.result, name, func_ty, + &type_func.param_names, core_export_func_path, protocol_export_kind, self.context.diagnostics(), @@ -688,6 +690,7 @@ impl<'a> ComponentTranslator<'a> { TypeDef::ComponentInstance(type_component_instance_idx) => type_component_instance_idx, _ => panic!("expected component instance"), }; + types.register_component_instance_export_type_names(ty, Some(name.0)); frame .component_instances .push(ComponentInstanceDef::Import(ComponentInstanceImport { diff --git a/frontend/wasm/src/component/types/mod.rs b/frontend/wasm/src/component/types/mod.rs index 1562cc890..8f3e2e6f5 100644 --- a/frontend/wasm/src/component/types/mod.rs +++ b/frontend/wasm/src/component/types/mod.rs @@ -282,6 +282,7 @@ pub struct ComponentTypes { options: PrimaryMap, results: PrimaryMap, resource_tables: PrimaryMap, + interface_type_names: FxHashMap, module_types: ModuleTypes, } @@ -325,6 +326,10 @@ impl ComponentTypes { InterfaceType::Result(i) => &self[*i].abi, } } + + pub fn interface_type_name(&self, ty: &InterfaceType) -> Option<&str> { + self.interface_type_names.get(ty).map(String::as_str) + } } macro_rules! 
impl_index { @@ -473,6 +478,7 @@ impl ComponentTypesBuilder { id: component_types::ComponentFuncTypeId, ) -> Result { let ty = &types[id]; + let param_names = ty.params.iter().map(|(name, _ty)| name.to_string()).collect(); let params = ty .params .iter() @@ -485,10 +491,53 @@ impl ComponentTypesBuilder { let ty = TypeFunc { params: self.new_tuple_type(params), results: self.new_tuple_type(results), + param_names, }; Ok(self.add_func_type(ty)) } + pub fn register_component_instance_export_type_names( + &mut self, + instance_idx: TypeComponentInstanceIndex, + namespace: Option<&str>, + ) { + let exports = self.component_types[instance_idx] + .exports + .iter() + .map(|(name, ty)| (name.clone(), *ty)) + .collect::>(); + + for (name, ty) in exports { + let qualified_name = namespace + .filter(|namespace| !namespace.is_empty()) + .map(|namespace| format!("{}/{}", namespace.trim_end_matches('/'), name)) + .unwrap_or(name); + self.register_type_name(ty, qualified_name); + } + } + + fn register_type_name(&mut self, ty: TypeDef, name: String) { + match ty { + TypeDef::Interface(interface_ty) => { + self.component_types.interface_type_names.entry(interface_ty).or_insert(name); + } + TypeDef::ComponentInstance(instance_idx) => { + self.register_component_instance_export_type_names(instance_idx, Some(&name)); + } + TypeDef::Component(component_idx) => { + let exports = self.component_types[component_idx] + .exports + .iter() + .map(|(export_name, ty)| (export_name.clone(), *ty)) + .collect::>(); + for (export_name, ty) in exports { + self.register_type_name(ty, format!("{}/{}", name, export_name)); + } + } + TypeDef::ComponentFunc(_) | TypeDef::Module(_) | TypeDef::Resource(_) => {} + } + } + /// Converts a wasmparser `ComponentEntityType` pub fn convert_component_entity_type( &mut self, @@ -1005,6 +1054,8 @@ pub struct TypeFunc { pub params: TypeTupleIndex, /// Results of the function represented as a tuple. 
pub results: TypeTupleIndex, + /// Source/component names of the parameters, in declaration order. + pub param_names: Box<[String]>, } /// All possible interface types that values can have. @@ -1756,11 +1807,15 @@ pub fn interface_type_to_ir( InterfaceType::String => todo!(), InterfaceType::ErrorContext => todo!("the async proposal is not currently supported"), InterfaceType::Record(idx) => { - let tys = component_types.records[*idx] - .fields - .iter() - .map(|f| interface_type_to_ir(&f.ty, component_types)); - midenc_hir::Type::from(midenc_hir::StructType::new(tys)) + let fields = component_types.records[*idx].fields.iter().map(|f| { + (Arc::::from(f.name.as_str()), interface_type_to_ir(&f.ty, component_types)) + }); + let struct_ty = if let Some(name) = component_types.interface_type_name(ty) { + midenc_hir::StructType::named(Arc::from(name), fields) + } else { + midenc_hir::StructType::new(fields) + }; + midenc_hir::Type::from(struct_ty) } // TODO: This is a stub to make `enum` in WIT generation work. Use proper type when ready. 
InterfaceType::Variant(_) => midenc_hir::Type::U32, diff --git a/frontend/wasm/src/module/build_ir.rs b/frontend/wasm/src/module/build_ir.rs index 1c055a288..b6aa8d39d 100644 --- a/frontend/wasm/src/module/build_ir.rs +++ b/frontend/wasm/src/module/build_ir.rs @@ -14,7 +14,8 @@ use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic, Severity, use wasmparser::Validator; use super::{ - MemoryIndex, module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, + MemoryIndex, debug_info::collect_function_debug_info, + module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, }; use crate::{ WasmTranslationConfig, @@ -116,6 +117,18 @@ pub fn build_ir_module( ..Default::default() }) .into_diagnostic()?; + parsed_module.function_debug = if context.session().options.emit_debug_decorators() { + collect_function_debug_info( + parsed_module, + module_types, + &parsed_module.module, + &addr2line, + context.diagnostics(), + ) + } else { + Default::default() + }; + let mut func_translator = FuncTranslator::new(context.clone()); // Although this renders this parsed module invalid(without function // bodies), we don't support multiple module instances. Thus, this @@ -188,8 +201,12 @@ pub fn build_ir_module( continue; } - let FunctionBodyData { validator, body } = body_data; + let FunctionBodyData { + validator, body, .. 
+ } = body_data; let mut func_validator = validator.into_validator(Default::default()); + let debug_info = parsed_module.function_debug.get(&func_index).cloned(); + func_translator.translate_body( &body, function_ref, @@ -200,6 +217,7 @@ pub fn build_ir_module( context.session(), &mut func_validator, _config, + debug_info, )?; } Ok(()) diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs new file mode 100644 index 000000000..541e1a8fd --- /dev/null +++ b/frontend/wasm/src/module/debug_info.rs @@ -0,0 +1,979 @@ +use alloc::{rc::Rc, vec::Vec}; +use core::cell::RefCell; +use std::path::Path; + +use addr2line::Context; +use cranelift_entity::EntityRef; +use gimli::{self, AttributeValue, read::Operation}; +use log::debug; +use midenc_hir::{ + FxHashMap, SourceSpan, + dialects::debuginfo::attributes::{ + CompileUnit, Expression, ExpressionOp, Subprogram, Variable, encode_frame_base_local_index, + }, + interner::Symbol, +}; +use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic}; + +use super::{ + FuncIndex, Module, + module_env::{DwarfReader, FunctionBodyData, ParsedModule}, + types::{WasmFuncType, convert_valtype, ir_type}, +}; +use crate::module::types::ModuleTypesBuilder; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LocationDescriptor { + /// Inclusive start offset within the function's code, relative to the Wasm code section. + pub start: u64, + /// Exclusive end offset. `None` indicates the location is valid until the end of the function. + pub end: Option, + pub storage: Expression, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum VariableStorage { + Local(u32), + Global(u32), + Stack(u32), + ConstU64(u64), + /// Frame base + byte offset — from DW_OP_fbreg. + /// + /// For Wasm-global frame bases, `global_index` is the Wasm global index. + /// For Wasm-local frame bases, it is encoded with + /// `encode_frame_base_local_index`. 
+ FrameBase { + global_index: u32, + byte_offset: i64, + }, + Unsupported, +} + +impl VariableStorage { + pub fn as_local(&self) -> Option { + match self { + VariableStorage::Local(index) => Some(*index), + _ => None, + } + } + + pub fn to_expression_op(&self) -> ExpressionOp { + match self { + VariableStorage::Local(idx) => ExpressionOp::WasmLocal(*idx), + VariableStorage::Global(idx) => ExpressionOp::WasmGlobal(*idx), + VariableStorage::Stack(idx) => ExpressionOp::WasmStack(*idx), + VariableStorage::ConstU64(val) => ExpressionOp::ConstU64(*val), + VariableStorage::FrameBase { + global_index, + byte_offset, + } => ExpressionOp::FrameBase { + global_index: *global_index, + byte_offset: *byte_offset, + }, + VariableStorage::Unsupported => { + ExpressionOp::Unsupported(Symbol::intern("unsupported")) + } + } + } +} + +#[derive(Clone)] +pub struct LocalDebugInfo { + pub attr: Variable, + pub locations: Vec, + pub expression: Option, +} + +#[derive(Clone)] +pub struct FunctionDebugInfo { + pub compile_unit: CompileUnit, + pub subprogram: Subprogram, + pub locals: Vec>, + pub function_span: Option, + pub location_schedule: Vec, + pub next_location_event: usize, +} + +#[derive(Default, Clone)] +struct DwarfLocalData { + name: Option, + decl_file: Option, + locations: Vec, + decl_line: Option, + decl_column: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LocationScheduleEntry { + pub offset: u64, + pub var_index: usize, + pub storage: Expression, +} + +impl FunctionDebugInfo { + pub fn local_attr(&self, index: usize) -> Option<&Variable> { + self.locals.get(index).and_then(|info| info.as_ref().map(|data| &data.attr)) + } +} + +pub fn collect_function_debug_info( + parsed_module: &ParsedModule, + module_types: &ModuleTypesBuilder, + module: &Module, + addr2line: &Context>, + diagnostics: &DiagnosticsHandler, +) -> FxHashMap>> { + let mut map = FxHashMap::default(); + + let collected = collect_dwarf_local_data(parsed_module, module, diagnostics); + + 
debug!( + "Collecting function debug info for {} functions", + parsed_module.function_body_inputs.len() + ); + + for (defined_idx, body) in parsed_module.function_body_inputs.iter() { + let func_index = module.func_index(defined_idx); + let func_name = module.func_name(func_index); + if let Some(info) = build_function_debug_info( + parsed_module, + module_types, + module, + func_index, + body, + addr2line, + diagnostics, + collected.by_local.get(&func_index), + collected.frame_base.get(&func_index), + ) { + debug!( + "Collected debug info for function {}: {} locals", + func_name.as_str(), + info.locals.len() + ); + map.insert(func_index, Rc::new(RefCell::new(info))); + } else { + debug!("No debug info collected for function {}", func_name.as_str()); + } + } + + debug!("Collected debug info for {} functions total", map.len()); + map +} + +#[allow(clippy::too_many_arguments)] +fn build_function_debug_info( + parsed_module: &ParsedModule, + module_types: &ModuleTypesBuilder, + module: &Module, + func_index: FuncIndex, + body: &FunctionBodyData, + addr2line: &Context>, + diagnostics: &DiagnosticsHandler, + dwarf_locals: Option<&FxHashMap>, + frame_base_vars: Option<&Vec>, +) -> Option { + let func_name = module.func_name(func_index); + + let (file_symbol, directory_symbol) = determine_file_symbols(parsed_module, addr2line, body); + let (line, column) = determine_location(addr2line, body.body_offset); + + let mut compile_unit = CompileUnit::new(Symbol::intern("wasm"), file_symbol); + compile_unit.directory = directory_symbol; + compile_unit.producer = Some(Symbol::intern("midenc-frontend-wasm")); + + let mut subprogram = Subprogram::new(func_name, compile_unit.file, line, column); + subprogram.is_definition = true; + + let wasm_signature = module_types[module.functions[func_index].signature].clone(); + let locals = build_local_debug_info( + module, + func_index, + &wasm_signature, + body, + &subprogram, + diagnostics, + dwarf_locals, + frame_base_vars, + ); + let 
location_schedule = build_location_schedule(&locals); + + Some(FunctionDebugInfo { + compile_unit, + subprogram, + locals, + function_span: None, + location_schedule, + next_location_event: 0, + }) +} + +fn determine_file_symbols( + parsed_module: &ParsedModule, + addr2line: &Context>, + body: &FunctionBodyData, +) -> (Symbol, Option) { + if let Some(location) = addr2line + .find_location(body.body_offset) + .ok() + .flatten() + .and_then(|loc| loc.file.map(|file| file.to_owned())) + { + let path = Path::new(location.as_str()); + let directory_symbol = path.parent().and_then(|parent| parent.to_str()).map(Symbol::intern); + let file_symbol = Symbol::intern(location.as_str()); + (file_symbol, directory_symbol) + } else if let Some(path) = parsed_module.wasm_file.path.as_ref() { + let file_symbol = Symbol::intern(path.to_string_lossy().as_ref()); + let directory_symbol = path.parent().and_then(|parent| parent.to_str()).map(Symbol::intern); + (file_symbol, directory_symbol) + } else { + (Symbol::intern("unknown"), None) + } +} + +fn determine_location(addr2line: &Context>, offset: u64) -> (u32, Option) { + match addr2line.find_location(offset).ok().flatten() { + Some(location) => { + let line = location.line.unwrap_or_default(); + let column = location.column; + (line, column) + } + None => (0, None), + } +} + +#[allow(clippy::too_many_arguments)] +fn build_local_debug_info( + module: &Module, + func_index: FuncIndex, + wasm_signature: &WasmFuncType, + body: &FunctionBodyData, + subprogram: &Subprogram, + diagnostics: &DiagnosticsHandler, + dwarf_locals: Option<&FxHashMap>, + frame_base_vars: Option<&Vec>, +) -> Vec> { + let param_count = wasm_signature.params().len(); + let mut local_entries = Vec::new(); + if let Ok(mut locals_reader) = body.body.get_locals_reader().into_diagnostic() { + let decl_count = locals_reader.get_count(); + for _ in 0..decl_count { + if let Ok((count, ty)) = locals_reader.read().into_diagnostic() { + local_entries.push((count, ty)); + } + } 
+ } + let local_count: usize = local_entries.iter().map(|(count, _)| *count as usize).sum(); + + let total = param_count + local_count; + let mut locals = vec![None; total]; + let has_dwarf_locals = dwarf_locals.is_some_and(|locals| !locals.is_empty()) + || frame_base_vars.is_some_and(|locals| !locals.is_empty()); + + for (param_idx, wasm_ty) in wasm_signature.params().iter().enumerate() { + let index_u32 = param_idx as u32; + let dwarf_entry = dwarf_locals.and_then(|map| map.get(&index_u32)); + let mut name_symbol = module + .local_name(func_index, index_u32) + .unwrap_or_else(|| Symbol::intern(format!("arg{param_idx}"))); + if let Some(info) = dwarf_entry + && let Some(symbol) = info.name + { + name_symbol = symbol; + } + let mut attr = + Variable::new(name_symbol, subprogram.file, subprogram.line, subprogram.column); + attr.arg_index = Some(param_idx as u32); + if let Ok(ty) = ir_type(*wasm_ty, diagnostics) { + attr.ty = Some(ty); + } + let dwarf_info = dwarf_entry.cloned(); + if let Some(info) = dwarf_info.as_ref() { + if let Some(file) = info.decl_file { + attr.file = file; + } + if let Some(line) = info.decl_line + && line != 0 + { + attr.line = line; + } + if info.decl_column.is_some() { + attr.column = info.decl_column; + } + } + let locations = dwarf_info.as_ref().map(|info| info.locations.clone()).unwrap_or_default(); + + // Create expression from the first location if available + let expression = if !locations.is_empty() { + Some(locations[0].storage.clone()) + } else { + None + }; + + locals[param_idx] = Some(LocalDebugInfo { + attr, + locations, + expression, + }); + } + + let mut next_local_index = param_count; + for (count, ty) in local_entries { + for _ in 0..count { + let index_u32 = next_local_index as u32; + let dwarf_entry = dwarf_locals.and_then(|map| map.get(&index_u32)); + let local_name = module.local_name(func_index, index_u32); + if has_dwarf_locals && dwarf_entry.is_none() && local_name.is_none() { + next_local_index += 1; + continue; + } 
+ + let mut name_symbol = + local_name.unwrap_or_else(|| Symbol::intern(format!("local{next_local_index}"))); + if let Some(info) = dwarf_entry + && let Some(symbol) = info.name + { + name_symbol = symbol; + } + let mut attr = + Variable::new(name_symbol, subprogram.file, subprogram.line, subprogram.column); + let wasm_ty = convert_valtype(ty); + if let Ok(ir_ty) = ir_type(wasm_ty, diagnostics) { + attr.ty = Some(ir_ty); + } + let dwarf_info = dwarf_entry.cloned(); + if let Some(info) = dwarf_info.as_ref() { + if let Some(file) = info.decl_file { + attr.file = file; + } + if let Some(line) = info.decl_line + && line != 0 + { + attr.line = line; + } + if info.decl_column.is_some() { + attr.column = info.decl_column; + } + } + let locations = + dwarf_info.as_ref().map(|info| info.locations.clone()).unwrap_or_default(); + + // Create expression from the first location if available + let expression = if !locations.is_empty() { + Some(locations[0].storage.clone()) + } else { + None + }; + + locals[next_local_index] = Some(LocalDebugInfo { + attr, + locations, + expression, + }); + next_local_index += 1; + } + } + + // Append FrameBase-only variables beyond normal WASM locals. + // These are variables like local `sum` in debug builds that live in + // linear memory via __stack_pointer and have no WASM local index. 
+ if let Some(fb_vars) = frame_base_vars { + for fb_var in fb_vars { + let name = fb_var.name.unwrap_or_else(|| Symbol::intern("?")); + let mut attr = Variable::new(name, subprogram.file, subprogram.line, subprogram.column); + if let Some(file) = fb_var.decl_file { + attr.file = file; + } + if let Some(line) = fb_var.decl_line.filter(|l| *l != 0) { + attr.line = line; + } + attr.column = fb_var.decl_column; + let expression = if !fb_var.locations.is_empty() { + Some(fb_var.locations[0].storage.clone()) + } else { + None + }; + locals.push(Some(LocalDebugInfo { + attr, + locations: fb_var.locations.clone(), + expression, + })); + } + } + + locals +} + +fn build_location_schedule(locals: &[Option]) -> Vec { + let mut schedule = Vec::new(); + for (var_index, info_opt) in locals.iter().enumerate() { + let Some(info) = info_opt else { + continue; + }; + for descriptor in &info.locations { + if descriptor.storage.operations.len() == 1 + && !matches!( + &descriptor.storage.operations[0], + ExpressionOp::WasmLocal(_) | ExpressionOp::FrameBase { .. }, + ) + { + continue; + } + schedule.push(LocationScheduleEntry { + offset: descriptor.start, + var_index, + storage: descriptor.storage.clone(), + }); + } + } + schedule.sort_by_key(|entry| entry.offset); + schedule +} + +/// Collected DWARF local data for all functions. +struct CollectedDwarfLocals { + /// Variables keyed by WASM local index (existing behavior). + by_local: FxHashMap>, + /// FrameBase-only variables that have no WASM local index (e.g. `sum` in debug builds). 
+ frame_base: FxHashMap>, +} + +fn collect_dwarf_local_data( + parsed_module: &ParsedModule, + module: &Module, + diagnostics: &DiagnosticsHandler, +) -> CollectedDwarfLocals { + let _ = diagnostics; + let dwarf = &parsed_module.debuginfo.dwarf; + + let mut func_by_name = FxHashMap::default(); + for (func_index, _) in module.functions.iter() { + let name = module.func_name(func_index).as_str().to_owned(); + func_by_name.insert(name, func_index); + } + + let mut low_pc_map = FxHashMap::default(); + let code_section_offset = parsed_module.wasm_file.code_section_offset; + for (defined_idx, body) in parsed_module.function_body_inputs.iter() { + let func_index = module.func_index(defined_idx); + let adjusted = body.body_offset.saturating_sub(code_section_offset); + low_pc_map.insert(adjusted, func_index); + } + + let mut results: FxHashMap> = FxHashMap::default(); + let mut fb_results: FxHashMap> = FxHashMap::default(); + let mut units = dwarf.units(); + loop { + let header = match units.next() { + Ok(Some(header)) => header, + Ok(None) => break, + Err(err) => { + debug!("failed to iterate DWARF units: {err:?}"); + break; + } + }; + let unit = match dwarf.unit(header) { + Ok(unit) => unit, + Err(err) => { + debug!("failed to load DWARF unit: {err:?}"); + continue; + } + }; + + let mut entries = unit.entries(); + loop { + let entry = match entries.next_dfs() { + Ok(Some(data)) => data, + Ok(None) => break, + Err(err) => { + debug!("error while traversing DWARF entries: {err:?}"); + break; + } + }; + + if entry.tag() == gimli::DW_TAG_subprogram { + let Some(info) = + resolve_subprogram_target(dwarf, &unit, &func_by_name, &low_pc_map, entry) + else { + continue; + }; + + if let Err(err) = collect_subprogram_variables( + dwarf, + &unit, + entry.offset(), + info.func_index, + info.low_pc, + info.high_pc, + info.frame_base_global, + &mut results, + &mut fb_results, + ) { + debug!( + "failed to gather variables for function {:?}: {err:?}", + info.func_index + ); + } + } + } + 
} + + CollectedDwarfLocals { + by_local: results, + frame_base: fb_results, + } +} + +/// Result of resolving a DWARF subprogram to a WASM function. +struct SubprogramInfo { + func_index: FuncIndex, + low_pc: u64, + high_pc: Option, + /// The encoded WASM location used as the frame base (from DW_AT_frame_base). + /// Plain values are Wasm globals; values encoded with + /// `encode_frame_base_local_index` are Wasm locals. + frame_base_global: Option, +} + +fn resolve_subprogram_target>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + func_by_name: &FxHashMap, + low_pc_map: &FxHashMap, + entry: &gimli::DebuggingInformationEntry, +) -> Option { + let mut maybe_name: Option = None; + let mut low_pc = None; + let mut high_pc = None; + let mut frame_base_global = None; + + for attr in entry.attrs() { + match attr.name() { + gimli::DW_AT_name => { + if let Ok(raw) = dwarf.attr_string(unit, attr.value()) + && let Ok(name) = raw.to_string_lossy() + { + maybe_name = Some(name.into_owned()); + } + } + gimli::DW_AT_linkage_name => { + if maybe_name.is_none() + && let Ok(raw) = dwarf.attr_string(unit, attr.value()) + && let Ok(name) = raw.to_string_lossy() + { + maybe_name = Some(name.into_owned()); + } + } + gimli::DW_AT_low_pc => match attr.value() { + AttributeValue::Addr(addr) => low_pc = Some(addr), + AttributeValue::Udata(val) => low_pc = Some(val), + _ => {} + }, + gimli::DW_AT_high_pc => match attr.value() { + AttributeValue::Addr(addr) => high_pc = Some(addr), + AttributeValue::Udata(size) => { + if let Some(base) = low_pc { + high_pc = Some(base.saturating_add(size)); + } + } + _ => {} + }, + gimli::DW_AT_frame_base => { + // Decode the frame base expression. Rust-generated Wasm commonly + // uses a generated Wasm local as the frame pointer; globals are + // still supported for producers that use __stack_pointer directly. 
+ if let AttributeValue::Exprloc(expr) = attr.value() { + let mut ops = expr.operations(unit.encoding()); + while let Ok(Some(op)) = ops.next() { + match op { + Operation::WasmLocal { index } => { + frame_base_global = encode_frame_base_local_index(index); + } + Operation::WasmGlobal { index } => { + frame_base_global = Some(index); + } + _ => {} + } + } + } + } + _ => {} + } + } + + let make_info = |func_index, lp, hp| SubprogramInfo { + func_index, + low_pc: lp, + high_pc: hp, + frame_base_global, + }; + + if let Some(ref name) = maybe_name + && let Some(&func_index) = func_by_name.get(name) + { + return Some(make_info(func_index, low_pc.unwrap_or_default(), high_pc)); + } + + if let Some(base) = low_pc + && let Some(&func_index) = low_pc_map.get(&base) + { + return Some(make_info(func_index, base, high_pc)); + } + None +} + +#[allow(clippy::too_many_arguments)] +fn collect_subprogram_variables>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + offset: gimli::UnitOffset, + func_index: FuncIndex, + low_pc: u64, + high_pc: Option, + frame_base_global: Option, + results: &mut FxHashMap>, + fb_results: &mut FxHashMap>, +) -> gimli::Result<()> { + let mut tree = unit.entries_tree(Some(offset))?; + let root = tree.root()?; + let mut children = root.children(); + let mut param_counter: u32 = 0; + while let Some(child) = children.next()? 
{ + walk_variable_nodes( + dwarf, + unit, + child, + func_index, + low_pc, + high_pc, + frame_base_global, + results, + fb_results, + &mut param_counter, + )?; + } + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +fn walk_variable_nodes>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + node: gimli::EntriesTreeNode, + func_index: FuncIndex, + low_pc: u64, + high_pc: Option, + frame_base_global: Option, + results: &mut FxHashMap>, + fb_results: &mut FxHashMap>, + param_counter: &mut u32, +) -> gimli::Result<()> { + let entry = node.entry(); + let tag = entry.tag(); + match tag { + gimli::DW_TAG_formal_parameter | gimli::DW_TAG_variable => { + // For formal parameters, the WASM local index equals the parameter + // order (params are always the first N WASM locals). + let fallback_index = if tag == gimli::DW_TAG_formal_parameter { + let idx = *param_counter; + *param_counter += 1; + Some(idx) + } else { + None + }; + let mut fb_vars = Vec::new(); + if let Some((local_index, mut data)) = decode_variable_entry( + dwarf, + unit, + entry, + low_pc, + high_pc, + frame_base_global, + fallback_index, + &mut fb_vars, + )? { + let local_map = results.entry(func_index).or_default(); + let entry = local_map.entry(local_index).or_insert_with(DwarfLocalData::default); + entry.name = entry.name.or(data.name); + entry.decl_file = entry.decl_file.or(data.decl_file); + entry.decl_line = entry.decl_line.or(data.decl_line); + entry.decl_column = entry.decl_column.or(data.decl_column); + if !data.locations.is_empty() { + entry.locations.append(&mut data.locations); + } + } + if !fb_vars.is_empty() { + fb_results.entry(func_index).or_default().extend(fb_vars); + } + } + _ => {} + } + + let mut children = node.children(); + while let Some(child) = children.next()? 
{ + walk_variable_nodes( + dwarf, + unit, + child, + func_index, + low_pc, + high_pc, + frame_base_global, + results, + fb_results, + param_counter, + )?; + } + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +fn decode_variable_entry>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry, + low_pc: u64, + high_pc: Option, + frame_base_global: Option, + fallback_index: Option, + frame_base_vars: &mut Vec, +) -> gimli::Result> { + let mut name_symbol = None; + let mut decl_file = None; + let mut location_attr = None; + let mut decl_line = None; + let mut decl_column = None; + + for attr in entry.attrs() { + match attr.name() { + gimli::DW_AT_name => { + if let Ok(raw) = dwarf.attr_string(unit, attr.value()) + && let Ok(text) = raw.to_string_lossy() + { + name_symbol = Some(Symbol::intern(text.as_ref())); + } + } + gimli::DW_AT_location => location_attr = Some(attr.value()), + gimli::DW_AT_decl_file => { + if let Some(file_index) = attr.udata_value() { + decl_file = resolve_decl_file(dwarf, unit, file_index); + } + } + gimli::DW_AT_decl_line => { + if let Some(line) = attr.udata_value() { + decl_line = Some(line as u32); + } + } + gimli::DW_AT_decl_column => { + if let Some(column) = attr.udata_value() { + decl_column = Some(column as u32); + } + } + _ => {} + } + } + + let Some(location_value) = location_attr else { + return Ok(None); + }; + + let mut locations = Vec::new(); + + match location_value { + AttributeValue::Exprloc(ref expr) => { + let storage = decode_storage_from_expression(expr, unit, frame_base_global)?; + if let Some(storage) = storage { + // Determine the WASM local index for this variable. + // For WasmLocal storage, use the index directly. + // For FrameBase (DW_OP_fbreg), use the parameter order as + // fallback since formal params map to WASM locals 0..N. 
+ let local_index = match storage.operations.as_slice() { + [ExpressionOp::WasmLocal(index)] => Some(*index), + _ => fallback_index, + }; + if let Some(local_index) = local_index { + locations.push(LocationDescriptor { + start: low_pc, + end: high_pc, + storage, + }); + let data = DwarfLocalData { + name: name_symbol, + decl_file, + locations, + decl_line, + decl_column, + }; + return Ok(Some((local_index, data))); + } else if matches!(storage.operations.as_slice(), [ExpressionOp::FrameBase { .. }]) + { + // FrameBase-only variable (no WASM local index, e.g. local `sum` + // in debug builds). Collect separately instead of dropping. + locations.push(LocationDescriptor { + start: low_pc, + end: high_pc, + storage, + }); + let data = DwarfLocalData { + name: name_symbol, + decl_file, + locations, + decl_line, + decl_column, + }; + frame_base_vars.push(data); + return Ok(None); + } + } + return Ok(None); + } + AttributeValue::LocationListsRef(offset) => { + let mut iter = dwarf.locations.locations( + offset, + unit.encoding(), + low_pc, + &dwarf.debug_addr, + unit.addr_base, + )?; + let mut has_frame_base = false; + while let Some(entry) = iter.next()? { + let storage_expr = entry.data; + if let Some(storage) = + decode_storage_from_expression(&storage_expr, unit, frame_base_global)? + && matches!( + storage.operations.as_slice(), + [ExpressionOp::WasmLocal(_) | ExpressionOp::FrameBase { .. }] + ) + { + if matches!(storage.operations.as_slice(), [ExpressionOp::FrameBase { .. 
}]) { + has_frame_base = true; + } + locations.push(LocationDescriptor { + start: entry.range.begin, + end: Some(entry.range.end), + storage, + }); + } + } + if locations.is_empty() { + return Ok(None); + } + // Try to find a WASM local index from any location descriptor + if let Some(local_index) = + locations.iter().find_map(|desc| match desc.storage.operations.as_slice() { + [ExpressionOp::WasmLocal(index)] => Some(*index), + _ => None, + }) + { + let data = DwarfLocalData { + name: name_symbol, + decl_file, + locations, + decl_line, + decl_column, + }; + return Ok(Some((local_index, data))); + } else if has_frame_base { + // FrameBase-only location list variable + let data = DwarfLocalData { + name: name_symbol, + decl_file, + locations, + decl_line, + decl_column, + }; + frame_base_vars.push(data); + return Ok(None); + } + return Ok(None); + } + _ => {} + } + + Ok(None) +} + +fn resolve_decl_file>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + file_index: u64, +) -> Option { + let line_program = unit.line_program.as_ref()?; + let header = line_program.header(); + let file = header.file(file_index)?; + let raw = dwarf.attr_string(unit, file.path_name()).ok()?; + let path = raw.to_string_lossy().ok()?; + Some(Symbol::intern(path.as_ref())) +} + +fn decode_storage_from_expression>( + expr: &gimli::Expression, + unit: &gimli::Unit, + frame_base_global: Option, +) -> gimli::Result> { + let mut operations = expr.clone().operations(unit.encoding()); + let mut storage = vec![]; + while let Some(op) = operations.next()? 
{ + match op { + Operation::WasmLocal { index } => storage.push(ExpressionOp::WasmLocal(index)), + Operation::WasmGlobal { index } => storage.push(ExpressionOp::WasmGlobal(index)), + Operation::WasmStack { index } => storage.push(ExpressionOp::WasmStack(index)), + Operation::UnsignedConstant { value } => { + storage.push(ExpressionOp::ConstU64(value)); + } + Operation::SignedConstant { value } => { + storage.push(ExpressionOp::ConstS64(value)); + } + Operation::PlusConstant { value } => { + storage.push(ExpressionOp::PlusUConst(value)); + } + Operation::StackValue => { + storage.push(ExpressionOp::StackValue); + } + Operation::FrameOffset { offset } => { + // DW_OP_fbreg(offset): variable is at frame_base + offset in WASM linear memory. + // The frame base is a WASM global (typically __stack_pointer = global 0). + if let Some(global_index) = frame_base_global { + storage.push(ExpressionOp::FrameBase { + global_index, + byte_offset: offset, + }); + } + } + Operation::Address { address } => { + storage.push(ExpressionOp::Address { address }); + } + Operation::Piece { + size_in_bits, + bit_offset, + } => { + storage.push(ExpressionOp::BitPiece { + size: size_in_bits, + offset: bit_offset.unwrap_or_default(), + }); + } + Operation::Register { .. } => { + storage.push(ExpressionOp::Unsupported(Symbol::intern("DW_OP_breg(N)"))); + } + Operation::RegisterOffset { .. 
} => { + storage.push(ExpressionOp::Unsupported(Symbol::intern("DW_OP_bregx"))); + } + op => { + log::trace!(target: "dwarf", "unhandled expression op {op:?}"); + // Bail if we observe unhandled ops, as we cannot properly represent the expression + return Ok(None); + } + } + } + + if storage.is_empty() { + Ok(None) + } else { + Ok(Some(Expression::with_ops(storage))) + } +} + +fn func_local_index(func_index: FuncIndex, module: &Module) -> Option { + module.defined_func_index(func_index).map(|idx| idx.index()) +} diff --git a/frontend/wasm/src/module/func_translation_state.rs b/frontend/wasm/src/module/func_translation_state.rs index bb0f96113..b0826a2e1 100644 --- a/frontend/wasm/src/module/func_translation_state.rs +++ b/frontend/wasm/src/module/func_translation_state.rs @@ -5,13 +5,16 @@ //! //! Based on Cranelift's Wasm -> CLIF translator v11.0.0 +use alloc::rc::Rc; +use core::cell::RefCell; + use midenc_dialect_hir::HirOpBuilder; use midenc_hir::{ BlockRef, Builder, OperationRef, SourceSpan, Type, ValueRef, dialects::builtin::attributes::Signature, }; -use super::function_builder_ext::FunctionBuilderExt; +use super::{debug_info::FunctionDebugInfo, function_builder_ext::FunctionBuilderExt}; use crate::{error::WasmResult, module::types::BlockType}; /// Information about the presence of an associated `else` for an `if`, or the @@ -232,6 +235,8 @@ pub struct FuncTranslationState { /// Is the current translation state still reachable? This is false when translating operators /// like End, Return, or Unreachable. pub(crate) reachable: bool, + /// Optional debug metadata for the current function. 
+ pub(crate) debug_info: Option>>, } impl FuncTranslationState { @@ -241,6 +246,7 @@ impl FuncTranslationState { stack: Vec::new(), control_stack: Vec::new(), reachable: true, + debug_info: None, } } @@ -248,6 +254,7 @@ impl FuncTranslationState { debug_assert!(self.stack.is_empty()); debug_assert!(self.control_stack.is_empty()); self.reachable = true; + self.debug_info = None; } /// Initialize the state for compiling a function with the given signature. @@ -259,6 +266,10 @@ impl FuncTranslationState { self.push_block(exit_block, 0, sig.results().len()); } + pub(crate) fn set_debug_info(&mut self, info: Option>>) { + self.debug_info = info; + } + /// Push a value. pub(crate) fn push1(&mut self, val: ValueRef) { self.stack.push(val); diff --git a/frontend/wasm/src/module/func_translator.rs b/frontend/wasm/src/module/func_translator.rs index 7693431b9..9a23fe69e 100644 --- a/frontend/wasm/src/module/func_translator.rs +++ b/frontend/wasm/src/module/func_translator.rs @@ -10,7 +10,7 @@ use std::{cell::RefCell, rc::Rc}; use cranelift_entity::EntityRef; use midenc_hir::{ - BlockRef, Builder, Context, Op, + BlockRef, Builder, Context, Op, Type, diagnostics::{ColumnNumber, LineNumber}, dialects::builtin::{BuiltinOpBuilder, FunctionRef}, }; @@ -21,8 +21,9 @@ use midenc_session::{ use wasmparser::{FuncValidator, FunctionBody, WasmModuleResources}; use super::{ - function_builder_ext::SSABuilderListener, module_env::ParsedModule, - module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, + debug_info::FunctionDebugInfo, function_builder_ext::SSABuilderListener, + module_env::ParsedModule, module_translation_state::ModuleTranslationState, + types::ModuleTypesBuilder, }; use crate::{ code_translator::translate_operator, @@ -69,12 +70,22 @@ impl FuncTranslator { session: &Session, func_validator: &mut FuncValidator, config: &crate::WasmTranslationConfig, + debug_info: Option>>, ) -> WasmResult<()> { let context = func.borrow().as_operation().context_rc(); 
let mut op_builder = midenc_hir::OpBuilder::new(context) .with_listener(SSABuilderListener::new(self.func_ctx.clone())); let mut builder = FunctionBuilderExt::new(func, &mut op_builder); + // Keep a clone for FrameBase variable declaration below + let debug_info_ref = debug_info.clone(); + + if let Some(info) = debug_info.clone() { + builder.set_debug_metadata(info); + } + + self.state.set_debug_info(debug_info); + let entry_block = builder.current_block(); builder.seal_block(entry_block); // Declare all predecessors known. @@ -91,7 +102,7 @@ impl FuncTranslator { let mut reader = body.get_locals_reader().into_diagnostic()?; - parse_local_decls( + let total_wasm_vars = parse_local_decls( &mut reader, &mut builder, num_params, @@ -99,6 +110,20 @@ impl FuncTranslator { &session.diagnostics, )?; + // Declare extra SSA variables for FrameBase-only debug entries (e.g. local `sum` + // in debug builds that lives in linear memory, not a WASM local). + // Use declare_var_only to avoid allocating HIR locals that would inflate + // num_locals and corrupt FMP offset calculations. + if let Some(info) = debug_info_ref.as_ref() { + let locals_len = info.borrow().locals.len(); + if locals_len > total_wasm_vars { + for idx in total_wasm_vars..locals_len { + let var = Variable::new(idx); + builder.declare_var_only(var, Type::I32); + } + } + } + let mut reader = body.get_operators_reader().into_diagnostic()?; parse_function_body( &mut reader, @@ -136,6 +161,7 @@ fn declare_parameters( let param_value = entry_block.borrow().arguments()[i]; builder.def_var(var, param_value); + builder.register_parameter(var, param_value); builder.store_local(local, param_value, SourceSpan::UNKNOWN).unwrap(); } next_local @@ -144,13 +170,14 @@ fn declare_parameters( /// Parse the local variable declarations that precede the function body. /// /// Declare local variables, starting from `num_params`. +/// Returns the total number of declared variables (params + locals). 
fn parse_local_decls( reader: &mut wasmparser::LocalsReader<'_>, builder: &mut FunctionBuilderExt<'_, B>, num_params: usize, validator: &mut FuncValidator, diagnostics: &DiagnosticsHandler, -) -> WasmResult<()> { +) -> WasmResult { let mut next_local = num_params; let local_count = reader.get_count(); @@ -161,7 +188,7 @@ fn parse_local_decls( declare_locals(builder, count, ty, &mut next_local, diagnostics)?; } - Ok(()) + Ok(next_local) } /// Declare `count` local variables of the same type, starting from `next_local`. @@ -313,6 +340,8 @@ fn parse_function_body( &session.diagnostics, effective_span, )?; + + builder.apply_location_schedule(offset, span); } // The final `End` operator left us in the exit block where we need to manually add a return diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index eee1e55ce..4887046a9 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -1,23 +1,35 @@ -use alloc::rc::Rc; +use alloc::{rc::Rc, vec::Vec}; use core::cell::RefCell; +use std::path::Path; -use cranelift_entity::SecondaryMap; +use cranelift_entity::{EntityRef as _, SecondaryMap}; +use log::warn; use midenc_dialect_arith::ArithOpBuilder; use midenc_dialect_cf::ControlFlowOpBuilder; use midenc_dialect_hir::HirOpBuilder; use midenc_dialect_ub::UndefinedBehaviorOpBuilder; use midenc_dialect_wasm::WasmOpBuilder; use midenc_hir::{ - BlockRef, Builder, Context, EntityRef, FxHashMap, FxHashSet, Ident, Listener, ListenerType, + BlockRef, Builder, Context, EntityRef, FxHashMap, FxHashSet, Ident, Listener, ListenerType, Op, OpBuilder, OperationRef, ProgramPoint, RegionRef, SmallVec, SourceSpan, Type, ValueRef, - dialects::builtin::{ - BuiltinOpBuilder, FunctionBuilder, FunctionRef, - attributes::{LocalVariable, Signature}, + dialects::{ + builtin::{ + BuiltinOpBuilder, FunctionBuilder, FunctionRef, + attributes::{LocalVariable, Signature}, + }, + 
debuginfo::{ + DIBuilder, + attributes::{CompileUnitAttr, Expression, ExpressionOp, SubprogramAttr}, + }, }, + interner::Symbol, traits::{BranchOpInterface, Terminator}, }; -use crate::ssa::{SSABuilder, SideEffects, Variable}; +use crate::{ + module::debug_info::{FunctionDebugInfo, LocationScheduleEntry}, + ssa::{SSABuilder, SideEffects, Variable}, +}; /// Tracking variables and blocks for SSA construction. pub struct FunctionBuilderContext { @@ -127,6 +139,14 @@ impl Listener for SSABuilderListener { pub struct FunctionBuilderExt<'c, B: ?Sized + Builder> { inner: FunctionBuilder<'c, B>, func_ctx: Rc>, + debug_info: Option>>, + param_values: Vec<(Variable, ValueRef)>, + param_dbg_emitted: bool, + /// Set of variables that have been defined via def_var. Used by + /// apply_location_schedule to avoid calling try_use_var on undefined + /// variables, which would insert block parameters as a side effect and + /// corrupt the CFG. + defined_vars: alloc::collections::BTreeSet, } impl<'c> FunctionBuilderExt<'c, OpBuilder> { @@ -136,11 +156,205 @@ impl<'c> FunctionBuilderExt<'c, OpBuilder> { let inner = FunctionBuilder::new(func, builder); - Self { inner, func_ctx } + Self { + inner, + func_ctx, + debug_info: None, + param_values: Vec::new(), + param_dbg_emitted: false, + defined_vars: alloc::collections::BTreeSet::new(), + } } } impl FunctionBuilderExt<'_, B> { + const DI_COMPILE_UNIT_ATTR: &'static str = "di.compile_unit"; + const DI_SUBPROGRAM_ATTR: &'static str = "di.subprogram"; + + pub fn set_debug_metadata(&mut self, info: Rc>) { + self.debug_info = Some(info); + self.param_dbg_emitted = false; + self.refresh_function_debug_attrs(); + } + + pub fn emit_dbg_value_for_var(&mut self, var: Variable, value: ValueRef, span: SourceSpan) { + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let idx = var.index(); + let (attr_opt, expr_opt) = { + let info = info.borrow(); + let local_info = info.locals.get(idx).and_then(|l| l.as_ref()); + match local_info { + 
Some(l) => (Some(l.attr.clone()), l.expression.clone()), + None => (None, None), + } + }; + let Some(mut attr) = attr_opt else { + return; + }; + + if let Some((file_symbol, _directory, line, column)) = self.span_to_location(span) { + attr.file = file_symbol; + if line != 0 { + attr.line = line; + } + attr.column = column; + } + + // If DWARF didn't provide a location expression, synthesize one from the + // wasm local index — we know this variable is stored as a wasm local. + let expr = expr_opt.or_else(|| { + let ops = vec![ExpressionOp::WasmLocal(idx as u32)]; + Some(Expression::with_ops(ops)) + }); + + if let Err(err) = + DIBuilder::builder_mut(self).debug_value_with_expr(value, attr, expr, span) + { + warn!("failed to emit dbg.value for local {idx}: {err:?}"); + } + } + + pub fn def_var_with_dbg(&mut self, var: Variable, val: ValueRef, span: SourceSpan) { + self.def_var(var, val); + self.emit_dbg_value_for_var(var, val, span); + } + + pub fn register_parameter(&mut self, var: Variable, value: ValueRef) { + self.param_values.push((var, value)); + } + + pub fn record_debug_span(&mut self, span: SourceSpan) { + if span == SourceSpan::UNKNOWN { + return; + } + let Some(info_rc) = self.debug_info.as_ref() else { + return; + }; + + if let Some((file_symbol, directory_symbol, line, column)) = self.span_to_location(span) { + { + let mut info = info_rc.borrow_mut(); + info.compile_unit.file = file_symbol; + info.compile_unit.directory = directory_symbol; + info.subprogram.file = file_symbol; + info.subprogram.line = line; + info.subprogram.column = column; + info.function_span.get_or_insert(span); + } + self.refresh_function_debug_attrs(); + self.emit_parameter_dbg_if_needed(span); + } + } + + pub fn apply_location_schedule(&mut self, offset: u64, span: SourceSpan) { + let Some(info_rc) = self.debug_info.as_ref() else { + return; + }; + + let updates = { + let mut info = info_rc.borrow_mut(); + let mut pending = Vec::new(); + while info.next_location_event < 
info.location_schedule.len() { + let entry = &info.location_schedule[info.next_location_event]; + if entry.offset > offset { + break; + } + pending.push(entry.clone()); + info.next_location_event += 1; + } + pending + }; + + for entry in updates { + self.emit_scheduled_dbg_value(entry, span); + } + } + + fn emit_scheduled_dbg_value(&mut self, entry: LocationScheduleEntry, span: SourceSpan) { + // Skip variables already emitted as parameters to avoid duplicates. + if self.param_dbg_emitted + && self.param_values.iter().any(|(v, _)| v.index() == entry.var_index) + { + return; + } + + // Only emit debug values for variables that have already been defined. + // Calling try_use_var on an undefined variable would insert block + // parameters (phis) as a side effect, corrupting the CFG. + let is_frame_base = + matches!(entry.storage.operations.as_slice(), [ExpressionOp::FrameBase { .. }]); + if !is_frame_base && !self.defined_vars.contains(&(entry.var_index as u32)) { + return; + } + + let var = Variable::new(entry.var_index); + let is_defined = self.defined_vars.contains(&(entry.var_index as u32)); + if !is_defined && is_frame_base { + self.emit_scheduled_dbg_declare(entry, span); + return; + } + + let value = if is_defined { + match self.try_use_var(var) { + Ok(v) => v, + Err(_) => { + return; + } + } + } else { + return; + }; + + // Create expression from the scheduled location + let expression = if entry.storage.is_empty() { + None + } else { + Some(entry.storage) + }; + + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let idx = entry.var_index; + let attr_opt = { + let info = info.borrow(); + info.local_attr(idx).cloned() + }; + let Some(attr) = attr_opt else { + return; + }; + + if let Err(err) = + DIBuilder::builder_mut(self).debug_value_with_expr(value, attr, expression, span) + { + warn!("failed to emit scheduled dbg.value for local {idx}: {err:?}"); + } + } + + fn emit_scheduled_dbg_declare(&mut self, entry: LocationScheduleEntry, span: 
SourceSpan) { + if entry.storage.is_empty() { + return; + } + + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let idx = entry.var_index; + let attr_opt = { + let info = info.borrow(); + info.local_attr(idx).cloned() + }; + let Some(attr) = attr_opt else { + return; + }; + + if let Err(err) = DIBuilder::builder_mut(self).debug_declare(attr, entry.storage, span) { + warn!("failed to emit scheduled dbg.declare for local {idx}: {err:?}"); + } + } + pub fn name(&self) -> Ident { *self.inner.func.borrow().get_name() } @@ -308,6 +522,19 @@ impl FunctionBuilderExt<'_, B> { local } + /// Declare an SSA variable without allocating an HIR local. + /// + /// Used for FrameBase-only debug variables that live in linear memory + /// and don't need a real function-local storage slot. This avoids + /// inflating `num_locals` which would corrupt FMP offset calculations. + pub fn declare_var_only(&mut self, var: Variable, ty: Type) { + let mut ctx = self.func_ctx.borrow_mut(); + if ctx.types[var] != Type::Unknown { + return; // Already declared + } + ctx.types[var] = ty; + } + /// Declares the type of a variable, so that it can be used later (by calling /// [`FunctionBuilderExt::use_var`]). This function will return an error if the variable /// has been previously declared. @@ -367,12 +594,16 @@ impl FunctionBuilderExt<'_, B> { /// an error if the value supplied does not match the type the variable was /// declared to have. 
pub fn try_def_var(&mut self, var: Variable, val: ValueRef) -> Result<(), DefVariableError> { - let mut func_ctx = self.func_ctx.borrow_mut(); - let var_ty = func_ctx.types.get(var).ok_or(DefVariableError::DefinedBeforeDeclared(var))?; - if var_ty != val.borrow().ty() { - return Err(DefVariableError::TypeMismatch(var, val)); + { + let mut func_ctx = self.func_ctx.borrow_mut(); + let var_ty = + func_ctx.types.get(var).ok_or(DefVariableError::DefinedBeforeDeclared(var))?; + if var_ty != val.borrow().ty() { + return Err(DefVariableError::TypeMismatch(var, val)); + } + func_ctx.ssa.def_var(var, val, self.inner.current_block()); } - func_ctx.ssa.def_var(var, val, self.inner.current_block()); + self.defined_vars.insert(var.index() as u32); Ok(()) } @@ -437,6 +668,59 @@ impl FunctionBuilderExt<'_, B> { inst_branch.change_branch_destination(old_block, new_block); self.func_ctx.borrow_mut().ssa.declare_block_predecessor(new_block, branch_inst); } + + fn refresh_function_debug_attrs(&mut self) { + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let info = info.borrow(); + let context = self.inner.builder().context_rc(); + let cu_attr = context + .create_attribute::(info.compile_unit.clone()) + .as_attribute_ref(); + let sp_attr = context + .create_attribute::(info.subprogram.clone()) + .as_attribute_ref(); + let mut func = self.inner.func.borrow_mut(); + let op = func.as_operation_mut(); + op.set_attribute(Self::DI_COMPILE_UNIT_ATTR, cu_attr); + op.set_attribute(Self::DI_SUBPROGRAM_ATTR, sp_attr); + } + + fn emit_parameter_dbg_if_needed(&mut self, span: SourceSpan) { + if self.param_dbg_emitted { + return; + } + self.param_dbg_emitted = true; + let params: Vec<_> = self.param_values.to_vec(); + for (var, value) in ¶ms { + self.emit_dbg_value_for_var(*var, *value, span); + } + // FrameBase-only variables (e.g. 
local `sum`) are emitted solely via + // the location schedule in apply_location_schedule/emit_scheduled_dbg_value, + // avoiding duplicate DebugVar emissions. + } + + fn span_to_location( + &self, + span: SourceSpan, + ) -> Option<(Symbol, Option, u32, Option)> { + if span == SourceSpan::UNKNOWN { + return None; + } + + let context = self.inner.builder().context(); + let session = context.session(); + let source_file = session.source_manager.get(span.source_id()).ok()?; + let uri = source_file.uri().as_str(); + let path = Path::new(uri); + let file_symbol = Symbol::intern(uri); + let directory_symbol = path.parent().and_then(|parent| parent.to_str()).map(Symbol::intern); + let location = source_file.location(span); + let line = location.line.to_u32(); + let column = location.column.to_u32(); + Some((file_symbol, directory_symbol, line, Some(column))) + } } impl<'f, B: ?Sized + Builder> ArithOpBuilder<'f, B> for FunctionBuilderExt<'f, B> { @@ -499,6 +783,18 @@ impl<'f, B: ?Sized + Builder> BuiltinOpBuilder<'f, B> for FunctionBuilderExt<'f, } } +impl<'f, B: ?Sized + Builder> DIBuilder<'f, B> for FunctionBuilderExt<'f, B> { + #[inline(always)] + fn builder(&self) -> &B { + self.inner.builder() + } + + #[inline(always)] + fn builder_mut(&mut self) -> &mut B { + self.inner.builder_mut() + } +} + impl<'f, B: ?Sized + Builder> HirOpBuilder<'f, B> for FunctionBuilderExt<'f, B> { #[inline(always)] fn builder(&self) -> &B { diff --git a/frontend/wasm/src/module/mod.rs b/frontend/wasm/src/module/mod.rs index 69b6ca55d..7b97e6723 100644 --- a/frontend/wasm/src/module/mod.rs +++ b/frontend/wasm/src/module/mod.rs @@ -12,6 +12,7 @@ use self::types::*; use crate::{component::SignatureIndex, error::WasmResult, unsupported_diag}; pub mod build_ir; +pub mod debug_info; pub mod func_translation_state; pub mod func_translator; pub mod function_builder_ext; @@ -336,6 +337,14 @@ impl Module { .unwrap_or_else(|| Symbol::intern(format!("data{}", index.as_u32()))) } + /// Returns the name 
of the given local (including parameters) if available in the name section. + pub fn local_name(&self, func: FuncIndex, index: u32) -> Option { + self.name_section + .locals_names + .get(&func) + .and_then(|locals| locals.get(&index).copied()) + } + /// Sets the fallback name of this module, used if there is no module name in the name section pub fn set_name_fallback(&mut self, name_fallback: Cow<'static, str>) { self.name_fallback = Some(Ident::from(name_fallback.as_ref())); diff --git a/frontend/wasm/src/module/module_env.rs b/frontend/wasm/src/module/module_env.rs index 0399ae6ca..a897e0a86 100644 --- a/frontend/wasm/src/module/module_env.rs +++ b/frontend/wasm/src/module/module_env.rs @@ -1,10 +1,10 @@ -use alloc::sync::Arc; +use alloc::{rc::Rc, sync::Arc}; use core::ops::Range; use std::path::PathBuf; use cranelift_entity::{PrimaryMap, packed_option::ReservedValue}; use midenc_frontend_wasm_metadata::{FrontendMetadata, WASM_FRONTEND_METADATA_CUSTOM_SECTION_NAME}; -use midenc_hir::{FxHashSet, Ident, interner::Symbol}; +use midenc_hir::{FxHashMap, FxHashSet, Ident, interner::Symbol}; use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic, Report, Severity}; use wasmparser::{ CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind, @@ -67,6 +67,10 @@ pub struct ParsedModule<'data> { /// DWARF debug information, if enabled, parsed from the module. pub debuginfo: DebugInfoData<'data>, + /// Precomputed debug metadata for functions + pub function_debug: + FxHashMap>>, + /// Set if debuginfo was found but it was not parsed due to `Tunables` /// configuration. 
pub has_unparsed_debuginfo: bool, @@ -186,6 +190,8 @@ pub struct FunctionBodyData<'a> { pub body: FunctionBody<'a>, /// Validator for the function body pub validator: FuncToValidate, + /// Offset in the original wasm binary where this function body starts + pub body_offset: u64, } #[cfg(test)] @@ -704,7 +710,12 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { params: sig.params().into(), }); } - self.result.function_body_inputs.push(FunctionBodyData { validator, body }); + let body_offset = body.range().start as u64; + self.result.function_body_inputs.push(FunctionBodyData { + validator, + body, + body_offset, + }); self.result.code_index += 1; Ok(()) } diff --git a/hir-analysis/src/analyses/liveness.rs b/hir-analysis/src/analyses/liveness.rs index 31baea14e..24828c7f0 100644 --- a/hir-analysis/src/analyses/liveness.rs +++ b/hir-analysis/src/analyses/liveness.rs @@ -8,6 +8,7 @@ use midenc_hir::{ RegionBranchOpInterface, RegionBranchPoint, RegionRef, Report, Spanned, SymbolTable, ValueRef, dominance::DominanceInfo, pass::{Analysis, AnalysisManager, PreservedAnalyses}, + traits::Transparent, }; pub use self::next_use_set::NextUseSet; @@ -360,9 +361,14 @@ impl DenseBackwardDataFlowAnalysis for Liveness { temp_live_in.remove(result); } - // Set the next-use distance of any operands to 0 - for operand in op.operands().all().iter() { - temp_live_in.insert(operand.borrow().as_value_ref(), 0); + // Set the next-use distance of any operands to 0. + // + // Ignore transparent operations, as such operations are purely informational, and are not + // considered to keep their operands live. 
+ if !op.implements::() { + for operand in op.operands().all().iter() { + temp_live_in.insert(operand.borrow().as_value_ref(), 0); + } } // Determine if the state has changed, if so, then overwrite `live_in` with what we've diff --git a/hir-macros/src/operation.rs b/hir-macros/src/operation.rs index ae9ba14f5..d7876b2ff 100644 --- a/hir-macros/src/operation.rs +++ b/hir-macros/src/operation.rs @@ -1937,6 +1937,7 @@ impl quote::ToTokens for OpBuilderImpl { { #op_builder_new_doc #[inline(always)] + #[allow(unused)] pub fn new(builder: &'a mut B, span: ::midenc_hir::diagnostics::SourceSpan) -> Self { Self { builder, @@ -2237,6 +2238,7 @@ impl quote::ToTokens for OpVerifierImpl { _derived: ::core::marker::PhantomData<(#(&'a dyn #derived_traits,)* #(&'a dyn #implemented_traits),*)>, } impl<'a, T> OpVerifierImpl<'a, T> { + #[allow(unused)] const fn new(op: &'a ::midenc_hir::Operation) -> Self { Self { op, diff --git a/hir-transform/src/sink.rs b/hir-transform/src/sink.rs index f8c1f5edd..d73b58550 100644 --- a/hir-transform/src/sink.rs +++ b/hir-transform/src/sink.rs @@ -2,15 +2,47 @@ use alloc::vec::Vec; use midenc_hir::{ Backward, Builder, EntityMut, Forward, FxHashSet, OpBuilder, Operation, OperationName, - OperationRef, ProgramPoint, RawWalk, Region, RegionBranchOpInterface, - RegionBranchTerminatorOpInterface, RegionRef, Report, SmallVec, Usable, ValueRef, + OperationRef, ProgramPoint, RawWalk, Region, RegionBranchOpInterface, RegionRef, Report, + SmallVec, Usable, Value, ValueRef, adt::SmallDenseMap, dominance::DominanceInfo, matchers::{self, Matcher}, pass::{Pass, PassExecutionState, PostPassStatus}, - traits::{ConstantLike, Terminator}, + traits::{ConstantLike, Transparent}, }; +/// Check whether `operation` is the sole _non-transparent_ user of `value`. +/// +/// Transparent users are excluded, because they are purely informational and their uses are not +/// considered for purposes of computing liveness. 
+fn is_sole_non_transparent_user(value: &dyn Value, operation: OperationRef) -> bool { + value + .iter_uses() + .all(|user| user.owner == operation || user.owner.borrow().implements::()) +} + +/// Erase all transparent operations that reference the given value. +/// +/// This is used before erasing a defining op whose result is only kept alive by +/// transparent uses. +fn erase_transparent_users(value: ValueRef) { + let transparent_ops: SmallVec<[OperationRef; 2]> = { + let v = value.borrow(); + v.iter_uses() + .filter_map(|user| { + if user.owner.borrow().implements::() { + Some(user.owner) + } else { + None + } + }) + .collect() + }; + for mut op in transparent_ops { + op.borrow_mut().erase(); + } +} + /// This transformation sinks operations as close as possible to their uses, one of two ways: /// /// 1. If there exists only a single use of the operation, move it before it's use so that it is @@ -221,7 +253,8 @@ impl Pass for SinkOperandDefs { for operand in op.operands().iter().rev() { let value = operand.borrow(); let value = value.value(); - let is_sole_user = value.iter_uses().all(|user| user.owner == operation); + // Exclude transparent uses when determining whether this is the sole user. + let is_sole_user = is_sole_non_transparent_user(&*value, operation); let Some(defining_op) = value.get_defining_op() else { // Skip block arguments, nothing to move in that situation @@ -276,17 +309,23 @@ impl Pass for SinkOperandDefs { let mut operation = sink_state.operation; let op = operation.borrow(); - // If this operation is unused, remove it now if it has no side effects - let is_memory_effect_free = - op.is_memory_effect_free() || op.implements::(); - if !op.is_used() - && is_memory_effect_free - && !op.implements::() - && !op.implements::() - && erased.insert(operation) - { + // Ignore transparent ops - we do not sink them directly + if op.implements::() { + continue; + } + + // If this operation is unused, remove it now if it has no side effects. 
+ // + // NOTE: We explicitly DO NOT remove transparent ops here, unless we're removing the + // defining op of the transparent operand + let has_real_uses = op.results().iter().any(|result| result.borrow().has_real_uses()); + if !has_real_uses && op.would_be_trivially_dead() && erased.insert(operation) { log::debug!(target: Self::NAME, "erasing unused, effect-free, non-terminator op {op}"); drop(op); + // Erase any remaining transparent uses before erasing the defining op. + for result in operation.borrow().results().iter() { + erase_transparent_users(result.borrow().as_value_ref()); + } operation.borrow_mut().erase(); continue; } @@ -320,10 +359,11 @@ impl Pass for SinkOperandDefs { operand.borrow_mut().set(replacement); changed = PostPassStatus::Changed; - // If no other uses of this value remain, then remove the original + // If no real uses of this value remain, then remove the original // operation, as it is now dead. - if !operand_value.borrow().is_used() { + if !operand_value.borrow().has_real_uses() { log::trace!(target: Self::NAME, " {operand_value} is no longer used, erasing definition"); + erase_transparent_users(operand_value); // Replacements are only ever for op results let mut defining_op = operand_value.borrow().get_defining_op().unwrap(); defining_op.borrow_mut().erase(); @@ -333,7 +373,8 @@ impl Pass for SinkOperandDefs { } let value = operand_value.borrow(); - let is_sole_user = value.iter_uses().all(|user| user.owner == operation); + // Exclude transparent uses when determining sole-user status. 
+ let is_sole_user = is_sole_non_transparent_user(&*value, operation); let Some(mut defining_op) = value.get_defining_op() else { // Skip block arguments, nothing to move in that situation @@ -372,8 +413,12 @@ impl Pass for SinkOperandDefs { // The original op can be moved drop(def); drop(value); - defining_op.borrow_mut().move_to(*builder.insertion_point()); + let mut def_op = defining_op.borrow_mut(); + def_op.move_to(*builder.insertion_point()); sink_state.replacements.insert(operand_value, operand_value); + + // Move any transparent users of `defining_op` after it + move_transparent_users_to(&def_op, &[operation]); } } else if !is_sole_user || def.num_results() != 1 || !def.is_memory_effect_free() { // Skip this operand if the defining op cannot be safely moved @@ -396,12 +441,16 @@ impl Pass for SinkOperandDefs { drop(def); drop(value); log::trace!(target: Self::NAME, " defining op can be moved and has no other uses, moving into place"); - defining_op.borrow_mut().move_to(*builder.insertion_point()); + let mut def_op = defining_op.borrow_mut(); + def_op.move_to(*builder.insertion_point()); sink_state.replacements.insert(operand_value, operand_value); + // Move any transparent users of `defining_op` after it + move_transparent_users_to(&def_op, &[operation]); + drop(def_op); + // Enqueue the defining op to be visited before continuing with this op's operands log::trace!(target: Self::NAME, " enqueing defining op for immediate processing"); - //sink_state.ip = *builder.insertion_point(); sink_state.ip = ProgramPoint::before(operation); worklist.push_front(sink_state); worklist.push_front(OpOperandSink::new(defining_op)); @@ -415,6 +464,26 @@ impl Pass for SinkOperandDefs { } } +fn move_transparent_users_to(op: &Operation, exclude: &[OperationRef]) { + use midenc_hir::adt::SmallSet; + + let ip = ProgramPoint::after(op.as_operation_ref()); + let mut visited = SmallSet::<_, 4>::from_iter(exclude.iter().copied()); + for result in op.results().iter() { + let result = 
result.borrow(); + for user in result.iter_uses() { + if !visited.insert(user.owner) { + continue; + } + let mut user = user.owner; + let mut user = user.borrow_mut(); + if user.implements::() { + user.move_to(ip); + } + } + } +} + struct OpOperandSink { operation: OperationRef, ip: ProgramPoint, @@ -476,9 +545,12 @@ where self.num_sunk } - /// Given a region and an op which dominates the region, returns true if all - /// users of the given op are dominated by the entry block of the region, and - /// thus the operation can be sunk into the region. + /// Given a region and an op which dominates the region, returns true if all _non-transparent_ + /// users of the given op are dominated by the entry block of the region, and thus the operation + /// can be sunk into the region. + /// + /// Transparent uses are excluded because they are observational and should not prevent + /// control-flow sinking. fn all_users_dominated_by(&self, op: &Operation, region: &Region) -> bool { assert!( region.find_ancestor_op(op.as_operation_ref()).is_none(), @@ -488,6 +560,9 @@ where op.results().iter().all(|result| { let result = result.borrow(); result.iter_uses().all(|user| { + if user.owner.borrow().implements::() { + return true; + } // The user is dominated by the region if its containing block is dominated // by the region's entry block. 
self.dominfo.dominates(®ion_entry, &user.owner.parent().unwrap()) @@ -530,6 +605,9 @@ where if all_users_dominated_by && should_move_into_region { (self.move_into_region)(op, region); + // Move all transparent users of `op` into the region after it + move_transparent_users_to(&op.borrow(), &[user.as_operation_ref()]); + self.num_sunk += 1; // Add the op to the work queue diff --git a/hir/Cargo.toml b/hir/Cargo.toml index 49b27171e..727e1a80e 100644 --- a/hir/Cargo.toml +++ b/hir/Cargo.toml @@ -46,5 +46,6 @@ thiserror.workspace = true [dev-dependencies] # NOTE: Use local paths for dev-only dependency to avoid relying on crates.io during packaging -pretty_assertions = "1.0" +midenc-expect-test = { path = "../tools/expect-test" } midenc-log = { path = "../midenc-log" } +pretty_assertions = "1.0" diff --git a/hir/src/dialects.rs b/hir/src/dialects.rs index 225bb47dd..abeae970c 100644 --- a/hir/src/dialects.rs +++ b/hir/src/dialects.rs @@ -1,2 +1,3 @@ pub mod builtin; +pub mod debuginfo; pub mod test; diff --git a/hir/src/dialects/debuginfo.rs b/hir/src/dialects/debuginfo.rs new file mode 100644 index 000000000..8da67672f --- /dev/null +++ b/hir/src/dialects/debuginfo.rs @@ -0,0 +1,71 @@ +//! This module defines a first-class dialect for tracking source-level debug information through +//! compiler transformations. +//! +//! Inspired by [Mojo's DebugInfo dialect], this dialect makes debug variable tracking a first-class +//! citizen of the IR, using SSA use-def chains to enforce correctness. +//! +//! ## Motivation +//! +//! Traditional approaches to debug info in MLIR-like compilers (e.g. Flang/FIR) treat debug +//! information as metadata or attributes — second-class citizens that transforms are free to +//! silently drop. The consequences: +//! +//! - Transforms can silently lose debug info with no verifier catching it +//! - No mechanism forces transform authors to update debug info +//! - Debug info quality degrades as the optimizer gets more aggressive +//! +//! 
## Approach: SSA-Based Debug Info +//! +//! This dialect defines debug operations as real IR operations with SSA operands: +//! +//! - **`di.value`** — Records the current value of a source variable. Uses an SSA value operand, +//! so deleting the value without updating debug uses is a hard error. +//! +//! - **`di.debug_declare`** — Records the storage location of a source variable using a debug +//! expression. +//! +//! - **`di.kill`** — Marks a variable as dead, giving the debugger precise lifetime boundaries +//! instead of scope-based heuristics. +//! +//! ## Transform Hooks +//! +//! The [`transform`] module provides utilities that make it easy for transform authors to maintain +//! debug info: +//! +//! - **Simple replacements** are handled automatically via `replace_all_uses_with` +//! - **Complex transforms** use [`salvage_debug_info`](transform::salvage_debug_info) where the +//! transform author only describes the *inverse* of their transformation +//! - **Value deletion** without a replacement emits `di.kill` automatically +//! +//! ## Design Pillars (as inherited from Mojo) +//! +//! 1. **SSA use-def chains** — debug values participate in standard use-def tracking +//! 2. **Expression trees** — `ExpressionAttr` describes how to recover source values from +//! transformed IR values (encode the inverse transformation) +//! 3. **Explicit lifetimes** — `di.kill` for precise variable death points +//! +//! For historical context, you may be interested in the slides from Mojo's debugging talk, where +//! they discuss its debug info dialect. [You can find that here](https://llvm.org/devmtg/2024-04/slides/TechnicalTalks/MojoDebugging.pdf). +pub mod attributes; +mod builders; +mod ops; +pub mod transform; + +pub use self::{builders::DIBuilder, ops::*}; +use crate::{ + DialectInfo, + derive::{Dialect, DialectRegistration}, +}; + +/// The DebugInfo dialect — first-class debug variable tracking.
+/// +/// This dialect provides operations for tracking source-level variables through +/// compiler transformations using SSA semantics. Unlike metadata-based approaches, +/// debug info here participates in standard use-def chains, making it impossible +/// for transforms to silently drop debug information. +#[derive(Debug, Dialect, DialectRegistration)] +#[dialect(name = "di")] +pub struct DebugInfoDialect { + #[dialect(info)] + info: DialectInfo, +} diff --git a/hir/src/dialects/debuginfo/attributes.rs b/hir/src/dialects/debuginfo/attributes.rs new file mode 100644 index 000000000..ded85bb15 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes.rs @@ -0,0 +1,15 @@ +mod compile_unit; +mod expression; +mod subprogram; +mod variable; + +pub use self::{ + compile_unit::{CompileUnit, CompileUnitAttr}, + expression::{ + Expression, ExpressionAttr, ExpressionOp, FRAME_BASE_LOCAL_MARKER, + decode_frame_base_local_index, decode_frame_base_local_offset, + encode_frame_base_local_index, encode_frame_base_local_offset, + }, + subprogram::{Subprogram, SubprogramAttr}, + variable::{Variable, VariableAttr}, +}; diff --git a/hir/src/dialects/debuginfo/attributes/compile_unit.rs b/hir/src/dialects/debuginfo/attributes/compile_unit.rs new file mode 100644 index 000000000..fb3621011 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes/compile_unit.rs @@ -0,0 +1,125 @@ +use crate::{ + AttrPrinter, attributes::AttrParser, derive::DialectAttribute, + dialects::debuginfo::DebugInfoDialect, interner::Symbol, print::AsmPrinter, +}; + +/// Represents the compilation unit associated with debug information. +/// +/// The fields in this struct are intentionally aligned with the subset of +/// DWARF metadata we currently care about when tracking variable locations. 
+#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] +#[attribute(dialect = DebugInfoDialect, implements(AttrPrinter))] +pub struct CompileUnit { + pub language: Symbol, + pub file: Symbol, + pub directory: Option, + pub producer: Option, + pub optimized: bool, +} + +impl Default for CompileUnit { + fn default() -> Self { + Self { + language: crate::interner::symbols::Empty, + file: crate::interner::symbols::Empty, + directory: None, + producer: None, + optimized: false, + } + } +} + +impl CompileUnit { + pub fn new(language: Symbol, file: Symbol) -> Self { + Self { + language, + file, + directory: None, + producer: None, + optimized: false, + } + } +} + +impl AttrPrinter for CompileUnitAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + use crate::formatter::*; + + *printer += const_text("{ "); + + *printer += const_text("language") + const_text(" = "); + printer.print_string(self.language.as_str()); + *printer += const_text(", "); + + *printer += const_text("file") + const_text(" = "); + printer.print_string(self.file.as_str()); + + if let Some(directory) = self.directory { + *printer += const_text(", "); + *printer += const_text("directory") + const_text(" = "); + printer.print_string(directory.as_str()); + } + + if let Some(producer) = self.producer { + *printer += const_text(", "); + *printer += const_text("producer") + const_text(" = "); + printer.print_string(producer.as_str()); + } + + *printer += const_text(", "); + *printer += const_text("optimized") + const_text(" = "); + printer.print_bool(self.optimized); + + *printer += const_text(" }"); + } +} + +impl AttrParser for CompileUnitAttr { + fn parse( + parser: &mut dyn crate::parse::Parser<'_>, + ) -> crate::parse::ParseResult { + use crate::parse::Token; + + parser.parse_lbrace()?; + + parser.parse_custom_keyword("language")?; + parser.parse_equal()?; + let language = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + parser.parse_custom_keyword("file")?; + 
parser.parse_equal()?; + let file = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + let mut unit = CompileUnit::new(language.into(), file.into()); + + if parser.parse_optional_custom_keyword("directory")?.is_some() { + parser.parse_equal()?; + unit.directory = Some(parser.parse_string()?.into_inner().into()); + parser.parse_comma()?; + } + if parser.parse_optional_custom_keyword("producer")?.is_some() { + parser.parse_equal()?; + unit.producer = Some(parser.parse_string()?.into_inner().into()); + parser.parse_comma()?; + } + if parser.parse_optional_custom_keyword("optimized")?.is_some() { + parser.parse_equal()?; + unit.optimized = parser + .token_stream_mut() + .expect_map("boolean", |tok| match tok { + Token::True => Some(true), + Token::False => Some(false), + _ => None, + })? + .into_inner(); + } + + parser.parse_rbrace()?; + + let attr = parser.context_rc().create_attribute::(unit); + + Ok(attr.as_attribute_ref()) + } +} diff --git a/hir/src/dialects/debuginfo/attributes/expression.rs b/hir/src/dialects/debuginfo/attributes/expression.rs new file mode 100644 index 000000000..64ca59599 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes/expression.rs @@ -0,0 +1,471 @@ +use alloc::{format, string::ToString, vec::Vec}; + +use crate::{ + AttrPrinter, attributes::AttrParser, derive::DialectAttribute, + dialects::debuginfo::DebugInfoDialect, interner::Symbol, parse::ParserExt, print::AsmPrinter, +}; + +/// Represents DWARF expression operations for describing variable locations +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum ExpressionOp { + /// DW_OP_WASM_location 0x00 - Variable is in a WebAssembly local + WasmLocal(u32) = 0, + /// DW_OP_WASM_location 0x01 - Variable is in a WebAssembly global + WasmGlobal(u32) = 1, + /// DW_OP_WASM_location 0x02 - Variable is on the WebAssembly operand stack + WasmStack(u32) = 2, + /// DW_OP_constu - Unsigned constant value + ConstU64(u64) = 3, + /// DW_OP_consts - Signed constant 
value + ConstS64(i64) = 4, + /// DW_OP_plus_uconst - Add unsigned constant to top of stack + PlusUConst(u64) = 5, + /// DW_OP_minus - Subtract top two stack values + Minus = 6, + /// DW_OP_plus - Add top two stack values + Plus = 7, + /// DW_OP_deref - Dereference the address at top of stack + Deref = 8, + /// DW_OP_stack_value - The value on the stack is the value of the variable + StackValue = 9, + /// DW_OP_piece - Describes a piece of a variable + Piece(u64) = 10, + /// DW_OP_bit_piece - Describes a piece of a variable in bits + BitPiece { size: u64, offset: u64 } = 11, + /// DW_OP_fbreg - Frame base register + offset. + /// The variable is in WASM linear memory at `value_of(global[global_index]) + byte_offset`. + FrameBase { global_index: u32, byte_offset: i64 } = 12, + /// DW_OP_addr - pushes memory address `address` on the expression operand stack + Address { address: u64 } = 13, + /// Placeholder for unsupported operations + Unsupported(Symbol) = u8::MAX, +} + +impl ExpressionOp { + const fn tag(&self) -> u8 { + // SAFETY: This is safe because we have given this enum a + // primitive representation with #[repr(u8)], with the first + // field of the underlying union-of-structs the discriminant + // + // See the section on "accessing the numeric value of the discriminant" + // here: https://doc.rust-lang.org/std/mem/fn.discriminant.html + unsafe { *(self as *const Self).cast::() } + } +} + +impl miden_core::serde::Serializable for ExpressionOp { + fn write_into(&self, target: &mut W) { + target.write_u8(self.tag()); + match self { + Self::WasmLocal(idx) | Self::WasmGlobal(idx) | Self::WasmStack(idx) => { + target.write_u32(*idx); + } + Self::ConstU64(val) | Self::PlusUConst(val) | Self::Piece(val) => { + target.write_u64(*val); + } + Self::ConstS64(val) => { + target.write_u64(*val as u64); + } + Self::Minus | Self::Plus | Self::Deref | Self::StackValue => (), + Self::BitPiece { size, offset } => { + target.write_u64(*size); + target.write_u64(*offset); + } + 
Self::FrameBase { + global_index, + byte_offset, + } => { + target.write_u32(*global_index); + target.write_u64(*byte_offset as u64); + } + Self::Address { address } => { + target.write_u64(*address); + } + Self::Unsupported(name) => { + target.write_usize(name.as_str().len()); + target.write_bytes(name.as_str().as_bytes()); + } + } + } +} + +impl miden_core::serde::Deserializable for ExpressionOp { + fn read_from( + source: &mut R, + ) -> Result { + use miden_core::serde::DeserializationError; + + Ok(match source.read_u8()? { + 0 => Self::WasmLocal(u32::read_from(source)?), + 1 => Self::WasmGlobal(u32::read_from(source)?), + 2 => Self::WasmStack(u32::read_from(source)?), + 3 => Self::ConstU64(u64::read_from(source)?), + 4 => Self::ConstS64(u64::read_from(source)? as i64), + 5 => Self::PlusUConst(u64::read_from(source)?), + 6 => Self::Minus, + 7 => Self::Plus, + 8 => Self::Deref, + 9 => Self::StackValue, + 10 => Self::Piece(u64::read_from(source)?), + 11 => { + let size = u64::read_from(source)?; + let offset = u64::read_from(source)?; + Self::BitPiece { size, offset } + } + 12 => { + let global_index = u32::read_from(source)?; + let byte_offset = u64::read_from(source)? 
as i64; + Self::FrameBase { + global_index, + byte_offset, + } + } + 13 => { + let address = u64::read_from(source)?; + Self::Address { address } + } + u8::MAX => { + let len = usize::read_from(source)?; + let bytes = source.read_slice(len)?; + let s = core::str::from_utf8(bytes) + .map_err(|err| DeserializationError::InvalidValue(err.to_string()))?; + Self::Unsupported(Symbol::intern(s)) + } + invalid => { + return Err(DeserializationError::InvalidValue(format!( + "unknown DIExpressionOp tag '{invalid}'" + ))); + } + }) + } +} + +impl crate::formatter::PrettyPrint for ExpressionOp { + fn render(&self) -> crate::formatter::Document { + use crate::formatter::*; + match self { + Self::WasmLocal(idx) => { + const_text("DW_OP_WASM_local") + const_text("(") + display(idx) + const_text(")") + } + Self::WasmGlobal(idx) => { + const_text("DW_OP_WASM_global") + const_text("(") + display(idx) + const_text(")") + } + Self::WasmStack(idx) => { + const_text("DW_OP_WASM_stack") + const_text("(") + display(idx) + const_text(")") + } + Self::ConstU64(val) => { + const_text("DW_OP_constu") + const_text("(") + display(val) + const_text(")") + } + Self::ConstS64(val) => { + const_text("DW_OP_consts") + const_text("(") + display(val) + const_text(")") + } + Self::PlusUConst(val) => { + const_text("DW_OP_plus_uconst") + const_text("(") + display(val) + const_text(")") + } + Self::Minus => const_text("DW_OP_minus"), + Self::Plus => const_text("DW_OP_plus"), + Self::Deref => const_text("DW_OP_deref"), + Self::StackValue => const_text("DW_OP_stack_value"), + Self::Piece(size) => { + const_text("DW_OP_piece") + const_text("(") + display(*size) + const_text(")") + } + Self::BitPiece { size, offset } => { + const_text("DW_OP_bit_piece") + + const_text("(") + + display(*size) + + const_text(",") + + display(*offset) + + const_text(")") + } + Self::FrameBase { + global_index, + byte_offset, + } => { + if let Some(local_index) = decode_frame_base_local_index(*global_index) { + 
const_text("DW_OP_fbreg(local, ") + + text(format!("{local_index}{byte_offset:+}")) + + const_text(")") + } else { + const_text("DW_OP_fbreg(global, ") + + text(format!("{global_index}{byte_offset:+}")) + + const_text(")") + } + } + Self::Address { address } => { + const_text("DW_OP_addr") + const_text("(") + display(*address) + const_text(")") + } + Self::Unsupported(name) => const_text(name.as_str()), + } + } +} + +impl ExpressionOp { + /// Parses one operator in its `DW_OP_*` spelling, followed by its parenthesized operands + /// when the operator takes any. Unrecognized identifiers become `Unsupported`. + fn parse(parser: &mut dyn crate::parse::Parser<'_>) -> crate::parse::ParseResult { + use crate::parse::Token; + + let mut op = parser + .token_stream_mut() + .expect_map("DIExpression operator", |tok| match tok { + Token::BareIdent(id) => match id { + "DW_OP_WASM_local" => Some(ExpressionOp::WasmLocal(0)), + "DW_OP_WASM_global" => Some(ExpressionOp::WasmGlobal(0)), + "DW_OP_WASM_stack" => Some(ExpressionOp::WasmStack(0)), + "DW_OP_constu" => Some(ExpressionOp::ConstU64(0)), + "DW_OP_consts" => Some(ExpressionOp::ConstS64(0)), + "DW_OP_plus_uconst" => Some(ExpressionOp::PlusUConst(0)), + "DW_OP_minus" => Some(ExpressionOp::Minus), + "DW_OP_plus" => Some(ExpressionOp::Plus), + "DW_OP_deref" => Some(ExpressionOp::Deref), + "DW_OP_stack_value" => Some(ExpressionOp::StackValue), + "DW_OP_piece" => Some(ExpressionOp::Piece(0)), + "DW_OP_bit_piece" => Some(ExpressionOp::BitPiece { size: 0, offset: 0 }), + "DW_OP_fbreg" => Some(ExpressionOp::FrameBase { + global_index: 0, + byte_offset: 0, + }), + "DW_OP_addr" => Some(ExpressionOp::Address { address: 0 }), + other => Some(ExpressionOp::Unsupported(Symbol::intern(other))), + }, + _ => None, + })?
+ .into_inner(); + match &mut op { + ExpressionOp::WasmLocal(idx) + | ExpressionOp::WasmGlobal(idx) + | ExpressionOp::WasmStack(idx) => { + parser.parse_lparen()?; + *idx = parser.parse_decimal_integer::()?.into_inner(); + parser.parse_rparen()?; + } + ExpressionOp::ConstU64(val) + | ExpressionOp::PlusUConst(val) + | ExpressionOp::Piece(val) + | ExpressionOp::Address { address: val } => { + parser.parse_lparen()?; + *val = parser.parse_decimal_integer::()?.into_inner(); + parser.parse_rparen()?; + } + ExpressionOp::ConstS64(val) => { + parser.parse_lparen()?; + *val = parser.parse_decimal_integer::()?.into_inner(); + parser.parse_rparen()?; + } + ExpressionOp::Minus + | ExpressionOp::Plus + | ExpressionOp::Deref + | ExpressionOp::StackValue + | ExpressionOp::Unsupported(_) => (), + ExpressionOp::BitPiece { size, offset } => { + parser.parse_lparen()?; + *size = parser.parse_decimal_integer::()?.into_inner(); + parser.parse_comma()?; + *offset = parser.parse_decimal_integer::()?.into_inner(); + parser.parse_rparen()?; + } + ExpressionOp::FrameBase { + global_index, + byte_offset, + } => { + parser.parse_lparen()?; + // Remember which modifier was written: only a `local` frame base carries the + // marker bit, a `global` index is stored verbatim. + let is_local = parser + .token_stream_mut() + .expect_map("'local' or 'global' modifier", |tok| match tok { + Token::BareIdent("local") => Some(true), + Token::BareIdent("global") => Some(false), + _ => None, + })?
+ .into_inner(); + parser.parse_comma()?; + // NOTE(review): the printers emit `<index><signed offset>` with no comma between + // them, while this arm requires `<index>, <offset>` - confirm the forms round-trip. + let index = parser.parse_decimal_integer::()?.into_inner(); + parser.parse_comma()?; + *byte_offset = parser.parse_decimal_integer::()?.into_inner(); + *global_index = if is_local { + encode_frame_base_local_index(index).unwrap_or(index) + } else { + index + }; + parser.parse_rparen()?; + } + } + + Ok(op) + } +} + +/// Represents a DWARF expression that describes how to compute or locate a variable's value +#[derive(DialectAttribute, Clone, Debug, Default, PartialEq, Eq, Hash)] +#[attribute(dialect = DebugInfoDialect, implements(AttrPrinter))] +pub struct Expression { + pub operations: Vec, +} + +impl Expression { + pub fn new() -> Self { + Self { + operations: Vec::new(), + } + } + + pub fn with_ops(operations: Vec) -> Self { + Self { operations } + } + + pub fn is_empty(&self) -> bool { + self.operations.is_empty() + } +} + +impl miden_core::serde::Serializable for Expression { + fn write_into(&self, target: &mut W) { + target.write_usize(self.operations.len()); + for op in self.operations.iter() { + target.write(op); + } + } +} + +impl miden_core::serde::Deserializable for Expression { + fn read_from( + source: &mut R, + ) -> Result { + let len = usize::read_from(source)?; + let mut expr = Self::with_ops(Vec::with_capacity(len)); + for _ in 0..len { + expr.operations.push(ExpressionOp::read_from(source)?); + } + Ok(expr) + } +} + +impl AttrPrinter for ExpressionAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + use crate::formatter::*; + + if self.operations.is_empty() { + *printer += const_text("[]"); + return; + } + + *printer += const_text("["); + for (i, op) in self.operations.iter().enumerate() { + if i > 0 { + *printer += const_text(", "); + } + match op { + ExpressionOp::WasmLocal(idx) => { + *printer += const_text("DW_OP_WASM_local"); + *printer 
+= const_text("(") + display(*idx) + const_text(")"); + } + ExpressionOp::WasmGlobal(idx) => { + *printer += const_text("DW_OP_WASM_global"); + *printer += const_text("(") + display(*idx) +
const_text(")"); + } + ExpressionOp::WasmStack(idx) => { + *printer += const_text("DW_OP_WASM_stack"); + *printer += const_text("(") + display(*idx) + const_text(")"); + } + ExpressionOp::ConstU64(val) => { + *printer += const_text("DW_OP_constu"); + *printer += const_text("(") + display(*val) + const_text(")"); + } + ExpressionOp::ConstS64(val) => { + *printer += const_text("DW_OP_consts"); + *printer += const_text("(") + display(*val) + const_text(")"); + } + ExpressionOp::PlusUConst(val) => { + *printer += const_text("DW_OP_plus_uconst"); + *printer += const_text("(") + display(*val) + const_text(")"); + } + ExpressionOp::Minus => *printer += const_text("DW_OP_minus"), + ExpressionOp::Plus => *printer += const_text("DW_OP_plus"), + ExpressionOp::Deref => *printer += const_text("DW_OP_deref"), + ExpressionOp::StackValue => *printer += const_text("DW_OP_stack_value"), + ExpressionOp::Piece(size) => { + *printer += const_text("DW_OP_piece"); + *printer += const_text("(") + display(*size) + const_text(")"); + } + ExpressionOp::BitPiece { size, offset } => { + *printer += const_text("DW_OP_bit_piece"); + *printer += const_text("(") + + display(*size) + + const_text(",") + + display(*offset) + + const_text(")"); + } + ExpressionOp::FrameBase { + global_index, + byte_offset, + } => { + if let Some(local_index) = decode_frame_base_local_index(*global_index) { + *printer += const_text("DW_OP_fbreg(local, "); + *printer += text(format!("{}{:+}", local_index, byte_offset)); + *printer += const_text(")"); + } else { + *printer += const_text("DW_OP_fbreg(global, "); + *printer += text(format!("{}{:+}", global_index, byte_offset)); + *printer += const_text(")"); + } + } + ExpressionOp::Address { address } => { + *printer += const_text("DW_OP_addr"); + *printer += const_text("(") + display(*address) + const_text(")"); + } + ExpressionOp::Unsupported(name) => *printer += const_text(name.as_str()), + } + } + *printer += const_text("]"); + } +} + +impl AttrParser for 
ExpressionAttr { + fn parse( + parser: &mut dyn crate::parse::Parser<'_>, + ) -> crate::parse::ParseResult { + use crate::parse::Delimiter; + + let mut ops = Vec::default(); + parser.parse_comma_separated_list( + Delimiter::OptionalBracket, + Some("DIExpression"), + |parser| { + ops.push(ExpressionOp::parse(parser)?); + + Ok(true) + }, + )?; + + let attr = parser + .context_rc() + .create_attribute::(Expression::with_ops(ops)); + + Ok(attr.as_attribute_ref()) + } +} + +/// High-bit marker used to carry a Wasm-local frame base through the existing +/// `FrameBase { global_index, byte_offset }` debug-location shape without +/// changing the VM-facing `DebugVarLocation` ABI. +/// +/// Before MASM lowering completes, the low bits hold a raw Wasm local index. +/// After local patching, the low 16 bits hold the signed FMP-relative offset of +/// the Miden local containing the frame-base byte address. +pub const FRAME_BASE_LOCAL_MARKER: u32 = 1 << 31; + +pub fn encode_frame_base_local_index(local_index: u32) -> Option { + if local_index < FRAME_BASE_LOCAL_MARKER { + Some(FRAME_BASE_LOCAL_MARKER | local_index) + } else { + None + } +} + +pub fn decode_frame_base_local_index(encoded: u32) -> Option { + (encoded & FRAME_BASE_LOCAL_MARKER != 0).then_some(encoded & !FRAME_BASE_LOCAL_MARKER) +} + +pub fn encode_frame_base_local_offset(local_offset: i16) -> u32 { + FRAME_BASE_LOCAL_MARKER | u16::from_le_bytes(local_offset.to_le_bytes()) as u32 +} + +pub fn decode_frame_base_local_offset(encoded: u32) -> Option { + if encoded & FRAME_BASE_LOCAL_MARKER == 0 { + return None; + } + let low_bits = (encoded & 0xffff) as u16; + Some(i16::from_le_bytes(low_bits.to_le_bytes())) +} diff --git a/hir/src/dialects/debuginfo/attributes/subprogram.rs b/hir/src/dialects/debuginfo/attributes/subprogram.rs new file mode 100644 index 000000000..078a47f20 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes/subprogram.rs @@ -0,0 +1,223 @@ +use alloc::{format, sync::Arc, vec::Vec}; + +use 
crate::{ + AttrPrinter, Type, attributes::AttrParser, derive::DialectAttribute, + dialects::debuginfo::DebugInfoDialect, interner::Symbol, parse::ParserExt, print::AsmPrinter, +}; + +/// Represents a subprogram (function) scope for debug information. +/// The compile unit is not embedded but typically stored separately on the module. +#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] +#[attribute(dialect = DebugInfoDialect, implements(AttrPrinter))] +pub struct Subprogram { + pub name: Symbol, + pub linkage_name: Option, + pub file: Symbol, + pub line: u32, + pub column: Option, + pub is_definition: bool, + pub is_local: bool, + pub ty: Option, + pub param_names: Vec, +} + +impl Default for Subprogram { + fn default() -> Self { + Self { + name: crate::interner::symbols::Empty, + linkage_name: None, + file: crate::interner::symbols::Empty, + line: 0, + column: None, + is_definition: false, + is_local: false, + ty: None, + param_names: Vec::new(), + } + } +} + +impl Subprogram { + pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { + Self { + name, + linkage_name: None, + file, + line, + column, + is_definition: true, + is_local: false, + ty: None, + param_names: Vec::new(), + } + } + + pub fn with_function_type(mut self, ty: crate::FunctionType) -> Self { + self.ty = Some(Type::Function(Arc::new(ty))); + self + } + + pub fn with_param_names(mut self, names: I) -> Self + where + I: IntoIterator, + { + self.param_names = names.into_iter().collect(); + self + } +} + +impl AttrPrinter for SubprogramAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + use crate::formatter::*; + + *printer += const_text("{ "); + + *printer += const_text("name") + const_text(" = "); + printer.print_string(self.name.as_str()); + *printer += const_text(", "); + + *printer += const_text("file") + const_text(" = "); + printer.print_string(self.file.as_str()); + *printer += const_text(", "); + + *printer += const_text("line") + const_text(" = "); + 
printer.print_decimal_integer(self.line); + + if let Some(column) = self.column { + *printer += const_text(", "); + *printer += const_text("column") + const_text(" = "); + printer.print_decimal_integer(column); + } + + if let Some(linkage) = self.linkage_name { + *printer += const_text(", "); + *printer += const_text("linkage") + const_text(" = "); + printer.print_string(linkage.as_str()); + } + + if let Some(ty) = &self.ty { + *printer += const_text(", "); + *printer += const_text("ty") + const_text(" = "); + printer.print_type(ty); + } + + if !self.param_names.is_empty() { + let names = self + .param_names + .iter() + .map(|name| const_text(name.as_str())) + .intersperse(const_text(", ")) + .fold(Document::Empty, |acc, item| acc + item); + let names = const_text("[") + names + const_text("]"); + *printer += const_text(", "); + *printer += const_text("params") + const_text(" = ") + names; + } + + *printer += const_text(", "); + *printer += const_text("definition") + const_text(" = "); + printer.print_bool(self.is_definition); + + *printer += const_text(", "); + *printer += const_text("local") + const_text(" = "); + printer.print_bool(self.is_local); + + *printer += const_text(" }"); + } +} + +/// Parses `{ name = .., file = .., line = .. }` followed by optional, comma-separated +/// `column`, `linkage`, `ty`, `params`, `definition` and `local` properties in any order. +impl AttrParser for SubprogramAttr { + fn parse( + parser: &mut dyn crate::parse::Parser<'_>, + ) -> crate::parse::ParseResult { + use crate::parse::Token; + + parser.parse_lbrace()?; + + parser.parse_custom_keyword("name")?; + parser.parse_equal()?; + let name = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + parser.parse_custom_keyword("file")?; + parser.parse_equal()?; + let file = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + parser.parse_custom_keyword("line")?; + parser.parse_equal()?; + let line = parser.parse_decimal_integer::()?.into_inner(); + + let mut subprogram = Subprogram::new(name.into(), file.into(), line, None); + + while parser.parse_optional_comma()?
{ + let (span, prop) = parser + .token_stream_mut() + .expect_map("Subprogram property", |tok| match tok { + Token::BareIdent( + prop @ ("column" | "linkage" | "ty" | "params" | "definition" | "local"), + ) => Some(prop), + _ => None, + })? + .into_parts(); + match prop { + "column" if subprogram.column.is_none() => { + parser.parse_equal()?; + subprogram.column = Some(parser.parse_decimal_integer::()?.into_inner()); + } + "linkage" if subprogram.linkage_name.is_none() => { + parser.parse_equal()?; + subprogram.linkage_name = Some(parser.parse_string()?.into_inner().into()); + } + "ty" if subprogram.ty.is_none() => { + parser.parse_equal()?; + subprogram.ty = Some(parser.parse_type()?.into_inner()); + } + "params" if subprogram.param_names.is_empty() => { + parser.parse_equal()?; + parser.parse_comma_separated_list( + crate::parse::Delimiter::OptionalBracket, + Some("parameter names"), + |parser| { + subprogram.param_names.push(parser.parse_string()?.into_inner().into()); + Ok(true) + }, + )?; + } + "definition" => { + parser.parse_equal()?; + // Accept the keyword tokens matched by the CompileUnit parser as well as bare + // identifiers, so the output of `print_bool` is readable either way. + subprogram.is_definition = parser + .token_stream_mut() + .expect_map("boolean", |tok| match tok { + Token::True | Token::BareIdent("true") => Some(true), + Token::False | Token::BareIdent("false") => Some(false), + _ => None, + })? + .into_inner(); + } + "local" => { + parser.parse_equal()?; + subprogram.is_local = parser + .token_stream_mut() + .expect_map("boolean", |tok| match tok { + Token::True | Token::BareIdent("true") => Some(true), + Token::False | Token::BareIdent("false") => Some(false), + _ => None, + })?
+ .into_inner(); + } + prop => { + return Err(crate::parse::ParserError::InvalidAttributeValue { + span, + reason: format!("duplicate DISubprogramAttr property '{prop}'"), + }); + } + } + } + + parser.parse_rbrace()?; + + let attr = parser.context_rc().create_attribute::(subprogram); + + Ok(attr.as_attribute_ref()) + } +} diff --git a/hir/src/dialects/debuginfo/attributes/variable.rs b/hir/src/dialects/debuginfo/attributes/variable.rs new file mode 100644 index 000000000..61347ff12 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes/variable.rs @@ -0,0 +1,146 @@ +use alloc::format; + +use crate::{ + AttrPrinter, Type, attributes::AttrParser, derive::DialectAttribute, + dialects::debuginfo::DebugInfoDialect, interner::Symbol, parse::ParserExt, print::AsmPrinter, +}; + +/// Represents a local variable debug record. +/// The scope (Subprogram) is not embedded but instead stored on the containing function. +#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] +#[attribute(dialect = DebugInfoDialect, implements(AttrPrinter))] +pub struct Variable { + pub name: Symbol, + pub arg_index: Option, + pub file: Symbol, + pub line: u32, + pub column: Option, + pub ty: Option, +} + +impl Default for Variable { + fn default() -> Self { + Self { + name: crate::interner::symbols::Empty, + arg_index: None, + file: crate::interner::symbols::Empty, + line: 0, + column: None, + ty: None, + } + } +} + +impl Variable { + pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { + Self { + name, + arg_index: None, + file, + line, + column, + ty: None, + } + } +} + +impl AttrPrinter for VariableAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + use crate::formatter::*; + + *printer += const_text("{ "); + + *printer += const_text("name") + const_text(" = "); + printer.print_string(self.name.as_str()); + *printer += const_text(", "); + + *printer += const_text("file") + const_text(" = "); + printer.print_string(self.file.as_str()); + *printer +=
const_text(", "); + + *printer += const_text("line") + const_text(" = "); + printer.print_decimal_integer(self.line); + + if let Some(column) = self.column { + *printer += const_text(", "); + *printer += const_text("column") + const_text(" = "); + printer.print_decimal_integer(column); + } + + if let Some(arg_index) = self.arg_index { + *printer += const_text(", "); + *printer += const_text("arg") + const_text(" = "); + printer.print_decimal_integer(arg_index); + } + + if let Some(ty) = &self.ty { + *printer += const_text(", "); + *printer += const_text("ty") + const_text(" = "); + printer.print_type(ty); + } + + *printer += const_text(" }"); + } +} + +impl AttrParser for VariableAttr { + fn parse( + parser: &mut dyn crate::parse::Parser<'_>, + ) -> crate::parse::ParseResult { + use crate::parse::Token; + + parser.parse_lbrace()?; + + parser.parse_custom_keyword("name")?; + parser.parse_equal()?; + let name = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + parser.parse_custom_keyword("file")?; + parser.parse_equal()?; + let file = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + parser.parse_custom_keyword("line")?; + parser.parse_equal()?; + let line = parser.parse_decimal_integer::()?.into_inner(); + + let mut var = Variable::new(name.into(), file.into(), line, None); + + while parser.parse_optional_comma()? { + let (span, prop) = parser + .token_stream_mut() + .expect_map("DILocalVariable property", |tok| match tok { + Token::BareIdent(prop @ ("column" | "arg" | "ty")) => Some(prop), + _ => None, + })? 
+ .into_parts(); + match prop { + "column" if var.column.is_none() => { + parser.parse_equal()?; + var.column = Some(parser.parse_decimal_integer::()?.into_inner()); + } + "arg" if var.arg_index.is_none() => { + parser.parse_equal()?; + var.arg_index = Some(parser.parse_decimal_integer::()?.into_inner()); + } + "ty" if var.ty.is_none() => { + parser.parse_equal()?; + var.ty = Some(parser.parse_type()?.into_inner()); + } + prop => { + return Err(crate::parse::ParserError::InvalidAttributeValue { + span, + reason: format!("duplicate DILocalVariableAttr property '{prop}'"), + }); + } + } + } + + parser.parse_rbrace()?; + + let attr = parser.context_rc().create_attribute::(var); + + Ok(attr.as_attribute_ref()) + } +} diff --git a/hir/src/dialects/debuginfo/builders.rs b/hir/src/dialects/debuginfo/builders.rs new file mode 100644 index 000000000..bf300c12a --- /dev/null +++ b/hir/src/dialects/debuginfo/builders.rs @@ -0,0 +1,95 @@ +use midenc_hir::{ + Builder, BuilderExt, Report, SourceSpan, ValueRef, + dialects::debuginfo::attributes::{Expression, Variable}, +}; + +use super::ops::*; + +/// Builder trait for creating debug info operations. +/// +/// This trait follows the same pattern as other dialect builders (`ArithOpBuilder`, `HirOpBuilder`, +/// etc.) and can be implemented for any type that wraps a [Builder]. 
+/// +/// # Usage +/// +/// ```ignore +/// // Emit a debug value tracking where variable 'x' lives: +/// builder.debug_value(ssa_value, variable_attr, span)?; +/// +/// // With a custom expression (e.g., value needs a deref): +/// builder.debug_value_with_expr(ssa_value, variable_attr, Some(expr), span)?; +/// +/// // Emit a debug declaration for storage described by an expression: +/// builder.debug_declare(variable_attr, expr, span)?; +/// +/// // Mark a variable as dead: +/// builder.debug_kill(variable_attr, span)?; +/// ``` +pub trait DIBuilder<'f, B: ?Sized + Builder> { + /// Emit a `di.value` operation that records the current value of a source-level variable. + /// + /// This creates an SSA use of `value`, ensuring that transforms cannot silently drop the value + /// without updating the debug info. + fn debug_value( + &mut self, + value: ValueRef, + variable: Variable, + span: SourceSpan, + ) -> Result { + self.debug_value_with_expr(value, variable, None, span) + } + + /// Emit a `di.value` operation with an optional expression that describes how to recover the + /// source-level value from the IR value. + /// + /// The expression encodes the *inverse* of whatever transformation was applied to the value. + /// For example, if a value was promoted to a stack allocation (pointer), the expression would + /// contain a `deref` operation to recover the original value. + fn debug_value_with_expr( + &mut self, + value: ValueRef, + variable: Variable, + expression: Option, + span: SourceSpan, + ) -> Result { + let expr = expression.unwrap_or_default(); + let op_builder = self.builder_mut().create::(span); + op_builder(value, variable, expr) + } + + /// Emit a `di.debug_declare` operation that records the storage location of a source-level + /// variable. 
+ fn debug_declare( + &mut self, + variable: Variable, + expression: Expression, + span: SourceSpan, + ) -> Result { + let op_builder = self.builder_mut().create::(span); + op_builder(variable, expression) + } + + /// Emit a `di.kill` operation that marks a variable as dead. + /// + /// After this point, the debugger should report the variable as unavailable until the next + /// `debug_value` or `debug_declare` for the same variable. + fn debug_kill(&mut self, variable: Variable, span: SourceSpan) -> Result { + let op_builder = self.builder_mut().create::(span); + op_builder(variable) + } + + fn builder(&self) -> &B; + fn builder_mut(&mut self) -> &mut B; +} + +impl DIBuilder<'_, B> for B { + #[inline(always)] + fn builder(&self) -> &B { + self + } + + #[inline(always)] + fn builder_mut(&mut self) -> &mut B { + self + } +} diff --git a/hir/src/dialects/debuginfo/ops.rs b/hir/src/dialects/debuginfo/ops.rs new file mode 100644 index 000000000..72ceb2beb --- /dev/null +++ b/hir/src/dialects/debuginfo/ops.rs @@ -0,0 +1,119 @@ +use midenc_hir::{ + OpPrinter, UnsafeIntrusiveEntityRef, + derive::{EffectOpInterface, OpParser, OpPrinter, operation}, + dialects::debuginfo::attributes::{ExpressionAttr, VariableAttr}, + effects::{ + DebugEffect, DebugEffectOpInterface, EffectOpInterface, MemoryEffect, + MemoryEffectOpInterface, + }, + smallvec, + traits::{AnyType, Transparent}, +}; + +use super::DebugInfoDialect; + +pub type DebugValueRef = UnsafeIntrusiveEntityRef; +pub type DebugDeclareRef = UnsafeIntrusiveEntityRef; +pub type DebugKillRef = UnsafeIntrusiveEntityRef; + +/// Records the current value of a source-level variable. +/// +/// This is the core operation of the debuginfo dialect. It records a transparent SSA use of the +/// value, which means: +/// +/// - If a transform deletes the value without updating its debug uses, that's a hard error (not a +/// silent drop like with metadata-based approaches). 
+/// - Standard MLIR-style use-def tracking automatically enforces this — transforms must call +/// `replace_all_uses_with`, explicitly handle debug uses, or drop the debug op when its referent +/// is dead. +/// +/// The `variable` attribute identifies the source variable, and the `expression` attribute +/// describes how to recover the source-level value from the IR value (e.g., "dereference this +/// pointer" if the value was promoted to an alloca). +/// +/// # Example +/// +/// ```text +/// di.value %0 #[variable = di.local_variable(name = x, ...)] +/// #[expression = di.expression(DW_OP_WASM_local 0)] +/// ``` +#[derive(EffectOpInterface, OpParser, OpPrinter)] +#[operation( + dialect = DebugInfoDialect, + traits(Transparent), + implements(DebugEffectOpInterface, MemoryEffectOpInterface, OpPrinter) +)] +pub struct DebugValue { + #[operand] + #[effects(DebugEffect(DebugEffect::Read, DebugEffect::Write))] + value: AnyType, + #[attr] + #[effects(DebugEffect(DebugEffect::Write))] + variable: VariableAttr, + #[attr] + expression: ExpressionAttr, +} + +impl EffectOpInterface for DebugValue { + fn effects(&self) -> midenc_hir::effects::EffectIterator { + midenc_hir::effects::EffectIterator::from_smallvec(smallvec![]) + } +} + +/// Records the storage location (address) of a source-level variable. +/// +/// Unlike [DebugValue] which tracks values, [DebugDeclare] tracks the location where a variable is +/// stored. This is useful for variables that live in memory (e.g., stack slots) where the address is +/// described by a debug expression such as `DW_OP_fbreg`. 
+#[derive(EffectOpInterface, OpParser, OpPrinter)] +#[operation( + dialect = DebugInfoDialect, + traits(Transparent), + implements(DebugEffectOpInterface, MemoryEffectOpInterface, OpPrinter) +)] +pub struct DebugDeclare { + #[attr] + #[effects(DebugEffect(DebugEffect::Allocate))] + variable: VariableAttr, + #[attr] + #[effects(DebugEffect(DebugEffect::Write))] + expression: ExpressionAttr, +} + +impl EffectOpInterface for DebugDeclare { + fn effects(&self) -> midenc_hir::effects::EffectIterator { + midenc_hir::effects::EffectIterator::from_smallvec(smallvec![]) + } +} + +/// Marks a source-level variable as dead at this program point. +/// +/// This provides explicit lifetime boundaries for variables, giving the debugger precise +/// information about when a variable is no longer valid. Without this, debuggers must rely on +/// scope-based heuristics which can be inaccurate after optimizations. +/// +/// After a `di.kill`, the debugger should report the variable as "optimized out" or "not +/// available" until the next `di.value` or `di.debug_declare` for the same variable. +/// +/// # Example +/// +/// ```text +/// di.kill #[variable = di.local_variable(name = x, ...)] +/// ``` +#[derive(EffectOpInterface, OpParser, OpPrinter)] +#[operation( + dialect = DebugInfoDialect, + traits(Transparent), + implements(DebugEffectOpInterface, MemoryEffectOpInterface, OpPrinter) +)] +pub struct DebugKill { + #[attr] + #[effects(DebugEffect(DebugEffect::Free))] + variable: VariableAttr, +} + +impl EffectOpInterface for DebugKill { + fn effects(&self) -> midenc_hir::effects::EffectIterator { + midenc_hir::effects::EffectIterator::from_smallvec(smallvec![]) + } +} diff --git a/hir/src/dialects/debuginfo/transform.rs b/hir/src/dialects/debuginfo/transform.rs new file mode 100644 index 000000000..330f31e01 --- /dev/null +++ b/hir/src/dialects/debuginfo/transform.rs @@ -0,0 +1,254 @@ +//! Transform utilities for maintaining debug info across IR transformations. +//! +//! 
This module provides the "transformation hooks" that make the debuginfo dialect practical. +//! Following Mojo's approach, these utilities make it easy for transform authors to keep debug info +//! valid — they only need to describe the *inverse* of their transformation. +//! +//! # Design Philosophy +//! +//! The `di` dialect uses SSA use-def chains for debug values, which means transforms *cannot* +//! silently drop debug info. When a transform replaces or deletes a value, any `di.value` +//! operations using that value must be updated. The standard `replace_all_uses_with` already +//! handles this correctly for simple value replacements. +//! +//! For more complex transforms (e.g., promoting a value to memory, splitting a value into pieces), +//! the transform author uses `salvage_debug_info` to describe how the debug expression should be +//! updated to recover the source-level value from the new representation. +//! +//! # Examples +//! +//! ## Simple value replacement (handled automatically) +//! +//! When CSE replaces `%1 = add %a, %b` with an existing `%0 = add %a, %b`: +//! +//! ```text,ignore +//! // Before: di.value %1 #[variable = x] +//! rewriter.replace_all_uses_with(%1, %0) +//! // After: di.value %0 #[variable = x] -- automatic! +//! ``` +//! +//! ## Value promoted to memory (using `salvage_debug_info`) +//! +//! When a transform promotes a value to a stack allocation: +//! +//! ```text +//! // Before: di.value %val #[variable = x] +//! // Transform creates: %ptr = alloca T +//! // store %val, %ptr +//! // Call: salvage_debug_info(%val, SalvageAction::Deref { new_value: %ptr }) +//! // After: di.value %ptr #[variable = x, expression = di.expression(DW_OP_deref)] +//! 
``` +use alloc::vec::Vec; + +use midenc_hir::{ + Builder, DialectRegistration, Operation, OperationRef, SmallVec, Spanned, ValueRef, + dialects::debuginfo::attributes::ExpressionOp, +}; + +use super::{DIBuilder, ops::DebugValue}; + +/// Describes how to recover the original source-level value after a transformation. +/// +/// When a transform changes a value's representation, it creates a [SalvageAction] describing the +/// inverse operation. The debuginfo framework then updates the `DIExpressionAttr` accordingly so +/// the debugger can still find the variable's value. +/// +/// Transform authors only need to pick the right variant — the framework handles updating all +/// affected `di.value` operations. +#[derive(Clone, Debug)] +pub enum SalvageAction { + /// The value is now behind a pointer; dereference to recover the original. + /// + /// Use this when a value is promoted to a stack allocation. The expression will have + /// `DW_OP_deref` prepended. + Deref { + /// The new pointer value that replaces the original. + new_value: ValueRef, + }, + + /// A constant offset was added to the value. + /// + /// Use this when a value is relocated by a fixed amount (e.g., frame pointer adjustments). The + /// expression will encode the inverse subtraction. + OffsetBy { + /// The new value (original + offset). + new_value: ValueRef, + /// The offset that was added. + offset: u64, + }, + + /// The value was replaced by a new value with an arbitrary expression. + /// + /// Use this for complex transformations where the simple patterns don't apply. The caller + /// provides the full expression describing how to recover the source-level value from the new + /// IR value. + WithExpression { + /// The new value replacing the original. + new_value: ValueRef, + /// Expression operations describing the inverse transform. + ops: Vec, + }, + + /// The value is now a constant. + /// + /// Use this when constant propagation determines the value at this point. 
+ Constant { + /// The constant value. + value: u64, + }, + + /// The value was completely removed with no recovery possible. + /// + /// Use this as a last resort when the value cannot be recovered. This will emit a `di.kill` for + /// the affected variable. + Undef, +} + +/// Salvage debug info for all `di.value` operations that use `old_value`. +/// +/// When a transform is about to delete or replace a value, call this function to update all debug +/// uses. The `action` describes how the debugger can recover the original source-level value from +/// the new representation. +/// +/// This is the main entry point for transform authors who need to update debug info beyond simple +/// `replace_all_uses_with` scenarios. +/// +/// # Example +/// +/// ```rust,ignore +/// // Value was promoted to memory: +/// let ptr = builder.alloca(ty, span)?; +/// builder.store(old_val, ptr, span)?; +/// salvage_debug_info( +/// &old_val, +/// &SalvageAction::Deref { new_value: ptr }, +/// &mut builder, +/// ); +/// ``` +pub fn salvage_debug_info( + old_value: &ValueRef, + action: &SalvageAction, + builder: &mut B, +) { + // Collect all debug value ops that use the old value + for mut debug_op in debug_value_users(old_value) { + apply_salvage_action(&mut debug_op, action, builder); + } +} + +/// Apply a salvage action to a single debug value operation. 
+fn apply_salvage_action( + debug_op: &mut OperationRef, + action: &SalvageAction, + builder: &mut B, +) { + let span = debug_op.borrow().span(); + + match action { + SalvageAction::Deref { new_value } => { + // Get existing expression and prepend deref + let (variable, mut expr) = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + (dv.variable().as_value().clone(), dv.expression().as_value().clone()) + }; + expr.operations.insert(0, ExpressionOp::Deref); + + // Erase old op and create new one with updated value and expression + debug_op.borrow_mut().erase(); + let _ = builder.debug_value_with_expr(*new_value, variable, Some(expr), span); + } + + SalvageAction::OffsetBy { new_value, offset } => { + let (variable, mut expr) = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + (dv.variable().as_value().clone(), dv.expression().as_value().clone()) + }; + // To recover: subtract the offset that was added + expr.operations.push(ExpressionOp::ConstU64(*offset)); + expr.operations.push(ExpressionOp::Minus); + + debug_op.borrow_mut().erase(); + let _ = builder.debug_value_with_expr(*new_value, variable, Some(expr), span); + } + + SalvageAction::WithExpression { new_value, ops } => { + let (variable, mut expr) = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + (dv.variable().as_value().clone(), dv.expression().as_value().clone()) + }; + expr.operations.extend(ops.iter().cloned()); + + debug_op.borrow_mut().erase(); + let _ = builder.debug_value_with_expr(*new_value, variable, Some(expr), span); + } + + SalvageAction::Constant { value } => { + let variable = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + dv.variable().as_value().clone() + }; + + debug_op.borrow_mut().erase(); + // Emit a kill since we can't create a di.value without a live SSA operand for constants + // — the constant value is encoded in the expression + let _ = builder.debug_kill(variable, span); + // TODO: 
in the future, could emit a di.value with a materialized constant and a + // ConstU64/StackValue expression pair + let _ = value; + } + + SalvageAction::Undef => { + let variable = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + dv.variable().as_value().clone() + }; + + debug_op.borrow_mut().erase(); + let _ = builder.debug_kill(variable, span); + } + } +} + +/// Check if an operation is a debug info operation. +/// +/// This is useful for transforms that need to skip or handle debug ops differently (e.g., DCE +/// should not consider debug uses as "real" uses that keep a value alive). +pub fn is_debug_info_op(op: &Operation) -> bool { + op.dialect().name() == super::DebugInfoDialect::NAMESPACE +} + +/// Collect all `di.value` operations that reference the given value. +/// +/// Useful for transforms that need to inspect or update debug info for a specific value. +pub fn debug_value_users(value: &ValueRef) -> SmallVec<[OperationRef; 2]> { + let value = value.borrow(); + let mut ops = SmallVec::new_const(); + for user in value.iter_uses() { + if user.owner.borrow().is::() { + ops.push(user.owner); + } + } + ops +} + +/// Recursively collect all debug info operations within an operation's regions. 
+pub fn collect_debug_ops(op: &OperationRef) -> Vec { + let mut debug_ops = Vec::new(); + collect_debug_ops_recursive(op, &mut debug_ops); + debug_ops +} + +fn collect_debug_ops_recursive(op: &OperationRef, debug_ops: &mut Vec) { + use midenc_hir::{Forward, RawWalk}; + + op.raw_prewalk_all::(|op: OperationRef| { + if is_debug_info_op(&op.borrow()) { + debug_ops.push(op); + } + }); +} diff --git a/hir/src/ir/effects.rs b/hir/src/ir/effects.rs index 2a492ea92..fe5b0d262 100644 --- a/hir/src/ir/effects.rs +++ b/hir/src/ir/effects.rs @@ -1,3 +1,4 @@ +mod debug; mod instance; mod interface; mod memory; @@ -5,7 +6,7 @@ mod speculation; use core::fmt; -pub use self::{instance::EffectInstance, interface::*, memory::*, speculation::*}; +pub use self::{debug::*, instance::EffectInstance, interface::*, memory::*, speculation::*}; use crate::{DynPartialEq, any::AsAny, eq::PartialEqable}; pub trait Effect: AsAny + fmt::Debug {} diff --git a/hir/src/ir/effects/debug.rs b/hir/src/ir/effects/debug.rs new file mode 100644 index 000000000..6eb2d28f7 --- /dev/null +++ b/hir/src/ir/effects/debug.rs @@ -0,0 +1,53 @@ +use super::*; + +/// Debug effects are similar to memory effects in that they reflect how a debugger may observe the +/// effect during execution/debugging. +/// +/// Similarly, optimizations must avoid reordering operations around debug effects in the same way +/// they must not reorder around memory effects (i.e. an op with a `write` memory effect on some +/// resource must not be reordered before an op with a `read` debug effect on that same resource). +/// In practice, debug operations may declare both memory effects and debug effects, to ensure that +/// transformations which are unaware of debug effects still do the right thing with respect to +/// those operations - but this should be considered a last resort. +/// +/// Value uses by operations that have only debug effects are ignored when considering the +/// liveness of those values. 
This allows debug metadata to be recorded in the use-def graph, +/// without interfering with dead-code elimination and other similar optimizations. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum DebugEffect { + /// The following effect indicates that the operation reads from some resource. + /// + /// A 'read' effect implies that a debugger may attempt to dereference the resource. + Read, + /// The following effect indicates that the operation writes to some resource. + /// + /// A 'write' effect implies that a debugger will modify its internal state with respect to + /// some resource (e.g. the storage type or location of a value). This effect only describes + /// mutation of the state, not any visible dereference or read. + Write, + /// The following effect indicates that the operation allocates some resource. + /// + /// An 'allocate' effect implies only allocation of the resource, and not any visible mutation or + /// dereference. In the case of a debugger, this might correspond to allocating a new call frame + /// or starting to track the state of a local variable. + Allocate, + /// The following effect indicates that the operation frees some resource that has been + /// allocated. + /// + /// A 'free' effect implies only de-allocation of the resource, and not any visible + /// allocation, mutation or dereference. In a debugging context, this might correspond to + /// popping a frame from the call stack, or marking the end of the live range of some local + /// variable. 
+ Free, +} + +impl PartialEq for &DebugEffect { + #[inline] + fn eq(&self, other: &DebugEffect) -> bool { + (**self).eq(other) + } +} + +impl Effect for DebugEffect {} + +pub trait DebugEffectOpInterface = EffectOpInterface; diff --git a/hir/src/ir/region/transforms/dce.rs b/hir/src/ir/region/transforms/dce.rs index 9ca6ec6e7..cc232e694 100644 --- a/hir/src/ir/region/transforms/dce.rs +++ b/hir/src/ir/region/transforms/dce.rs @@ -7,7 +7,7 @@ use crate::{ OpOperandImpl, OpResult, Operation, OperationRef, PostOrderBlockIter, Region, RegionRef, Rewriter, SuccessorOperands, ValueRef, adt::SmallSet, - traits::{BranchOpInterface, Terminator}, + traits::{BranchOpInterface, Terminator, Transparent}, }; /// Data structure used to track which values have already been proved live. @@ -67,7 +67,12 @@ impl LiveMap { pub fn is_use_specially_known_dead(&self, user: &OpOperandImpl) -> bool { // DCE generally treats all uses of an op as live if the op itself is considered live. - // However, for successor operands to terminators we need a finer-grained notion where we + // + // However, there are two special cases: + // + // ## Successor Operands + // + // For successor operands to terminators we need a finer-grained notion where we // deduce liveness for operands individually. The reason for this is easiest to think about // in terms of a classical phi node based SSA IR, where each successor operand is really an // operand to a _separate_ phi node, rather than all operands to the branch itself as with @@ -76,15 +81,24 @@ impl LiveMap { // And similarly, because each successor operand is really an operand to a phi node, rather // than to the terminator op itself, a terminator op can't e.g. "print" the value of a // successor operand. - let owner = &user.owner; - if owner.borrow().implements::() - && let Some(branch_interface) = owner.borrow().as_trait::() + // + // ## Transparent Metadata + // + // Some Transparent operations "use" SSA values only as metadata. 
If the use would + // otherwise be dead if the op didn't exist, then we treat both the metadata op and the + // value use as dead, so that these ops do not interfere with dead-code elimination. + let owner_ref = &user.owner; + let owner = owner_ref.borrow(); + if owner.implements::() + && let Some(branch_interface) = owner.as_trait::() && let Some(arg) = branch_interface.get_successor_block_argument(user.index as usize) { return !self.was_proven_live(&arg.upcast()); } - false + // If the owning op is transparent, then its value uses are not considered when determining + // liveness. + owner.implements::() } pub fn propagate_region_liveness(&mut self, region: &Region) { @@ -100,15 +114,6 @@ impl LiveMap { self.propagate_liveness(&op); } - // We currently do not remove entry block arguments, so there is no need to track their - // liveness. - // - // TODO(pauls): We could track these and enable removing dead operands/arguments from - // region control flow operations in the future. - if block.is_entry_block() { - continue; - } - for arg in block.arguments().iter().copied() { let arg = arg as ValueRef; if !self.was_proven_live(&arg) { @@ -135,7 +140,36 @@ impl LiveMap { } // Process this op - if !op.would_be_trivially_dead() { + if op.implements::() { + // If this op is Transparent, it has zero or one operands and no results. + // + // We consider such ops live IFF it either: + // + // 1. Has no operands + // 2. 
Has an operand which has at least one real use + if op.has_operands() { + for operand in op.operands().iter() { + let operand = operand.borrow(); + if let Some(defining_op) = operand.value().get_defining_op() + && self.was_op_proven_live(&defining_op) + { + self.set_op_proved_live(op.as_operation_ref()); + return; + } else if self.was_proven_live(&operand.as_value_ref()) { + self.set_op_proved_live(op.as_operation_ref()); + return; + } + } + } else { + // Transparent ops with no SSA operands are always treated as live here, as we can + // not otherwise determine whether it is valid to remove it or not + // + // TODO(pauls): We may need to reject such ops, as it would otherwise not be + // generally possible to determine how to handle them during transformations other + // than DCE + self.set_op_proved_live(op.as_operation_ref()); + } + } else if !op.would_be_trivially_dead() { self.set_op_proved_live(op.as_operation_ref()); } @@ -191,6 +225,11 @@ impl Region { rewriter: &mut dyn Rewriter, ) -> Result<(), RegionTransformFailed> { log::debug!(target: "region-simplify", "starting region dead code elimination"); + let live_map = Self::compute_liveness(regions); + Self::cleanup_dead_code(regions, rewriter, &live_map) + } + + fn compute_liveness(regions: &[RegionRef]) -> LiveMap { let mut live_map = LiveMap::default(); loop { live_map.mark_unchanged(); @@ -206,8 +245,7 @@ impl Region { break; } } - - Self::cleanup_dead_code(regions, rewriter, &live_map) + live_map } /// Erase the unreachable blocks within the regions in `regions`. 
@@ -396,3 +434,124 @@ impl Region { } } } + +#[cfg(test)] +mod tests { + use alloc::format; + + use midenc_expect_test::expect_file; + use midenc_session::diagnostics::SourceSpan; + + use super::*; + use crate::{ + Builder, BuilderExt, Op, Type, + derive::{EffectOpInterface, operation}, + dialects::{ + builtin::BuiltinOpBuilder, + test::{TestDialect, TestOpBuilder}, + }, + effects::MemoryEffectOpInterface, + patterns::{NoopRewriterListener, RewriterImpl}, + testing::Test, + traits::{AnyType, Transparent}, + }; + + #[operation( + dialect = TestDialect, + traits(Transparent), + implements(MemoryEffectOpInterface) + )] + #[derive(EffectOpInterface)] + pub struct DebugValue { + #[operand] + #[effects(MemoryEffect())] + value: AnyType, + } + + #[test] + fn transparent_ops_are_not_considered_dead_unless_their_referent_value_is_dead() { + let mut test = + Test::new("transparent_ops_inherit_liveness_of_referent", &[Type::U32], &[Type::U32]); + + let op = test.function(); + let mut builder = test.function_builder(); + let entry = builder.entry_block(); + + let builder = builder.builder_mut(); + builder.set_insertion_point_to_end(entry); + + let input = entry.borrow().arguments()[0] as ValueRef; + + let unused_output = builder.add(input, input, SourceSpan::UNKNOWN).unwrap(); + let dead_debug_var = builder.create::(SourceSpan::UNKNOWN); + let dead_debug_var_op = dead_debug_var(unused_output).unwrap(); + + let output = builder.add(input, input, SourceSpan::UNKNOWN).unwrap(); + let live_debug_var = builder.create::(SourceSpan::UNKNOWN); + let live_debug_var_op = live_debug_var(output).unwrap(); + let ret_op = builder.ret([output], SourceSpan::UNKNOWN).unwrap(); + + let region = op.borrow().body().as_region_ref(); + let live_map = Region::compute_liveness(&[region]); + + // A ret op is always live in region dce + assert!(live_map.was_op_proven_live(&ret_op.as_operation_ref())); + // The `output` value must be live because it is an operand of the ret + 
assert!(live_map.was_proven_live(&output)); + // `live_debug_var_op` is live because `output` is live + assert!(live_map.was_op_proven_live(&live_debug_var_op.as_operation_ref())); + // `input` is live because it is used by the live `add` + assert!(live_map.was_proven_live(&input)); + // `unused_output` must be dead because it has no non-transparent users + assert!(!live_map.was_proven_live(&unused_output)); + // `dead_debug_var_op` must be dead because `unused_output` is dead + assert!(!live_map.was_op_proven_live(&dead_debug_var_op.as_operation_ref())); + } + + #[test] + fn transparent_ops_do_not_interfere_with_dead_code_elimination() { + let mut test = Test::new("transparent_ops_no_dce_interference", &[Type::U32], &[Type::U32]); + + let op = test.function(); + { + let mut builder = test.function_builder(); + let entry = builder.entry_block(); + + let builder = builder.builder_mut(); + builder.set_insertion_point_to_end(entry); + + let input = entry.borrow().arguments()[0] as ValueRef; + + let unused_output = builder.add(input, input, SourceSpan::UNKNOWN).unwrap(); + let dead_debug_var = builder.create::(SourceSpan::UNKNOWN); + let _dead_debug_var_op = dead_debug_var(unused_output).unwrap(); + + let output = builder.add(input, input, SourceSpan::UNKNOWN).unwrap(); + let live_debug_var = builder.create::(SourceSpan::UNKNOWN); + let _live_debug_var_op = live_debug_var(output).unwrap(); + builder.ret([output], SourceSpan::UNKNOWN).unwrap(); + } + + let before = format!("{}", op.borrow().as_operation()); + expect_file!["expected/transparent_ops_do_not_interfere_with_dce_before.hir"] + .assert_eq(&before); + + let region = op.borrow().body().as_region_ref(); + + { + let mut rewriter = RewriterImpl::::new(test.context_rc()); + Region::dead_code_elimination(&[region], &mut rewriter) + .expect("dead code elimination failed unexpectedly"); + } + + let after = format!("{}", op.borrow().as_operation()); + 
expect_file!["expected/transparent_ops_do_not_interfere_with_dce_after.hir"] + .assert_eq(&after); + + assert_ne!(&before, &after); + assert_eq!(before.matches("test.debug_value").count(), 2); + assert_eq!(before.matches("test.add").count(), 2); + assert_eq!(after.matches("test.debug_value").count(), 1); + assert_eq!(after.matches("test.add").count(), 1); + } +} diff --git a/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_after.hir b/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_after.hir new file mode 100644 index 000000000..3084583bc --- /dev/null +++ b/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_after.hir @@ -0,0 +1,5 @@ +builtin.function public extern("C") @transparent_ops_no_dce_interference(%0: u32) -> u32 { + %2 = test.add %0, %0 <{ overflow = #builtin.overflow }>; + "test.debug_value"(%2) : u32 -> (); + builtin.ret %2 : (u32); +}; \ No newline at end of file diff --git a/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_before.hir b/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_before.hir new file mode 100644 index 000000000..9c44fb38d --- /dev/null +++ b/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_before.hir @@ -0,0 +1,7 @@ +builtin.function public extern("C") @transparent_ops_no_dce_interference(%0: u32) -> u32 { + %1 = test.add %0, %0 <{ overflow = #builtin.overflow }>; + "test.debug_value"(%1) : u32 -> (); + %2 = test.add %0, %0 <{ overflow = #builtin.overflow }>; + "test.debug_value"(%2) : u32 -> (); + builtin.ret %2 : (u32); +}; \ No newline at end of file diff --git a/hir/src/ir/traits.rs b/hir/src/ir/traits.rs index 081342127..c28310b53 100644 --- a/hir/src/ir/traits.rs +++ b/hir/src/ir/traits.rs @@ -319,3 +319,55 @@ pub trait SingleRegion { // pub trait HasParent {} // pub trait ParentOneOf<(T,...)> {} + +/// Marker trait for ops which: +/// +/// * 
Represent the attachment of metadata to values in the IR +/// * Should not be considered as a "real" user for purposes of determining liveness of its operands +/// * Should not be considered dead unless all of its operands are also dead +/// * Does not result in any code being emitted during codegen +/// +/// The goal of such operations is to attach important metadata, such as debug information, to +/// values in the IR, ensuring that the metadata is preserved through transformations, while not +/// interfering with optimizations that may make the original value dead except for the uses by +/// transparent ops. +#[operation_trait] +pub trait Transparent { + #[verifier] + fn has_no_results(op: &Operation, context: &Context) -> Result<(), Report> { + if op.results().is_empty() { + Ok(()) + } else { + Err(context + .diagnostics() + .diagnostic(Severity::Error) + .with_message(::alloc::format!("invalid operation {}", op.name())) + .with_primary_label(op.span(), "expected operation to have no results") + .with_help( + "this operator implements 'Transparent', which requires it to have no results", + ) + .into_report()) + } + } + + #[verifier] + fn has_no_more_than_one_operand(op: &Operation, context: &Context) -> Result<(), Report> { + if op.num_operands() > 1 { + Err(context + .diagnostics() + .diagnostic(Severity::Error) + .with_message(::alloc::format!("invalid operation {}", op.name())) + .with_primary_label( + op.span(), + "expected operation to have no more than one operand", + ) + .with_help( + "this operator implements 'Transparent', which requires it to have an arity < \ + 2", + ) + .into_report()) + } else { + Ok(()) + } + } +} diff --git a/hir/src/ir/value.rs b/hir/src/ir/value.rs index d09352e2d..ad5bf9378 100644 --- a/hir/src/ir/value.rs +++ b/hir/src/ir/value.rs @@ -10,7 +10,7 @@ pub use self::{ stack::StackOperand, }; use super::*; -use crate::{DynHash, DynPartialEq, PartialEqable, any::AsAny, interner}; +use crate::{DynHash, DynPartialEq, PartialEqable, 
any::AsAny, interner, traits::Transparent}; /// A unique identifier for a [Value] in the IR #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -175,6 +175,13 @@ pub trait Value: self.iter_uses() .any(|user| user.owner.parent().is_some_and(|blk| !BlockRef::ptr_eq(&blk, block))) } + /// Returns true if this value has at least one non-transparent user that should keep it alive + fn has_real_uses(&self) -> bool { + // The value is used so long as at least one using op is not Transparent. + self.uses() + .iter() + .any(|user| !user.owner.borrow().implements::()) + } /// Replace all uses of `self` with `replacement` fn replace_all_uses_with(&mut self, mut replacement: ValueRef) { let mut cursor = self.uses_mut().front_mut(); diff --git a/hir/src/lib.rs b/hir/src/lib.rs index 1b01ee544..1aee97a7c 100644 --- a/hir/src/lib.rs +++ b/hir/src/lib.rs @@ -26,6 +26,7 @@ #![feature(extend_one)] #![feature(extend_one_unchecked)] #![feature(iter_advance_by)] +#![feature(iter_intersperse)] #![feature(iter_next_chunk)] #![feature(iter_collect_into)] #![feature(trusted_len)] @@ -82,8 +83,8 @@ pub use midenc_session::diagnostics; pub use self::{ attributes::{ - Attribute, AttributeName, AttributeRef, AttributeRegistration, NamedAttribute, - NamedAttributeList, + Attribute, AttributeName, AttributeRef, AttributeRegistration, AttributeValue, + NamedAttribute, NamedAttributeList, }, dialects::builtin::attributes::{Location, Overflow, Visibility, version}, direction::{Backward, Direction, Forward}, diff --git a/hir/src/patterns/rewriter.rs b/hir/src/patterns/rewriter.rs index 2738890bb..925509494 100644 --- a/hir/src/patterns/rewriter.rs +++ b/hir/src/patterns/rewriter.rs @@ -6,10 +6,11 @@ use smallvec::SmallVec; use crate::{ BlockRef, Builder, Context, InsertionGuard, Listener, ListenerType, OpBuilder, OpOperandImpl, - OperationRef, PostOrderBlockIter, ProgramPoint, RegionRef, Report, SourceSpan, Usable, + OperationRef, PostOrderBlockIter, ProgramPoint, RegionRef, Report, 
SourceSpan, Usable, Value, ValueRef, formatter::{DisplayOptional, DisplayValues}, patterns::Pattern, + traits::Transparent, }; /// A [Rewriter] is a [Builder] extended with additional functionality that is of primary use when @@ -51,7 +52,24 @@ pub trait Rewriter: Builder + RewriterListener { /// This method erases an operation that is known to have no uses. fn erase_op(&mut self, mut op: OperationRef) { - assert!(!op.borrow().is_used(), "expected op to have no uses"); + // Assert `op` has no real uses, and erase any transparent users as they are now dead + { + let op = op.borrow(); + for result in op.results().iter() { + let result = result.borrow(); + for user in result.iter_uses() { + log::info!(target: "erase_op", "{}", user.owner.borrow()); + } + assert!(!result.has_real_uses(), "expected op to have no real uses"); + // If there are remaining uses, they must be transparent, so remove them + for user in result.iter_uses() { + let owner = user.owner; + drop(user); + assert!(owner.borrow().implements::()); + self.erase_op(owner); + } + } + } // If no listener is attached, the op can be dropped all at once. 
if !self.has_listener() { diff --git a/midenc-compile/src/stages/rewrite.rs b/midenc-compile/src/stages/rewrite.rs index 637d3529e..ee2ca94ae 100644 --- a/midenc-compile/src/stages/rewrite.rs +++ b/midenc-compile/src/stages/rewrite.rs @@ -108,6 +108,14 @@ impl Stage for ApplyRewritesStage { log::trace!(target: "driver", "after rewrites: {}", input.world.borrow().as_operation()); log::debug!(target: "driver", "rewrites successful"); + // Emit HIR if requested + let session = context.session(); + if session.should_emit(midenc_session::OutputType::Hir) { + session + .emit(midenc_session::OutputMode::Text, &*input.component.borrow()) + .into_diagnostic()?; + } + if context.session().rewrite_only() { log::debug!(target: "driver", "stopping compiler early (rewrite-only=true)"); Err(CompilerStopped.into()) diff --git a/tests/integration-network/src/mockchain/notes/basic_wallet.rs b/tests/integration-network/src/mockchain/notes/basic_wallet.rs index 79bda2c71..4220244d4 100644 --- a/tests/integration-network/src/mockchain/notes/basic_wallet.rs +++ b/tests/integration-network/src/mockchain/notes/basic_wallet.rs @@ -125,7 +125,7 @@ pub fn basic_wallet_p2id_transfers_asset_with_custom_tx_script() { &mut note_rng, ); let tx_measurements = execute_tx(&mut chain, alice_tx_context_builder); - expect!["26217"].assert_eq(tx_script_processing_cycles(&tx_measurements)); + expect!["26223"].assert_eq(tx_script_processing_cycles(&tx_measurements)); eprintln!("\n=== Step 4: Bob consumes p2id note ==="); let consume_tx_context_builder = chain.build_tx_context(bob_id, &[bob_note.id()], &[]).unwrap(); @@ -255,7 +255,7 @@ pub fn basic_wallet_p2ide_allows_recipient_claim() { let consume_tx_context_builder = chain.build_tx_context(bob_id, &[p2ide_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); - expect!["21211"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); + expect!["21212"].assert_eq(note_cycles(&tx_measurements, 
p2ide_note.id())); // Step 5: verify balances let bob_account = chain.committed_account(bob_id).unwrap(); @@ -380,7 +380,7 @@ pub fn basic_wallet_p2ide_allows_sender_reclaim() { let reclaim_tx_context_builder = chain.build_tx_context(alice_id, &[p2ide_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, reclaim_tx_context_builder); - expect!["22871"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); + expect!["22872"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); // Step 5: verify Alice has her original amount back let alice_account = chain.committed_account(alice_id).unwrap(); diff --git a/tests/integration/src/end_to_end/debuginfo/expected/debug_conditional_assignment.hir b/tests/integration/src/end_to_end/debuginfo/expected/debug_conditional_assignment.hir new file mode 100644 index 000000000..4dc3531c9 --- /dev/null +++ b/tests/integration/src/end_to_end/debuginfo/expected/debug_conditional_assignment.hir @@ -0,0 +1,88 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_6a38c85fac04c065bce04eac12569a730e53b0bf392ff986bec40561d4ead6b0 { + builtin.function public extern("C") @entrypoint(%0: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + %2 = builtin.global_symbol ::@root_ns:root@1.0.0::@test_rust_6a38c85fac04c065bce04eac12569a730e53b0bf392ff986bec40561d4ead6b0::@__stack_pointer : ptr; + %3 = hir.bitcast %2 <{ ty = #builtin.type> }>; + %4 = hir.load %3; + di.debug_value %0 <{ variable = #di.variable<{ name = "x", file = "test_rust_6a38c85fac04c065bce04eac12569a730e53b0bf392ff986bec40561d4ead6b0.rs", line = 19, arg = 0, ty = i32 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+12)]> }> : (i32); + di.debug_declare <{ variable = #di.variable<{ name = "result", file = "test_rust_6a38c85fac04c065bce04eac12569a730e53b0bf392ff986bec40561d4ead6b0.rs", line = 20 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+8)]> }>; + %5 = arith.constant 16 
: i32; + %6 = arith.sub %4, %5 <{ overflow = #builtin.overflow }>; + hir.store_local %6 <{ local = #builtin.local_variable<1, i32> }> : (i32); + %7 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %8 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %9 = hir.bitcast %7 <{ ty = #builtin.type }>; + %10 = arith.constant 12 : u32; + %11 = arith.add %9, %10 <{ overflow = #builtin.overflow }>; + %12 = arith.constant 4 : u32; + %13 = arith.mod %11, %12; + hir.assertz %13 <{ code = #builtin.u32<250> }> : (u32); + %14 = hir.int_to_ptr %11 <{ ty = #builtin.type> }>; + hir.store %14, %8 : (ptr, i32); + %15 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %16 = arith.constant 10 : i32; + %17 = hir.bitcast %15 <{ ty = #builtin.type }>; + %18 = hir.bitcast %16 <{ ty = #builtin.type }>; + %19 = arith.gt %17, %18; + %20 = arith.zext %19 <{ ty = #builtin.type }>; + %21 = hir.bitcast %20 <{ ty = #builtin.type }>; + %22 = arith.constant 1 : i32; + %23 = arith.band %21, %22; + %24 = arith.constant 0 : i32; + %25 = arith.neq %23, %24; + cf.cond_br %25 ^block9, ^block10 : (i1); + ^block7(%1: i32): + + ^block8: + %47 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %48 = hir.bitcast %47 <{ ty = #builtin.type }>; + %49 = arith.constant 8 : u32; + %50 = arith.add %48, %49 <{ overflow = #builtin.overflow }>; + %51 = arith.constant 4 : u32; + %52 = arith.mod %50, %51; + hir.assertz %52 <{ code = #builtin.u32<250> }> : (u32); + %53 = hir.int_to_ptr %50 <{ ty = #builtin.type> }>; + %54 = hir.load %53; + builtin.ret %54 : (i32); + ^block9: + %36 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %37 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %38 = arith.constant 1 : i32; + %39 = hir.bitcast %38 <{ ty = #builtin.type }>; + %40 = arith.shl %37, %39; + %41 = hir.bitcast %36 <{ ty = #builtin.type }>; + %42 = arith.constant 8 : u32; + %43 = arith.add %41, %42 <{ overflow = #builtin.overflow }>; + 
%44 = arith.constant 4 : u32; + %45 = arith.mod %43, %44; + hir.assertz %45 <{ code = #builtin.u32<250> }> : (u32); + %46 = hir.int_to_ptr %43 <{ ty = #builtin.type> }>; + hir.store %46, %40 : (ptr, i32); + cf.br ^block8; + ^block10: + %26 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %27 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %28 = arith.constant 1 : i32; + %29 = arith.add %27, %28 <{ overflow = #builtin.overflow }>; + %30 = hir.bitcast %26 <{ ty = #builtin.type }>; + %31 = arith.constant 8 : u32; + %32 = arith.add %30, %31 <{ overflow = #builtin.overflow }>; + %33 = arith.constant 4 : u32; + %34 = arith.mod %32, %33; + hir.assertz %34 <{ code = #builtin.u32<250> }> : (u32); + %35 = hir.int_to_ptr %32 <{ ty = #builtin.type> }>; + hir.store %35, %29 : (ptr, i32); + cf.br ^block8; + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/src/end_to_end/debuginfo/expected/debug_multiple_locals.hir b/tests/integration/src/end_to_end/debuginfo/expected/debug_multiple_locals.hir new file mode 100644 index 000000000..af923c727 --- /dev/null +++ b/tests/integration/src/end_to_end/debuginfo/expected/debug_multiple_locals.hir @@ -0,0 +1,80 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_4a512383b70b366e158ab1775d5a9229476df2d2f66794836c3f1deff13c59ae { + builtin.function public extern("C") @entrypoint(%0: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + %2 = builtin.global_symbol ::@root_ns:root@1.0.0::@test_rust_4a512383b70b366e158ab1775d5a9229476df2d2f66794836c3f1deff13c59ae::@__stack_pointer : ptr; + %3 = hir.bitcast %2 <{ ty = 
#builtin.type> }>; + %4 = hir.load %3; + di.debug_value %0 <{ variable = #di.variable<{ name = "n", file = "test_rust_4a512383b70b366e158ab1775d5a9229476df2d2f66794836c3f1deff13c59ae.rs", line = 19, arg = 0, ty = i32 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+0)]> }> : (i32); + di.debug_declare <{ variable = #di.variable<{ name = "a", file = "test_rust_4a512383b70b366e158ab1775d5a9229476df2d2f66794836c3f1deff13c59ae.rs", line = 20 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+4)]> }>; + di.debug_declare <{ variable = #di.variable<{ name = "b", file = "test_rust_4a512383b70b366e158ab1775d5a9229476df2d2f66794836c3f1deff13c59ae.rs", line = 21 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+8)]> }>; + %5 = arith.constant 16 : i32; + %6 = arith.sub %4, %5 <{ overflow = #builtin.overflow }>; + hir.store_local %6 <{ local = #builtin.local_variable<1, i32> }> : (i32); + %7 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %8 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %9 = hir.bitcast %7 <{ ty = #builtin.type }>; + %10 = arith.constant 4 : u32; + %11 = arith.mod %9, %10; + hir.assertz %11 <{ code = #builtin.u32<250> }> : (u32); + %12 = hir.int_to_ptr %9 <{ ty = #builtin.type> }>; + hir.store %12, %8 : (ptr, i32); + %13 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %14 = arith.constant 1 : i32; + %15 = arith.add %13, %14 <{ overflow = #builtin.overflow }>; + hir.store_local %15 <{ local = #builtin.local_variable<2, i32> }> : (i32); + %16 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %17 = hir.load_local <{ local = #builtin.local_variable<2, i32> }>; + %18 = hir.bitcast %16 <{ ty = #builtin.type }>; + %19 = arith.constant 4 : u32; + %20 = arith.add %18, %19 <{ overflow = #builtin.overflow }>; + %21 = arith.constant 4 : u32; + %22 = arith.mod %20, %21; + hir.assertz %22 <{ code = #builtin.u32<250> }> : (u32); + %23 = hir.int_to_ptr %20 <{ ty = #builtin.type> }>; + hir.store 
%23, %17 : (ptr, i32); + %24 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %25 = arith.constant 1 : i32; + %26 = hir.bitcast %25 <{ ty = #builtin.type }>; + %27 = arith.shl %24, %26; + hir.store_local %27 <{ local = #builtin.local_variable<3, i32> }> : (i32); + %28 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %29 = hir.load_local <{ local = #builtin.local_variable<3, i32> }>; + %30 = hir.bitcast %28 <{ ty = #builtin.type }>; + %31 = arith.constant 8 : u32; + %32 = arith.add %30, %31 <{ overflow = #builtin.overflow }>; + %33 = arith.constant 4 : u32; + %34 = arith.mod %32, %33; + hir.assertz %34 <{ code = #builtin.u32<250> }> : (u32); + %35 = hir.int_to_ptr %32 <{ ty = #builtin.type> }>; + hir.store %35, %29 : (ptr, i32); + %36 = hir.load_local <{ local = #builtin.local_variable<2, i32> }>; + %37 = hir.load_local <{ local = #builtin.local_variable<3, i32> }>; + %38 = arith.add %36, %37 <{ overflow = #builtin.overflow }>; + hir.store_local %38 <{ local = #builtin.local_variable<4, i32> }> : (i32); + %39 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %40 = hir.load_local <{ local = #builtin.local_variable<4, i32> }>; + %41 = hir.bitcast %39 <{ ty = #builtin.type }>; + %42 = arith.constant 12 : u32; + %43 = arith.add %41, %42 <{ overflow = #builtin.overflow }>; + %44 = arith.constant 4 : u32; + %45 = arith.mod %43, %44; + hir.assertz %45 <{ code = #builtin.u32<250> }> : (u32); + %46 = hir.int_to_ptr %43 <{ ty = #builtin.type> }>; + hir.store %46, %40 : (ptr, i32); + %47 = hir.load_local <{ local = #builtin.local_variable<4, i32> }>; + builtin.ret %47 : (i32); + ^block7(%1: i32): + + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at 
end of file diff --git a/tests/integration/src/end_to_end/debuginfo/expected/debug_nested_loops.hir b/tests/integration/src/end_to_end/debuginfo/expected/debug_nested_loops.hir new file mode 100644 index 000000000..1c215e90e --- /dev/null +++ b/tests/integration/src/end_to_end/debuginfo/expected/debug_nested_loops.hir @@ -0,0 +1,192 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_0c3ae86ec9024ba37eabb5e0f732abe49b77e739f7696fe0d9cb0b5d1f72d1c1 { + builtin.function public extern("C") @entrypoint(%0: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + %2 = builtin.global_symbol ::@root_ns:root@1.0.0::@test_rust_0c3ae86ec9024ba37eabb5e0f732abe49b77e739f7696fe0d9cb0b5d1f72d1c1::@__stack_pointer : ptr; + %3 = hir.bitcast %2 <{ ty = #builtin.type> }>; + %4 = hir.load %3; + di.debug_value %0 <{ variable = #di.variable<{ name = "n", file = "test_rust_0c3ae86ec9024ba37eabb5e0f732abe49b77e739f7696fe0d9cb0b5d1f72d1c1.rs", line = 19, column = 21, arg = 0, ty = i32 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+12)]> }> : (i32); + di.debug_declare <{ variable = #di.variable<{ name = "total", file = "test_rust_0c3ae86ec9024ba37eabb5e0f732abe49b77e739f7696fe0d9cb0b5d1f72d1c1.rs", line = 20 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+0)]> }>; + di.debug_declare <{ variable = #di.variable<{ name = "i", file = "test_rust_0c3ae86ec9024ba37eabb5e0f732abe49b77e739f7696fe0d9cb0b5d1f72d1c1.rs", line = 21 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+4)]> }>; + di.debug_declare <{ variable = #di.variable<{ name = "j", file = "test_rust_0c3ae86ec9024ba37eabb5e0f732abe49b77e739f7696fe0d9cb0b5d1f72d1c1.rs", line = 23 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+8)]> }>; + %5 = arith.constant 16 : i32; + %6 = arith.sub %4, %5 <{ overflow = #builtin.overflow }>; + hir.store_local %6 <{ local = #builtin.local_variable<1, i32> }> : (i32); + %7 = hir.load_local <{ local = 
#builtin.local_variable<1, i32> }>; + %8 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %9 = hir.bitcast %7 <{ ty = #builtin.type }>; + %10 = arith.constant 12 : u32; + %11 = arith.add %9, %10 <{ overflow = #builtin.overflow }>; + %12 = arith.constant 4 : u32; + %13 = arith.mod %11, %12; + hir.assertz %13 <{ code = #builtin.u32<250> }> : (u32); + %14 = hir.int_to_ptr %11 <{ ty = #builtin.type> }>; + hir.store %14, %8 : (ptr, i32); + %15 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %16 = arith.constant 0 : i32; + %17 = hir.bitcast %15 <{ ty = #builtin.type }>; + %18 = arith.constant 4 : u32; + %19 = arith.mod %17, %18; + hir.assertz %19 <{ code = #builtin.u32<250> }> : (u32); + %20 = hir.int_to_ptr %17 <{ ty = #builtin.type> }>; + hir.store %20, %16 : (ptr, i32); + %21 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %22 = arith.constant 0 : i32; + %23 = hir.bitcast %21 <{ ty = #builtin.type }>; + %24 = arith.constant 4 : u32; + %25 = arith.add %23, %24 <{ overflow = #builtin.overflow }>; + %26 = arith.constant 4 : u32; + %27 = arith.mod %25, %26; + hir.assertz %27 <{ code = #builtin.u32<250> }> : (u32); + %28 = hir.int_to_ptr %25 <{ ty = #builtin.type> }>; + hir.store %28, %22 : (ptr, i32); + cf.br ^block8; + ^block7(%1: i32): + + ^block8: + %30 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %31 = hir.bitcast %30 <{ ty = #builtin.type }>; + %32 = arith.constant 4 : u32; + %33 = arith.add %31, %32 <{ overflow = #builtin.overflow }>; + %34 = arith.constant 4 : u32; + %35 = arith.mod %33, %34; + hir.assertz %35 <{ code = #builtin.u32<250> }> : (u32); + %36 = hir.int_to_ptr %33 <{ ty = #builtin.type> }>; + %37 = hir.load %36; + %38 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %39 = hir.bitcast %37 <{ ty = #builtin.type }>; + %40 = hir.bitcast %38 <{ ty = #builtin.type }>; + %41 = arith.lt %39, %40; + %42 = arith.zext %41 <{ ty = #builtin.type }>; + %43 = hir.bitcast %42 <{ 
ty = #builtin.type }>; + %44 = arith.constant 1 : i32; + %45 = arith.band %43, %44; + %46 = arith.constant 0 : i32; + %47 = arith.neq %45, %46; + cf.cond_br %47 ^block10, ^block11 : (i1); + ^block9(%29: i32): + + ^block10: + %54 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %55 = arith.constant 0 : i32; + %56 = hir.bitcast %54 <{ ty = #builtin.type }>; + %57 = arith.constant 8 : u32; + %58 = arith.add %56, %57 <{ overflow = #builtin.overflow }>; + %59 = arith.constant 4 : u32; + %60 = arith.mod %58, %59; + hir.assertz %60 <{ code = #builtin.u32<250> }> : (u32); + %61 = hir.int_to_ptr %58 <{ ty = #builtin.type> }>; + hir.store %61, %55 : (ptr, i32); + cf.br ^block12; + ^block11: + %48 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %49 = hir.bitcast %48 <{ ty = #builtin.type }>; + %50 = arith.constant 4 : u32; + %51 = arith.mod %49, %50; + hir.assertz %51 <{ code = #builtin.u32<250> }> : (u32); + %52 = hir.int_to_ptr %49 <{ ty = #builtin.type> }>; + %53 = hir.load %52; + builtin.ret %53 : (i32); + ^block12: + %63 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %64 = hir.bitcast %63 <{ ty = #builtin.type }>; + %65 = arith.constant 8 : u32; + %66 = arith.add %64, %65 <{ overflow = #builtin.overflow }>; + %67 = arith.constant 4 : u32; + %68 = arith.mod %66, %67; + hir.assertz %68 <{ code = #builtin.u32<250> }> : (u32); + %69 = hir.int_to_ptr %66 <{ ty = #builtin.type> }>; + %70 = hir.load %69; + %71 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %72 = hir.bitcast %71 <{ ty = #builtin.type }>; + %73 = arith.constant 4 : u32; + %74 = arith.add %72, %73 <{ overflow = #builtin.overflow }>; + %75 = arith.constant 4 : u32; + %76 = arith.mod %74, %75; + hir.assertz %76 <{ code = #builtin.u32<250> }> : (u32); + %77 = hir.int_to_ptr %74 <{ ty = #builtin.type> }>; + %78 = hir.load %77; + %79 = hir.bitcast %70 <{ ty = #builtin.type }>; + %80 = hir.bitcast %78 <{ ty = #builtin.type }>; + %81 = arith.lt %79, 
%80; + %82 = arith.zext %81 <{ ty = #builtin.type }>; + %83 = hir.bitcast %82 <{ ty = #builtin.type }>; + %84 = arith.constant 1 : i32; + %85 = arith.band %83, %84; + %86 = arith.constant 0 : i32; + %87 = arith.neq %85, %86; + cf.cond_br %87 ^block14, ^block15 : (i1); + ^block13(%62: i32): + + ^block14: + %105 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %106 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %107 = hir.bitcast %106 <{ ty = #builtin.type }>; + %108 = arith.constant 4 : u32; + %109 = arith.mod %107, %108; + hir.assertz %109 <{ code = #builtin.u32<250> }> : (u32); + %110 = hir.int_to_ptr %107 <{ ty = #builtin.type> }>; + %111 = hir.load %110; + %112 = arith.constant 1 : i32; + %113 = arith.add %111, %112 <{ overflow = #builtin.overflow }>; + %114 = hir.bitcast %105 <{ ty = #builtin.type }>; + %115 = arith.constant 4 : u32; + %116 = arith.mod %114, %115; + hir.assertz %116 <{ code = #builtin.u32<250> }> : (u32); + %117 = hir.int_to_ptr %114 <{ ty = #builtin.type> }>; + hir.store %117, %113 : (ptr, i32); + %118 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %119 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %120 = hir.bitcast %119 <{ ty = #builtin.type }>; + %121 = arith.constant 8 : u32; + %122 = arith.add %120, %121 <{ overflow = #builtin.overflow }>; + %123 = arith.constant 4 : u32; + %124 = arith.mod %122, %123; + hir.assertz %124 <{ code = #builtin.u32<250> }> : (u32); + %125 = hir.int_to_ptr %122 <{ ty = #builtin.type> }>; + %126 = hir.load %125; + %127 = arith.constant 1 : i32; + %128 = arith.add %126, %127 <{ overflow = #builtin.overflow }>; + %129 = hir.bitcast %118 <{ ty = #builtin.type }>; + %130 = arith.constant 8 : u32; + %131 = arith.add %129, %130 <{ overflow = #builtin.overflow }>; + %132 = arith.constant 4 : u32; + %133 = arith.mod %131, %132; + hir.assertz %133 <{ code = #builtin.u32<250> }> : (u32); + %134 = hir.int_to_ptr %131 <{ ty = #builtin.type> }>; + 
hir.store %134, %128 : (ptr, i32); + cf.br ^block12; + ^block15: + %88 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %89 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %90 = hir.bitcast %89 <{ ty = #builtin.type }>; + %91 = arith.constant 4 : u32; + %92 = arith.add %90, %91 <{ overflow = #builtin.overflow }>; + %93 = arith.constant 4 : u32; + %94 = arith.mod %92, %93; + hir.assertz %94 <{ code = #builtin.u32<250> }> : (u32); + %95 = hir.int_to_ptr %92 <{ ty = #builtin.type> }>; + %96 = hir.load %95; + %97 = arith.constant 1 : i32; + %98 = arith.add %96, %97 <{ overflow = #builtin.overflow }>; + %99 = hir.bitcast %88 <{ ty = #builtin.type }>; + %100 = arith.constant 4 : u32; + %101 = arith.add %99, %100 <{ overflow = #builtin.overflow }>; + %102 = arith.constant 4 : u32; + %103 = arith.mod %101, %102; + hir.assertz %103 <{ code = #builtin.u32<250> }> : (u32); + %104 = hir.int_to_ptr %101 <{ ty = #builtin.type> }>; + hir.store %104, %98 : (ptr, i32); + cf.br ^block8; + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/src/end_to_end/debuginfo/expected/debug_simple_params.hir b/tests/integration/src/end_to_end/debuginfo/expected/debug_simple_params.hir new file mode 100644 index 000000000..0425b0c80 --- /dev/null +++ b/tests/integration/src/end_to_end/debuginfo/expected/debug_simple_params.hir @@ -0,0 +1,51 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_f87bb1f3934b1e6844c1c9b6ceccb97d68349d13274693606c56673a28b6e537 { + builtin.function public extern("C") @entrypoint(%0: i32, %1: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); 
+ hir.store_local %1 <{ local = #builtin.local_variable<1, i32> }> : (i32); + %3 = builtin.global_symbol ::@root_ns:root@1.0.0::@test_rust_f87bb1f3934b1e6844c1c9b6ceccb97d68349d13274693606c56673a28b6e537::@__stack_pointer : ptr; + %4 = hir.bitcast %3 <{ ty = #builtin.type> }>; + %5 = hir.load %4; + di.debug_value %0 <{ variable = #di.variable<{ name = "a", file = "test_rust_f87bb1f3934b1e6844c1c9b6ceccb97d68349d13274693606c56673a28b6e537.rs", line = 19, arg = 0, ty = i32 }>, expression = #di.expression<[DW_OP_fbreg(local, 2+8)]> }> : (i32); + di.debug_value %1 <{ variable = #di.variable<{ name = "b", file = "test_rust_f87bb1f3934b1e6844c1c9b6ceccb97d68349d13274693606c56673a28b6e537.rs", line = 19, arg = 1, ty = i32 }>, expression = #di.expression<[DW_OP_fbreg(local, 2+12)]> }> : (i32); + %6 = arith.constant 16 : i32; + %7 = arith.sub %5, %6 <{ overflow = #builtin.overflow }>; + hir.store_local %7 <{ local = #builtin.local_variable<2, i32> }> : (i32); + %8 = hir.load_local <{ local = #builtin.local_variable<2, i32> }>; + %9 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %10 = hir.bitcast %8 <{ ty = #builtin.type }>; + %11 = arith.constant 8 : u32; + %12 = arith.add %10, %11 <{ overflow = #builtin.overflow }>; + %13 = arith.constant 4 : u32; + %14 = arith.mod %12, %13; + hir.assertz %14 <{ code = #builtin.u32<250> }> : (u32); + %15 = hir.int_to_ptr %12 <{ ty = #builtin.type> }>; + hir.store %15, %9 : (ptr, i32); + %16 = hir.load_local <{ local = #builtin.local_variable<2, i32> }>; + %17 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %18 = hir.bitcast %16 <{ ty = #builtin.type }>; + %19 = arith.constant 12 : u32; + %20 = arith.add %18, %19 <{ overflow = #builtin.overflow }>; + %21 = arith.constant 4 : u32; + %22 = arith.mod %20, %21; + hir.assertz %22 <{ code = #builtin.u32<250> }> : (u32); + %23 = hir.int_to_ptr %20 <{ ty = #builtin.type> }>; + hir.store %23, %17 : (ptr, i32); + %24 = hir.load_local <{ local = 
#builtin.local_variable<0, i32> }>; + %25 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %26 = arith.add %24, %25 <{ overflow = #builtin.overflow }>; + builtin.ret %26 : (i32); + ^block7(%2: i32): + + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/src/end_to_end/debuginfo/expected/debug_variable_locations.hir b/tests/integration/src/end_to_end/debuginfo/expected/debug_variable_locations.hir new file mode 100644 index 000000000..2ae0a6ea9 --- /dev/null +++ b/tests/integration/src/end_to_end/debuginfo/expected/debug_variable_locations.hir @@ -0,0 +1,142 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b { + builtin.function public extern("C") @entrypoint(%0: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + %2 = builtin.global_symbol ::@root_ns:root@1.0.0::@test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b::@__stack_pointer : ptr; + %3 = hir.bitcast %2 <{ ty = #builtin.type> }>; + %4 = hir.load %3; + di.debug_value %0 <{ variable = #di.variable<{ name = "n", file = "test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b.rs", line = 19, arg = 0, ty = i32 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+12)]> }> : (i32); + di.debug_declare <{ variable = #di.variable<{ name = "sum", file = "test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b.rs", line = 20 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+4)]> }>; + di.debug_declare <{ variable = #di.variable<{ name = "i", file = 
"test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b.rs", line = 21 }>, expression = #di.expression<[DW_OP_fbreg(local, 1+8)]> }>; + %5 = arith.constant 16 : i32; + %6 = arith.sub %4, %5 <{ overflow = #builtin.overflow }>; + hir.store_local %6 <{ local = #builtin.local_variable<1, i32> }> : (i32); + %7 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %8 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %9 = hir.bitcast %7 <{ ty = #builtin.type }>; + %10 = arith.constant 12 : u32; + %11 = arith.add %9, %10 <{ overflow = #builtin.overflow }>; + %12 = arith.constant 4 : u32; + %13 = arith.mod %11, %12; + hir.assertz %13 <{ code = #builtin.u32<250> }> : (u32); + %14 = hir.int_to_ptr %11 <{ ty = #builtin.type> }>; + hir.store %14, %8 : (ptr, i32); + %15 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %16 = arith.constant 0 : i32; + %17 = hir.bitcast %15 <{ ty = #builtin.type }>; + %18 = arith.constant 4 : u32; + %19 = arith.add %17, %18 <{ overflow = #builtin.overflow }>; + %20 = arith.constant 4 : u32; + %21 = arith.mod %19, %20; + hir.assertz %21 <{ code = #builtin.u32<250> }> : (u32); + %22 = hir.int_to_ptr %19 <{ ty = #builtin.type> }>; + hir.store %22, %16 : (ptr, i32); + %23 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %24 = arith.constant 0 : i32; + %25 = hir.bitcast %23 <{ ty = #builtin.type }>; + %26 = arith.constant 8 : u32; + %27 = arith.add %25, %26 <{ overflow = #builtin.overflow }>; + %28 = arith.constant 4 : u32; + %29 = arith.mod %27, %28; + hir.assertz %29 <{ code = #builtin.u32<250> }> : (u32); + %30 = hir.int_to_ptr %27 <{ ty = #builtin.type> }>; + hir.store %30, %24 : (ptr, i32); + cf.br ^block8; + ^block7(%1: i32): + + ^block8: + %32 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %33 = hir.bitcast %32 <{ ty = #builtin.type }>; + %34 = arith.constant 8 : u32; + %35 = arith.add %33, %34 <{ overflow = #builtin.overflow }>; + %36 = 
arith.constant 4 : u32; + %37 = arith.mod %35, %36; + hir.assertz %37 <{ code = #builtin.u32<250> }> : (u32); + %38 = hir.int_to_ptr %35 <{ ty = #builtin.type> }>; + %39 = hir.load %38; + %40 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %41 = hir.bitcast %39 <{ ty = #builtin.type }>; + %42 = hir.bitcast %40 <{ ty = #builtin.type }>; + %43 = arith.lte %41, %42; + %44 = arith.zext %43 <{ ty = #builtin.type }>; + %45 = hir.bitcast %44 <{ ty = #builtin.type }>; + %46 = arith.constant 1 : i32; + %47 = arith.band %45, %46; + %48 = arith.constant 0 : i32; + %49 = arith.neq %47, %48; + cf.cond_br %49 ^block10, ^block11 : (i1); + ^block9(%31: i32): + + ^block10: + %58 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %59 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %60 = hir.bitcast %59 <{ ty = #builtin.type }>; + %61 = arith.constant 8 : u32; + %62 = arith.add %60, %61 <{ overflow = #builtin.overflow }>; + %63 = arith.constant 4 : u32; + %64 = arith.mod %62, %63; + hir.assertz %64 <{ code = #builtin.u32<250> }> : (u32); + %65 = hir.int_to_ptr %62 <{ ty = #builtin.type> }>; + %66 = hir.load %65; + %67 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %68 = hir.bitcast %67 <{ ty = #builtin.type }>; + %69 = arith.constant 4 : u32; + %70 = arith.add %68, %69 <{ overflow = #builtin.overflow }>; + %71 = arith.constant 4 : u32; + %72 = arith.mod %70, %71; + hir.assertz %72 <{ code = #builtin.u32<250> }> : (u32); + %73 = hir.int_to_ptr %70 <{ ty = #builtin.type> }>; + %74 = hir.load %73; + %75 = arith.add %66, %74 <{ overflow = #builtin.overflow }>; + %76 = hir.bitcast %58 <{ ty = #builtin.type }>; + %77 = arith.constant 4 : u32; + %78 = arith.add %76, %77 <{ overflow = #builtin.overflow }>; + %79 = arith.constant 4 : u32; + %80 = arith.mod %78, %79; + hir.assertz %80 <{ code = #builtin.u32<250> }> : (u32); + %81 = hir.int_to_ptr %78 <{ ty = #builtin.type> }>; + hir.store %81, %75 : (ptr, i32); + %82 = 
hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %83 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %84 = hir.bitcast %83 <{ ty = #builtin.type }>; + %85 = arith.constant 8 : u32; + %86 = arith.add %84, %85 <{ overflow = #builtin.overflow }>; + %87 = arith.constant 4 : u32; + %88 = arith.mod %86, %87; + hir.assertz %88 <{ code = #builtin.u32<250> }> : (u32); + %89 = hir.int_to_ptr %86 <{ ty = #builtin.type> }>; + %90 = hir.load %89; + %91 = arith.constant 1 : i32; + %92 = arith.add %90, %91 <{ overflow = #builtin.overflow }>; + %93 = hir.bitcast %82 <{ ty = #builtin.type }>; + %94 = arith.constant 8 : u32; + %95 = arith.add %93, %94 <{ overflow = #builtin.overflow }>; + %96 = arith.constant 4 : u32; + %97 = arith.mod %95, %96; + hir.assertz %97 <{ code = #builtin.u32<250> }> : (u32); + %98 = hir.int_to_ptr %95 <{ ty = #builtin.type> }>; + hir.store %98, %92 : (ptr, i32); + cf.br ^block8; + ^block11: + %50 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %51 = hir.bitcast %50 <{ ty = #builtin.type }>; + %52 = arith.constant 4 : u32; + %53 = arith.add %51, %52 <{ overflow = #builtin.overflow }>; + %54 = arith.constant 4 : u32; + %55 = arith.mod %53, %54; + hir.assertz %55 <{ code = #builtin.u32<250> }> : (u32); + %56 = hir.int_to_ptr %53 <{ ty = #builtin.type> }>; + %57 = hir.load %56; + builtin.ret %57 : (i32); + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @global2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/src/end_to_end/debuginfo/mod.rs b/tests/integration/src/end_to_end/debuginfo/mod.rs index 480253df1..32f32d69d 100644 --- a/tests/integration/src/end_to_end/debuginfo/mod.rs +++ b/tests/integration/src/end_to_end/debuginfo/mod.rs @@ -1 
+1,2 @@ mod source_locations; +mod variables; diff --git a/tests/integration/src/end_to_end/debuginfo/variables.rs b/tests/integration/src/end_to_end/debuginfo/variables.rs new file mode 100644 index 000000000..165080468 --- /dev/null +++ b/tests/integration/src/end_to_end/debuginfo/variables.rs @@ -0,0 +1,116 @@ +use std::borrow::Cow; + +use midenc_expect_test::expect_file; + +use crate::{CompilerTestBuilder, testing::setup}; + +fn debug_rustflags() -> [Cow<'static, str>; 6] { + [ + Cow::Borrowed("-C"), + Cow::Borrowed("debuginfo=2"), + Cow::Borrowed("-C"), + Cow::Borrowed("opt-level=0"), + Cow::Borrowed("-C"), + Cow::Borrowed("overflow-checks=off"), + ] +} + +#[test] +fn variable_locations_schedule() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum += i; + i += 1; + } + sum + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags(debug_rustflags()); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["expected/debug_variable_locations.hir"]); +} + +#[test] +fn debug_simple_params() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (a: u32, b: u32) -> u32 { + a + b + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags(debug_rustflags()); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["expected/debug_simple_params.hir"]); +} + +#[test] +fn debug_conditional_assignment() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (x: u32) -> u32 { + let result = if x > 10 { x * 2 } else { x + 1 }; + result + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags(debug_rustflags()); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["expected/debug_conditional_assignment.hir"]); +} + +#[test] +fn debug_multiple_locals() 
{ + setup::enable_compiler_instrumentation(); + + let source = r#" + (n: u32) -> u32 { + let a: u32 = n + 1; + let b: u32 = n * 2; + let c: u32 = a + b; + c + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags(debug_rustflags()); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["expected/debug_multiple_locals.hir"]); +} + +#[test] +fn debug_nested_loops() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (n: u32) -> u32 { + let mut total = 0u32; + let mut i = 0u32; + while i < n { + let mut j = 0u32; + while j < i { + total += 1; + j += 1; + } + i += 1; + } + total + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags(debug_rustflags()); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["expected/debug_nested_loops.hir"]); +} diff --git a/tests/integration/src/end_to_end/examples/basic_wallet_package_sizes.rs b/tests/integration/src/end_to_end/examples/basic_wallet_package_sizes.rs index 48575ec08..603711064 100644 --- a/tests/integration/src/end_to_end/examples/basic_wallet_package_sizes.rs +++ b/tests/integration/src/end_to_end/examples/basic_wallet_package_sizes.rs @@ -10,7 +10,7 @@ fn basic_wallet_and_p2id() { CompilerTest::rust_source_cargo_miden("../../examples/basic-wallet", config.clone(), []); let account_package = account_test.compile_package(); assert!(account_package.is_library(), "expected library"); - expect!["35906"].assert_eq(stripped_mast_size_str(&account_package)); + expect!["36014"].assert_eq(stripped_mast_size_str(&account_package)); let mut tx_script_test = CompilerTest::rust_source_cargo_miden( "../../examples/basic-wallet-tx-script", @@ -19,17 +19,17 @@ fn basic_wallet_and_p2id() { ); let tx_script_package = tx_script_test.compile_package(); assert!(tx_script_package.is_program(), "expected program"); - 
expect!["56437"].assert_eq(stripped_mast_size_str(&tx_script_package)); + expect!["56555"].assert_eq(stripped_mast_size_str(&tx_script_package)); let mut p2id_test = CompilerTest::rust_source_cargo_miden("../../examples/p2id-note", config.clone(), []); let note_package = p2id_test.compile_package(); assert!(note_package.is_library(), "expected library"); - expect!["53082"].assert_eq(stripped_mast_size_str(¬e_package)); + expect!["53190"].assert_eq(stripped_mast_size_str(¬e_package)); let mut p2ide_test = CompilerTest::rust_source_cargo_miden("../../examples/p2ide-note", config, []); let p2ide_package = p2ide_test.compile_package(); assert!(p2ide_package.is_library(), "expected library"); - expect!["62672"].assert_eq(stripped_mast_size_str(&p2ide_package)); + expect!["62781"].assert_eq(stripped_mast_size_str(&p2ide_package)); } diff --git a/tests/lit/debug/function_metadata.rs b/tests/lit/debug/function_metadata.rs new file mode 100644 index 000000000..7bf99b7dc --- /dev/null +++ b/tests/lit/debug/function_metadata.rs @@ -0,0 +1,14 @@ +#![no_std] +#![no_main] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn multiply(x: u32, y: u32) -> u32 { + x * y +} diff --git a/tests/lit/debug/function_metadata.shtest b/tests/lit/debug/function_metadata.shtest new file mode 100644 index 000000000..5909e6eea --- /dev/null +++ b/tests/lit/debug/function_metadata.shtest @@ -0,0 +1,6 @@ +# Test that HIR includes source locations for function parameters +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/function_metadata.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/function_metadata.wasm\" && bin/midenc \"\$TMPDIR/function_metadata.wasm\" --entrypoint=function_metadata::multiply -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function{{.*}}@multiply +# CHECK: loc({{.*}}function_metadata.rs:{{[0-9]+}} diff --git a/tests/lit/debug/lit.suite.toml b/tests/lit/debug/lit.suite.toml new file mode 100644 index 000000000..f95cc52c0 --- /dev/null +++ b/tests/lit/debug/lit.suite.toml @@ -0,0 +1,5 @@ +name = "debug" +patterns = ["*.shtest"] +working_dir = "../../../" + +[format.shtest] diff --git a/tests/lit/debug/location_expressions.rs b/tests/lit/debug/location_expressions.rs new file mode 100644 index 000000000..c899970e3 --- /dev/null +++ b/tests/lit/debug/location_expressions.rs @@ -0,0 +1,25 @@ +// Test file to verify location expressions in debug info +// Using no_std to avoid runtime overhead + +#![no_std] +#![no_main] + +#[panic_handler] +fn panic(_: &core::panic::PanicInfo) -> ! 
{ + loop {} +} + +#[no_mangle] +pub extern "C" fn test_expressions(p0: i32, p1: i32, p2: i32, p3: i32) -> i32 { + // These parameters should be in WASM locals 0, 1, 2, 3 + // The debug info expressions should show: + // p0 -> DW_OP_WASM_local 0 + // p1 -> DW_OP_WASM_local 1 + // p2 -> DW_OP_WASM_local 2 + // p3 -> DW_OP_WASM_local 3 + + // Simple arithmetic using all parameters + let sum1 = p0.wrapping_add(p1); + let sum2 = p2.wrapping_add(p3); + sum1.wrapping_add(sum2) +} \ No newline at end of file diff --git a/tests/lit/debug/location_expressions.shtest b/tests/lit/debug/location_expressions.shtest new file mode 100644 index 000000000..0d7ca8088 --- /dev/null +++ b/tests/lit/debug/location_expressions.shtest @@ -0,0 +1,9 @@ +# Test that debug info with source locations is properly represented in HIR +# This test verifies that operations include source location annotations +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/location_expressions.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/location_expressions.wasm\" && bin/midenc \"\$TMPDIR/location_expressions.wasm\" --entrypoint=location_expressions::test_expressions -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Test that the function exists with 4 parameters +# CHECK-LABEL: builtin.function{{.*}}@test_expressions({{.*}}: i32, {{.*}}: i32, {{.*}}: i32, {{.*}}: i32) -> i32 + +# Test that operations have source location annotations +# CHECK: loc({{.*}}location_expressions.rs:{{[0-9]+}} diff --git a/tests/lit/debug/simple_debug.rs b/tests/lit/debug/simple_debug.rs new file mode 100644 index 000000000..342241ecb --- /dev/null +++ b/tests/lit/debug/simple_debug.rs @@ -0,0 +1,14 @@ +#![no_std] +#![no_main] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn add(a: u32, b: u32) -> u32 { + a + b +} diff --git a/tests/lit/debug/simple_debug.shtest b/tests/lit/debug/simple_debug.shtest new file mode 100644 index 000000000..073ec66e3 --- /dev/null +++ b/tests/lit/debug/simple_debug.shtest @@ -0,0 +1,6 @@ +# Test that basic debug info source locations are emitted for a simple function +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/simple_debug.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/simple_debug.wasm\" && bin/midenc \"\$TMPDIR/simple_debug.wasm\" --entrypoint=simple_debug::add -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function{{.*}}@add +# CHECK: loc({{.*}}simple_debug.rs:{{[0-9]+}} diff --git a/tests/lit/debug/variable_locations.rs b/tests/lit/debug/variable_locations.rs new file mode 100644 index 000000000..623d268de --- /dev/null +++ b/tests/lit/debug/variable_locations.rs @@ -0,0 +1,21 @@ +#![no_std] +#![no_main] +#![allow(unused_unsafe)] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn entrypoint(n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum = sum + i; + i = i + 1; + } + sum +} diff --git a/tests/lit/debug/variable_locations.shtest b/tests/lit/debug/variable_locations.shtest new file mode 100644 index 000000000..2ddf07a5a --- /dev/null +++ b/tests/lit/debug/variable_locations.shtest @@ -0,0 +1,6 @@ +# Test that debug info tracks source locations in a loop +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/variable_locations.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/variable_locations.wasm\" && bin/midenc \"\$TMPDIR/variable_locations.wasm\" --entrypoint=variable_locations::entrypoint -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function{{.*}}@entrypoint +# CHECK: loc({{.*}}variable_locations.rs:{{[0-9]+}} diff --git a/tests/lit/debugdump/lit.suite.toml b/tests/lit/debugdump/lit.suite.toml new file mode 100644 index 000000000..00831fd3c --- /dev/null +++ b/tests/lit/debugdump/lit.suite.toml @@ -0,0 +1,11 @@ +name = "debugdump" +patterns = ["*.wat"] +working_dir = "../../../" + +[substitutions] +"midenc" = "$$MIDENC_BIN_DIR/midenc" +"miden-objtool" = "$$MIDENC_BIN_DIR/miden-objtool" +"%cargo" = "cargo +$$CARGO_MAKE_RUSTUP_TOOLCHAIN_NAME" +"%target_dir" = "$$CARGO_TARGET_DIR" + +[format.shtest] diff --git a/tests/lit/debugdump/locations-source-loc.wat b/tests/lit/debugdump/locations-source-loc.wat new file mode 100644 index 000000000..380ec9d73 --- /dev/null +++ b/tests/lit/debugdump/locations-source-loc.wat @@ -0,0 +1,25 @@ +;; Test that .debug_loc section shows DebugVar entries with source locations +;; from a real Rust project compiled with debug info. 
+;; +;; RUN: %cargo build --target-dir %target_dir/debugdump-source-location --release --target wasm32-unknown-unknown --manifest-path tests/lit/source-location/test-project/Cargo.toml 2>&1 +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%target_dir/debugdump-source-location/wasm32-unknown-unknown/release/source_location_test.wasm' --lib --debug full -o \"\$TMPFILE\" && miden-objtool dump debug-info \"\$TMPFILE\" --section locations" | filecheck %s + +;; Check header +;; CHECK: .debug_loc contents (DebugVar entries from MAST): +;; CHECK: Total DebugVar entries: 4 +;; CHECK: Unique variable names: 3 + +;; Check variable "arg0" - parameter from test_assertion function +;; CHECK: Variable: "arg0" +;; CHECK: 1 location entries: +;; CHECK: FMP-4 (param #1) + +;; Check variable "local3" - from panic handler +;; CHECK: Variable: "local3" +;; CHECK: 1 location entries: +;; CHECK: FMP-1 + +;; Check variable "x" - parameter from entrypoint function +;; CHECK: Variable: "x" +;; CHECK: 2 location entries: +;; CHECK: FMP-4 (param #1) diff --git a/tests/lit/debugdump/locations.wat b/tests/lit/debugdump/locations.wat new file mode 100644 index 000000000..37bd8d372 --- /dev/null +++ b/tests/lit/debugdump/locations.wat @@ -0,0 +1,22 @@ +;; Test that .debug_loc section is present and handles empty case +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-objtool dump debug-info \"\$TMPFILE\" --section locations" | filecheck %s + +;; Check header for .debug_loc section +;; CHECK: .debug_loc contents (DebugVar entries from MAST): +;; For raw WAT files without debug info, we expect no decorators +;; CHECK: (no DebugVar entries found) + +(module + (func $add (export "add") (param i32 i32) (result i32) + local.get 0 + local.get 1 + i32.add + ) + + (func $entrypoint (export "entrypoint") + i32.const 5 + i32.const 3 + call $add + drop + ) +) diff --git 
a/tests/lit/debugdump/simple.wat b/tests/lit/debugdump/simple.wat new file mode 100644 index 000000000..d3ebbd6d2 --- /dev/null +++ b/tests/lit/debugdump/simple.wat @@ -0,0 +1,32 @@ +;; Test that miden-objtool correctly parses and displays debug info from a .masp file +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-objtool dump debug-info \"\$TMPFILE\"" | filecheck %s + +;; Check header +;; CHECK: DEBUG INFO DUMP: +;; CHECK: Debug info versions: + +;; Check summary section is present +;; CHECK: .debug_info summary: +;; CHECK: Strings: +;; CHECK: Types: +;; CHECK: Files: +;; CHECK: Functions: + +;; Check that we have functions from the WAT +;; CHECK: .debug_functions contents: +;; CHECK: FUNCTION: add +;; CHECK: FUNCTION: multiply + +(module + (func $add (export "add") (param $a i32) (param $b i32) (result i32) + local.get $a + local.get $b + i32.add + ) + + (func $multiply (export "multiply") (param $x i32) (param $y i32) (result i32) + local.get $x + local.get $y + i32.mul + ) +) diff --git a/tests/lit/debugdump/summary.wat b/tests/lit/debugdump/summary.wat new file mode 100644 index 000000000..102425333 --- /dev/null +++ b/tests/lit/debugdump/summary.wat @@ -0,0 +1,21 @@ +;; Test that miden-objtool --summary shows only summary output +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-objtool dump debuginfo \"\$TMPFILE\" --summary" | filecheck %s + +;; Check summary is present +;; CHECK: .debug_info summary: +;; CHECK: Strings: +;; CHECK: Types:{{.*}}entries +;; CHECK: Files:{{.*}}entries +;; CHECK: Functions:{{.*}}entries + +;; Make sure full dump sections are NOT present with --summary +;; CHECK-NOT: .debug_str contents: +;; CHECK-NOT: .debug_types contents: +;; CHECK-NOT: .debug_files contents: +;; CHECK-NOT: .debug_functions contents: + +(module + (func $test (export "test") (param i32) (result 
i32) + local.get 0 + ) +) diff --git a/tests/lit/lit.suite.toml b/tests/lit/lit.suite.toml index 826f0e25a..834e059fb 100644 --- a/tests/lit/lit.suite.toml +++ b/tests/lit/lit.suite.toml @@ -4,6 +4,7 @@ patterns = ["*.wat", "*.masm", "*.stderr"] [substitutions] "midenc" = "$$MIDENC_BIN_DIR/midenc" "hir-opt" = "$$MIDENC_BIN_DIR/hir-opt" +"miden-objtool" = "$$MIDENC_BIN_DIR/miden-objtool" "%cargo" = "cargo +$$CARGO_MAKE_RUSTUP_TOOLCHAIN_NAME" "%target_dir" = "$$CARGO_TARGET_DIR" diff --git a/tests/lit/source-location/test-project/Cargo.toml b/tests/lit/source-location/test-project/Cargo.toml new file mode 100644 index 000000000..9abe2f178 --- /dev/null +++ b/tests/lit/source-location/test-project/Cargo.toml @@ -0,0 +1,17 @@ +cargo-features = ["trim-paths"] + +[package] +name = "source_location_test" +version = "0.1.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[profile.release] +debug = true +trim-paths = ["diagnostics", "object"] + +[profile.dev] +debug = true +trim-paths = ["diagnostics", "object"] diff --git a/tests/lit/source-location/test-project/src/lib.rs b/tests/lit/source-location/test-project/src/lib.rs new file mode 100644 index 000000000..35082cd10 --- /dev/null +++ b/tests/lit/source-location/test-project/src/lib.rs @@ -0,0 +1,20 @@ +#![no_std] +#![no_main] + +#[panic_handler] +fn my_panic(_info: &core::panic::PanicInfo) -> ! { + core::arch::wasm32::unreachable() +} + +#[unsafe(no_mangle)] +pub extern "C" fn test_assertion(x: u32) -> u32 { + assert!(x > 100, "x should be greater than 100"); + + x +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub fn entrypoint(x: u32) -> u32 { + test_assertion(x) +} diff --git a/tests/lit/variable_locations.rs b/tests/lit/variable_locations.rs new file mode 100644 index 000000000..623d268de --- /dev/null +++ b/tests/lit/variable_locations.rs @@ -0,0 +1,21 @@ +#![no_std] +#![no_main] +#![allow(unused_unsafe)] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn entrypoint(n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum = sum + i; + i = i + 1; + } + sum +} diff --git a/tests/support/src/compiler_test.rs b/tests/support/src/compiler_test.rs index 4dadc4390..6b72bd760 100644 --- a/tests/support/src/compiler_test.rs +++ b/tests/support/src/compiler_test.rs @@ -336,28 +336,27 @@ impl CompilerTestBuilder { } } - // All test source types support custom RUSTFLAGS - let mut rustflags_env = None::; - if !self.rustflags.is_empty() { - let mut flags = String::with_capacity( - self.rustflags.iter().map(|flag| flag.len()).sum::() + self.rustflags.len(), - ); - for (i, flag) in self.rustflags.iter().enumerate() { - if i > 0 { - flags.push(' '); - } - flags.push_str(flag.as_ref()); - } - command.env("RUSTFLAGS", &flags); - rustflags_env = Some(flags); - } - // Pipe output of command to terminal command.stdout(Stdio::piped()); // Build test match source { CompilerTestInputType::CargoMiden(config) => { + let mut rustflags_env = None::; + if !self.rustflags.is_empty() { + let mut flags = String::with_capacity( + self.rustflags.iter().map(|flag| flag.len()).sum::() + + self.rustflags.len(), + ); + for (i, flag) in self.rustflags.iter().enumerate() { + if i > 0 { + flags.push(' '); + } + flags.push_str(flag.as_ref()); + } + command.env("RUSTFLAGS", &flags); + rustflags_env = Some(flags); + } maybe_dump_cargo_expand(&config, rustflags_env.as_deref()); let mut args = vec![command.get_program().to_str().unwrap().to_string()]; @@ -416,6 +415,7 @@ impl CompilerTestBuilder { .target_dir .clone() .unwrap_or_else(|| std::env::temp_dir().join(config.name.as_ref())); + let working_dir = working_dir.canonicalize().unwrap_or(working_dir); if working_dir.exists() { fs::remove_dir_all(&working_dir).unwrap(); } @@ -429,9 +429,29 @@ impl CompilerTestBuilder { // Output is the same name as the input, just with a different extension let 
output_file = basename.with_extension("wasm"); + // `RUSTFLAGS` is for Cargo, direct `rustc` invocations need those flags + // passed via argv. + let mut rustc_flags = Vec::with_capacity(self.rustflags.len()); + let mut flags = self.rustflags.iter().map(|flag| flag.as_ref()); + while let Some(flag) = flags.next() { + if flag == "-C" + && let Some(value) = flags.next() + { + if value == "panic=immediate-abort" { + continue; + } + rustc_flags.extend([flag, value]); + } else { + rustc_flags.push(flag); + } + } + let output = command - .args(["-C", "opt-level=z"]) // optimize for size + .arg("--remap-path-prefix") + .arg(format!("{}=.", working_dir.display())) + .args(["-C", "opt-level=s"]) // optimize for size .args(["-C", "target-feature=+wide-arithmetic"]) + .args(rustc_flags) .arg("--target") .arg(config.target.as_ref()) .arg("-o") diff --git a/tools/cargo-miden/src/commands/build.rs b/tools/cargo-miden/src/commands/build.rs index 1ffe56e91..f27babdb7 100644 --- a/tools/cargo-miden/src/commands/build.rs +++ b/tools/cargo-miden/src/commands/build.rs @@ -198,9 +198,14 @@ fn build_cargo_args(cargo_opts: &CargoOptions) -> Vec { // Add build-std flags required for Miden compilation args.extend( - ["-Z", "build-std=core,alloc,panic_abort", "-Z", "build-std-features="] - .into_iter() - .map(|s| s.to_string()), + [ + "-Z", + "build-std=core,alloc,panic_abort", + "-Z", + "build-std-features=optimize_for_size", + ] + .into_iter() + .map(|s| s.to_string()), ); // Configure profile settings diff --git a/tools/objtool/Cargo.toml b/tools/objtool/Cargo.toml index e2c99c4e8..d8b1cc15a 100644 --- a/tools/objtool/Cargo.toml +++ b/tools/objtool/Cargo.toml @@ -25,6 +25,7 @@ bench = false [dependencies] anyhow.workspace = true clap.workspace = true -miden-core.workspace = true miden-assembly-syntax = { workspace = true, features = ["std"] } +miden-core.workspace = true miden-mast-package.workspace = true +thiserror.workspace = true diff --git a/tools/objtool/src/decorators.rs 
b/tools/objtool/src/decorators.rs index f0b018471..85bae0147 100644 --- a/tools/objtool/src/decorators.rs +++ b/tools/objtool/src/decorators.rs @@ -9,8 +9,10 @@ use miden_core::{ use miden_mast_package::{Package, TargetType}; #[derive(Debug, Clone, Args)] +#[command(arg_required_else_help = true)] pub struct DecoratorsCommand { /// Path to the input .masp file + #[arg(required = true)] pub path: PathBuf, } diff --git a/tools/objtool/src/dump.rs b/tools/objtool/src/dump.rs new file mode 100644 index 000000000..4a4b518cc --- /dev/null +++ b/tools/objtool/src/dump.rs @@ -0,0 +1,51 @@ +mod debuginfo; + +use clap::{Subcommand, ValueEnum}; + +/// Dump useful information from assembled Miden packages +#[derive(Debug, Subcommand)] +#[command(name = "debuginfo", rename_all = "kebab-case")] +pub enum Dump { + /// Dump debug information encoded in a .masp file + DebugInfo(debuginfo::Config), +} + +/// The set of known sections that we've added dump support for +#[derive(Debug, Clone, Copy, ValueEnum)] +pub enum Section { + /// Show string table + Strings, + /// Show type information + Types, + /// Show source file information + Files, + /// Show function debug information + Functions, + /// Show variable information within functions + Variables, + /// Show variable location decorators from MAST (similar to DWARF .debug_loc) + Locations, +} + +#[derive(Debug, thiserror::Error)] +pub enum DumpError { + #[error("failed to read file: {0}")] + Io(#[from] std::io::Error), + #[error("failed to parse package: {0}")] + Parse(String), + #[error("no debug_info section found in package")] + NoDebugInfo, +} + +impl From for DumpError { + #[inline] + fn from(err: miden_core::serde::DeserializationError) -> Self { + Self::Parse(err.to_string()) + } +} + +pub fn run(command: &Dump) -> Result<(), DumpError> { + match command { + Dump::DebugInfo(config) => debuginfo::dump(config), + } +} diff --git a/tools/objtool/src/dump/debuginfo.rs b/tools/objtool/src/dump/debuginfo.rs new file mode 100644 
index 000000000..f1721751c
--- /dev/null
+++ b/tools/objtool/src/dump/debuginfo.rs
@@ -0,0 +1,695 @@
+//! A command to dump debug information from MASP packages
+//!
+//! Similar to llvm-dwarfdump, this tool parses the `.debug_info` section from compiled MASP
+//! packages and displays the debug metadata in a human-readable format.
+use std::{collections::BTreeMap, path::PathBuf};
+
+use clap::Args;
+use miden_core::{
+    mast::MastForest,
+    operations::{DebugVarInfo, DebugVarLocation},
+    serde::{Deserializable, SliceReader},
+};
+use miden_mast_package::{
+    Package, SectionId,
+    debug_info::{
+        DebugFileInfo, DebugFunctionInfo, DebugFunctionsSection, DebugPrimitiveType,
+        DebugSourcesSection, DebugTypeIdx, DebugTypeInfo, DebugTypesSection, DebugVariableInfo,
+    },
+};
+
+use super::{DumpError, Section};
+
+/// Dump debug information encoded in a .masp file
+#[derive(Debug, Args)]
+pub struct Config {
+    /// The input package to dump info from
+    #[arg(required = true)]
+    input: PathBuf,
+
+    /// Filter output to a specific section
+    #[arg(short, long, value_enum)]
+    section: Option<Section>,
+
+    /// Show all available information
+    #[arg(short, long)]
+    verbose: bool,
+
+    /// Show raw indices instead of resolved names
+    #[arg(long)]
+    raw: bool,
+
+    /// Only show summary statistics
+    #[arg(long)]
+    summary: bool,
+}
+
+/// Dump the debug information of the package at `config.input` to stdout.
+///
+/// A header is always printed. With `--summary` only the summary follows; with `--section`
+/// only that section; otherwise the summary and every dump except the variables listing.
+///
+/// # Errors
+///
+/// Fails if the file cannot be read, if the package or one of its debug sections cannot be
+/// deserialized, or if the package contains none of the three debug sections.
+pub fn dump(config: &Config) -> Result<(), DumpError> {
+    // Read the MASP file as raw bytes; it is deserialized from bytes, not from UTF-8 text
+    let bytes = std::fs::read(&config.input)?;
+
+    // Parse the package
+    let package: Package = Package::read_from(&mut SliceReader::new(&bytes))
+        .map_err(|e| DumpError::Parse(e.to_string()))?;
+
+    // Get the MAST forest for location decorators
+    let mast_forest = package.mast.mast_forest();
+
+    // Find the three debug sections
+    let types_section = extract_section::<DebugTypesSection>(&package, SectionId::DEBUG_TYPES)?;
+    let sources_section =
+        extract_section::<DebugSourcesSection>(&package, SectionId::DEBUG_SOURCES)?;
+    let functions_section =
+        extract_section::<DebugFunctionsSection>(&package, SectionId::DEBUG_FUNCTIONS)?;
+
+    // We need at least one section to proceed
+    if types_section.is_none() && sources_section.is_none() && functions_section.is_none() {
+        return Err(DumpError::NoDebugInfo);
+    }
+
+    // Parse each section (use empty defaults if missing)
+    let debug_sections = DebugSections {
+        types: types_section.unwrap_or_default(),
+        sources: sources_section.unwrap_or_default(),
+        functions: functions_section.unwrap_or_default(),
+    };
+
+    // Print header
+    println!("{}", "=".repeat(80));
+    println!("Package Info:");
+    println!(" | Name: {}", &package.name);
+    println!(" | Version: {}", &package.version);
+    println!(" | Kind: {}", &package.kind);
+    println!("Section Versioning:");
+    println!(" | Types: {}", debug_sections.types.version);
+    println!(" | Sources: {}", debug_sections.sources.version);
+    println!(" | Functions: {}", debug_sections.functions.version);
+    println!("{}", "=".repeat(80));
+    println!();
+
+    if config.summary {
+        print_summary(&debug_sections, mast_forest);
+        return Ok(());
+    }
+
+    match config.section {
+        Some(Section::Strings) => print_strings(&debug_sections),
+        Some(Section::Types) => print_types(&debug_sections, config.raw),
+        Some(Section::Files) => print_files(&debug_sections, config.raw),
+        Some(Section::Functions) => print_functions(&debug_sections, config.raw, config.verbose),
+        Some(Section::Variables) => print_variables(&debug_sections, config.raw),
+        Some(Section::Locations) => print_locations(mast_forest, &debug_sections, config.verbose),
+        None => {
+            // Print everything
+            print_summary(&debug_sections, mast_forest);
+            println!();
+            print_strings(&debug_sections);
+            println!();
+            print_types(&debug_sections, config.raw);
+            println!();
+            print_files(&debug_sections, config.raw);
+            println!();
+            print_functions(&debug_sections, config.raw, config.verbose);
+            println!();
+            print_locations(mast_forest, &debug_sections, config.verbose);
+        }
+    }
+
+    Ok(())
+}
+
+/// Find the section of `package` identified by `id` and deserialize its data as `T`.
+///
+/// Returns `Ok(None)` when the package has no such section.
+fn extract_section<T>(package: &Package, id: SectionId) -> Result<Option<T>, DumpError>
+where
+    T: Deserializable,
+{
+    let Some(section) = package.sections.iter().find(|s| s.id == id) else {
+        return Ok(None);
+    };
+
+    T::read_from_bytes(&section.data).map(Some).map_err(DumpError::from)
+}
+
+/// High bit of a `FrameBase` global index; when set, the low 16 bits hold a signed offset.
+const FRAME_BASE_LOCAL_MARKER: u32 = 1 << 31;
+
+/// Decode the signed 16-bit offset from `encoded`, or `None` if the marker bit is not set.
+fn decode_frame_base_local_offset(encoded: u32) -> Option<i16> {
+    if encoded & FRAME_BASE_LOCAL_MARKER == 0 {
+        return None;
+    }
+
+    let low_bits = (encoded & 0xffff) as u16;
+    Some(i16::from_le_bytes(low_bits.to_le_bytes()))
+}
+
+/// Render `location` for display, showing marker-tagged `FrameBase` locations as an offset
+/// from `FMP` and everything else via its `Display` implementation.
+fn format_debug_var_location(location: &DebugVarLocation) -> String {
+    if let DebugVarLocation::FrameBase {
+        global_index,
+        byte_offset,
+    } = location
+        && let Some(offset) = decode_frame_base_local_offset(*global_index)
+    {
+        format!("frame_base(FMP{offset:+}){byte_offset:+}")
+    } else {
+        location.to_string()
+    }
+}
+
+/// Holds the three debug info sections with helper accessors.
+struct DebugSections {
+    types: DebugTypesSection,
+    sources: DebugSourcesSection,
+    functions: DebugFunctionsSection,
+}
+
+impl DebugSections {
+    /// Look up a string in the types section's string table.
+    fn get_type_string(&self, idx: u32) -> Option<String> {
+        self.types.get_string(idx).map(|s| s.to_string())
+    }
+
+    /// Look up a string in the sources section's string table.
+    fn get_source_string(&self, idx: u32) -> Option<String> {
+        self.sources.get_string(idx).map(|s| s.to_string())
+    }
+
+    /// Look up a string in the functions section's string table.
+    fn get_func_string(&self, idx: u32) -> Option<String> {
+        self.functions.get_string(idx).map(|s| s.to_string())
+    }
+
+    /// Look up a type by index.
+    fn get_type(&self, idx: DebugTypeIdx) -> Option<&DebugTypeInfo> {
+        self.types.get_type(idx)
+    }
+
+    /// Look up a file by index.
+    fn get_file(&self, idx: u32) -> Option<&DebugFileInfo> {
+        self.sources.get_file(idx)
+    }
+}
+
+fn print_summary(debug_sections: &DebugSections, mast_forest: &MastForest) {
+    println!("Summary:");
+    println!();
+
+    println!("Types:");
+    println!(" | records: {}", &debug_sections.types.types.len());
+    println!(" | strings: {}", &debug_sections.types.strings.len());
+    println!();
+
+    println!("Sources:");
+    println!(" | records: {}", &debug_sections.sources.files.len());
+    println!(" | strings: {}", &debug_sections.sources.strings.len());
+    println!();
+
+    let total_vars: usize =
+        debug_sections.functions.functions.iter().map(|f| f.variables.len()).sum();
+    let total_inlined: usize =
+        debug_sections.functions.functions.iter().map(|f| f.inlined_calls.len()).sum();
+    println!("Functions:");
+    println!(" | records: {}", &debug_sections.functions.functions.len());
+    println!(" | strings: {}", &debug_sections.functions.strings.len());
+    println!(" | variables: {total_vars} (total across all functions)");
+    println!(" | inlined: {total_inlined} call sites");
+    println!();
+
+    // Count debug vars in MAST
+    let debug_var_count = mast_forest.debug_info().debug_vars().len();
+    println!("Found {debug_var_count} debug variable records");
+}
+
+fn print_strings(debug_sections: &DebugSections) {
+    println!(".debug_str contents:");
+    println!("{:-<80}", "");
+
+    println!(" [types string table]");
+    for (idx, s) in debug_sections.types.strings.iter().enumerate() {
+        println!(" [{:4}] \"{}\"", idx, s);
+    }
+    println!();
+    println!(" [sources string table]");
+    for (idx, s) in debug_sections.sources.strings.iter().enumerate() {
+        println!(" [{:4}] \"{}\"", idx, s);
+    }
+    println!();
+    println!(" [functions string table]");
+    for (idx, s) in debug_sections.functions.strings.iter().enumerate() {
+        println!(" [{:4}] \"{}\"", idx, s);
+    }
+}
+
+fn print_types(debug_sections: &DebugSections, raw: bool) {
+    println!(".debug_types contents:");
+    println!("{:-<80}", "");
+    for (idx, ty) in debug_sections.types.types.iter().enumerate() {
+        print!(" [{:4}] ", idx);
+        print_type(ty, debug_sections, raw, 0);
+        println!();
+    }
+}
+
+fn print_type(ty: &DebugTypeInfo, debug_sections: &DebugSections, raw: bool, indent: usize) {
+    let pad = " ".repeat(indent);
+    match ty {
+        DebugTypeInfo::Primitive(prim) => {
+            print!("{}PRIMITIVE: {}", pad, primitive_name(*prim));
+            print!(" (size: {} bytes, {} felts)", prim.size_in_bytes(), prim.size_in_felts());
+        }
+        DebugTypeInfo::Pointer { pointee_type_idx } => {
+            if raw {
+                print!("{}POINTER -> type[{}]", pad, pointee_type_idx.as_u32());
+            } else {
+                print!("{}POINTER -> ", pad);
+                if let Some(pointee) = debug_sections.get_type(*pointee_type_idx) {
+                    print_type_brief(pointee, debug_sections);
+                } else {
+                    print!("", pointee_type_idx.as_u32()); // NOTE(review): no `{}` for this arg
+                }
+            }
+        }
+        DebugTypeInfo::Array {
+            element_type_idx,
+            count,
+        } => {
+            if raw {
+                print!("{}ARRAY [{}; {:?}]", pad, element_type_idx.as_u32(), count);
+            } else {
+                print!("{}ARRAY [", pad);
+                if let Some(elem) = debug_sections.get_type(*element_type_idx) {
+                    print_type_brief(elem, debug_sections);
+                } else {
+                    print!("");
+                }
+                match count {
+                    Some(n) => print!("; {}]", n),
+                    None => print!("; ?]"),
+                }
+            }
+        }
+        DebugTypeInfo::Struct {
+            name_idx,
+            size,
+            fields,
+        } => {
+            let name = if raw {
+                format!("str[{}]", name_idx)
+            } else {
+                debug_sections.get_type_string(*name_idx).unwrap_or_else(|| "".into())
+            };
+            print!("{}STRUCT {} (size: {} bytes)", pad, name, size);
+            if !fields.is_empty() {
+                println!();
+                for field in fields {
+                    let field_name = if raw {
+                        format!("str[{}]", field.name_idx)
+                    } else {
+                        debug_sections
+                            .get_type_string(field.name_idx)
+                            .unwrap_or_else(|| "".into())
+                    };
+                    print!("{} +{:4}: {} : ", pad, field.offset, field_name);
+                    if let Some(fty) = debug_sections.get_type(field.type_idx) {
+                        print_type_brief(fty, debug_sections);
+                    } else {
+                        print!("");
+                    }
+                    println!();
+                }
+            }
+        }
+        DebugTypeInfo::Function {
+            return_type_idx,
+            param_type_indices,
+        } => {
+            print!("{}FUNCTION (", pad);
+            for (i, param_idx) in param_type_indices.iter().enumerate() {
+                if i > 0 {
+                    print!(", ");
+                }
+                if raw {
+                    print!("type[{}]", param_idx.as_u32());
+                } else if let Some(pty) = debug_sections.get_type(*param_idx) {
+                    print_type_brief(pty, debug_sections);
+                } else {
+                    print!("");
+                }
+            }
+            print!(") -> ");
+            match return_type_idx {
+                Some(idx) => {
+                    if raw {
+                        print!("type[{}]", idx.as_u32());
+                    } else if let Some(rty) = debug_sections.get_type(*idx) {
+                        print_type_brief(rty, debug_sections);
+                    } else {
+                        print!("");
+                    }
+                }
+                None => print!("void"),
+            }
+        }
+        DebugTypeInfo::Unknown => {
+            print!("{}UNKNOWN", pad);
+        }
+    }
+}
+
+fn print_type_brief(ty: &DebugTypeInfo, debug_sections: &DebugSections) {
+    match ty {
+        DebugTypeInfo::Primitive(prim) => print!("{}", primitive_name(*prim)),
+        DebugTypeInfo::Pointer { pointee_type_idx } => {
+            print!("*");
+            if let Some(p) = debug_sections.get_type(*pointee_type_idx) {
+                print_type_brief(p, debug_sections);
+            }
+        }
+        DebugTypeInfo::Array {
+            element_type_idx,
+            count,
+        } => {
+            print!("[");
+            if let Some(e) = debug_sections.get_type(*element_type_idx) {
+                print_type_brief(e, debug_sections);
+            }
+            match count {
+                Some(n) => print!("; {}]", n),
+                None => print!("]"),
+            }
+        }
+        DebugTypeInfo::Struct { name_idx, .. } => {
+            print!(
+                "struct {}",
+                debug_sections.get_type_string(*name_idx).unwrap_or_else(|| "?".into())
+            );
+        }
+        DebugTypeInfo::Function { .. } => print!("fn(...)"),
+        DebugTypeInfo::Unknown => print!("?"),
+    }
+}
+
+fn primitive_name(prim: DebugPrimitiveType) -> &'static str {
+    match prim {
+        DebugPrimitiveType::Void => "void",
+        DebugPrimitiveType::Bool => "bool",
+        DebugPrimitiveType::I8 => "i8",
+        DebugPrimitiveType::U8 => "u8",
+        DebugPrimitiveType::I16 => "i16",
+        DebugPrimitiveType::U16 => "u16",
+        DebugPrimitiveType::I32 => "i32",
+        DebugPrimitiveType::U32 => "u32",
+        DebugPrimitiveType::I64 => "i64",
+        DebugPrimitiveType::U64 => "u64",
+        DebugPrimitiveType::I128 => "i128",
+        DebugPrimitiveType::U128 => "u128",
+        DebugPrimitiveType::F32 => "f32",
+        DebugPrimitiveType::F64 => "f64",
+        DebugPrimitiveType::Felt => "felt",
+        DebugPrimitiveType::Word => "word",
+    }
+}
+
+fn print_files(debug_sections: &DebugSections, raw: bool) {
+    println!(".debug_files contents:");
+    println!("{:-<80}", "");
+    for (idx, file) in debug_sections.sources.files.iter().enumerate() {
+        print_file(idx, file, debug_sections, raw);
+    }
+}
+
+fn print_file(idx: usize, file: &DebugFileInfo, debug_sections: &DebugSections, raw: bool) {
+    let path = if raw {
+        format!("str[{}]", file.path_idx)
+    } else {
+        debug_sections
+            .get_source_string(file.path_idx)
+            .unwrap_or_else(|| "".into())
+    };
+
+    print!(" [{:4}] {}", idx, path);
+
+    if let Some(checksum) = &file.checksum {
+        print!(" [checksum: ");
+        for byte in &checksum[..4] {
+            print!("{:02x}", byte);
+        }
+        print!("...]");
+    }
+
+    println!();
+}
+
+fn print_functions(debug_sections: &DebugSections, raw: bool, verbose: bool) {
+    println!(".debug_functions contents:");
+    println!("{:-<80}", "");
+    for (idx, func) in debug_sections.functions.functions.iter().enumerate() {
+        print_function(idx, func, debug_sections, raw, verbose);
+        println!();
+    }
+}
+
+fn print_function(
+    idx: usize,
+    func: &DebugFunctionInfo,
+    debug_sections: &DebugSections,
+    raw: bool,
+    verbose: bool,
+) {
+    let name = if raw {
+        format!("str[{}]", func.name_idx)
+    } else {
+        debug_sections
+            .get_func_string(func.name_idx)
+            .unwrap_or_else(|| "".into())
+    };
+
+    println!(" [{:4}] FUNCTION: {}", idx, name);
+
+    // Linkage name
+    if let Some(linkage_idx) = func.linkage_name_idx {
+        let linkage = if raw {
+            format!("str[{}]", linkage_idx)
+        } else {
+            debug_sections
+                .get_func_string(linkage_idx)
+                .unwrap_or_else(|| "".into())
+        };
+        println!(" Linkage name: {}", linkage);
+    }
+
+    // Location
+    let file_path = if raw {
+        format!("file[{}]", func.file_idx)
+    } else {
+        debug_sections
+            .get_file(func.file_idx)
+            .and_then(|f| debug_sections.get_source_string(f.path_idx))
+            .unwrap_or_else(|| "".into())
+    };
+    println!(" Location: {}:{}:{}", file_path, func.line, func.column);
+
+    // Type
+    if let Some(type_idx) = func.type_idx {
+        print!(" Type: ");
+        if raw {
+            println!("type[{}]", type_idx.as_u32());
+        } else if let Some(ty) = debug_sections.get_type(type_idx) {
+            print_type_brief(ty, debug_sections);
+            println!();
+        } else {
+            println!("");
+        }
+    }
+
+    // MAST root
+    if let Some(root) = &func.mast_root {
+        print!(" MAST root: 0x");
+        for byte in &root.as_bytes() {
+            print!("{:02x}", byte);
+        }
+        println!();
+    }
+
+    // Variables
+    if !func.variables.is_empty() {
+        println!(" Variables ({}):", func.variables.len());
+        for var in &func.variables {
+            print_variable(var, debug_sections, raw, verbose);
+        }
+    }
+
+    // Inlined calls
+    if !func.inlined_calls.is_empty() && verbose {
+        println!(" Inlined calls ({}):", func.inlined_calls.len());
+        for call in &func.inlined_calls {
+            let callee = if raw {
+                format!("func[{}]", call.callee_idx)
+            } else {
+                debug_sections
+                    .functions
+                    .functions
+                    .get(call.callee_idx as usize)
+                    .and_then(|f| debug_sections.get_func_string(f.name_idx))
+                    .unwrap_or_else(|| "".into())
+            };
+            let call_file = if raw {
+                format!("file[{}]", call.file_idx)
+            } else {
+                debug_sections
+                    .get_file(call.file_idx)
+                    .and_then(|f| debug_sections.get_source_string(f.path_idx))
+                    .unwrap_or_else(|| "".into())
+            };
+            println!(
+                " - {} inlined at {}:{}:{}",
+                callee, call_file, call.line, call.column
+            );
+        }
+    }
+}
+
+fn print_variable(
+    var: &DebugVariableInfo,
+    debug_sections: &DebugSections,
+    raw: bool,
+    _verbose: bool,
+) {
+    let name = if raw {
+        format!("str[{}]", var.name_idx)
+    } else {
+        debug_sections
+            .get_func_string(var.name_idx)
+            .unwrap_or_else(|| "".into())
+    };
+
+    let kind = if var.is_parameter() {
+        format!("param #{}", var.arg_index)
+    } else {
+        "local".to_string()
+    };
+
+    print!(" - {} ({}): ", name, kind);
+
+    if raw {
+        print!("type[{}]", var.type_idx.as_u32());
+    } else if let Some(ty) = debug_sections.get_type(var.type_idx) {
+        print_type_brief(ty, debug_sections);
+    } else {
+        print!("");
+    }
+
+    print!(" @ {}:{}", var.line, var.column);
+
+    if var.scope_depth > 0 {
+        print!(" [scope depth: {}]", var.scope_depth);
+    }
+
+    println!();
+}
+
+fn print_variables(debug_sections: &DebugSections, raw: bool) {
+    println!(".debug_variables contents (all functions):");
+    println!("{:-<80}", "");
+
+    for func in &debug_sections.functions.functions {
+        if func.variables.is_empty() {
+            continue;
+        }
+
+        let func_name = debug_sections
+            .get_func_string(func.name_idx)
+            .unwrap_or_else(|| "".into());
+        println!(" Function: {}", func_name);
+
+        for var in &func.variables {
+            print_variable(var, debug_sections, raw, false);
+        }
+        println!();
+    }
+}
+
+/// Prints the .debug_loc section - variable location entries from MAST
+///
+/// This is analogous to DWARF's .debug_loc section which contains location
+/// lists describing where a variable's value can be found at runtime.
+fn print_locations(mast_forest: &MastForest, debug_sections: &DebugSections, verbose: bool) {
+    println!(".debug_loc contents (DebugVar entries from MAST):");
+    println!("{:-<80}", "");
+
+    // Collect all debug vars from the MastForest
+    let debug_vars = mast_forest.debug_info().debug_vars();
+
+    if debug_vars.is_empty() {
+        println!(" (no DebugVar entries found)");
+        return;
+    }
+
+    // Group entries by variable name; the BTreeMap yields the names in sorted order
+    let mut by_name: BTreeMap<&str, Vec<(usize, &DebugVarInfo)>> = BTreeMap::new();
+    for (idx, info) in debug_vars.iter().enumerate() {
+        by_name.entry(info.name()).or_default().push((idx, info));
+    }
+
+    println!(" Total DebugVar entries: {}", debug_vars.len());
+    println!(" Unique variable names: {}", by_name.len());
+    println!();
+
+    for (name, entries) in &by_name {
+        println!(" Variable: \"{}\"", name);
+        println!(" {} location entries:", entries.len());
+
+        for (var_idx, info) in entries {
+            print!(" [var#{}] ", var_idx);
+
+            // Print value location
+            print!("{}", format_debug_var_location(info.value_location()));
+
+            // Print argument info if present
+            if let Some(arg_idx) = info.arg_index() {
+                print!(" (param #{})", arg_idx);
+            }
+
+            // Print type info if present, falling back to the raw id if it cannot be resolved
+            if let Some(type_id) = info.type_id() {
+                let type_idx = DebugTypeIdx::from(type_id);
+                if let Some(ty) = debug_sections.get_type(type_idx) {
+                    print!(" : ");
+                    print_type_brief(ty, debug_sections);
+                } else {
+                    print!(" : type[{}]", type_id);
+                }
+            }
+
+            // Print source location if present
+            if let Some(loc) = info.location() {
+                print!(" @ {}:{}:{}", loc.uri, loc.line, loc.column);
+            }
+
+            println!();
+        }
+        println!();
+    }
+
+    // In verbose mode, also list every entry in its original (ungrouped) order
+    if verbose {
+        println!(" Raw debug var list (in order):");
+        println!(" {:-<76}", "");
+        for (idx, info) in debug_vars.iter().enumerate() {
+            println!(" [{:4}] {}", idx, info);
+        }
+    }
+}
diff --git a/tools/objtool/src/lib.rs b/tools/objtool/src/lib.rs
index 4e5aaef1a..fee5fba57 100644
--- a/tools/objtool/src/lib.rs
+++ b/tools/objtool/src/lib.rs
@@ -1 +1,2 @@
 pub mod decorators;
+pub mod dump;
diff --git a/tools/objtool/src/main.rs b/tools/objtool/src/main.rs
index e69d3d5fd..7bb14f8f5 100644
--- a/tools/objtool/src/main.rs
+++ b/tools/objtool/src/main.rs
@@ -1,24 +1,19 @@
-use clap::{Parser, Subcommand};
-use miden_assembly_syntax::{Report, diagnostics::reporting};
-use miden_objtool::decorators;
+use clap::Parser;
+use miden_assembly_syntax::{
+    Report,
+    diagnostics::{IntoDiagnostic, reporting},
+};
+use miden_objtool::{decorators, dump};
 
+/// Common utilities for analyzing Miden artifacts
 #[derive(Debug, Parser)]
-#[command(
-    name = "miden-objtool",
-    version,
-    about = "Common utilities for analyzing Miden artifacts",
-    long_about = None,
-    arg_required_else_help = true,
-)]
-struct Cli {
-    #[command(subcommand)]
-    command: Commands,
-}
-
-#[derive(Debug, Subcommand)]
-enum Commands {
+#[command(name = "miden-objtool", version, arg_required_else_help = true)]
+enum Cli {
     /// Compare serialized MAST forest sizes after stripping decorators.
     Decorators(decorators::DecoratorsCommand),
+    /// Dump various types of information from assembled packages
+    #[command(subcommand)]
+    Dump(dump::Dump),
 }
 
 fn main() -> Result<(), Report> {
@@ -31,7 +26,8 @@ fn main() -> Result<(), Report> {
         reporting::set_panic_hook();
     }
 
-    match &cli.command {
-        Commands::Decorators(command) => decorators::run(command).map_err(Report::msg),
+    match &cli {
+        Cli::Decorators(command) => decorators::run(command).map_err(Report::msg),
+        Cli::Dump(command) => dump::run(command).into_diagnostic(),
     }
 }