From 5a5a09337f636d49d4e808969585f918168374f7 Mon Sep 17 00:00:00 2001 From: djole Date: Fri, 9 Jan 2026 17:56:58 +0100 Subject: [PATCH 01/32] Add debug variable location tracking to HIR and MASM This commit adds debug information support for tracking variable locations through the compilation pipeline: - Add debug info representation on HIR level (builtin.dbg_value ops, DIExpression, DILocalVariable attributes) - Handle DebugVars during lowering from HIR to MASM - Add DebugInfoBuilder for constructing debug metadata - Add RemoveDeadDebugOps pass to clean up debug ops with dead operands - Add miden-debugdump tool for inspecting .debug_info sections in MASP packages (similar to llvm-dwarfdump) - Add documentation for the debug info format (docs/DebugInfoFormat.md, docs/DebugInfoMetadata.md) - Add lit tests for debug functionality Run tests: $ litcheck lit run --verbose tests/lit/debugdump $ litcheck lit run --path bin ./tests/lit/debug/ --- .gitignore | 2 + Cargo.lock | 11 + Makefile.toml | 5 +- codegen/masm/src/lower/lowering.rs | 87 +++ docs/DebugInfoFormat.md | 360 +++++++++ docs/DebugInfoMetadata.md | 216 ++++++ frontend/wasm/src/code_translator/mod.rs | 2 + frontend/wasm/src/module/build_ir.rs | 18 +- frontend/wasm/src/module/debug_info.rs | 697 ++++++++++++++++++ .../wasm/src/module/func_translation_state.rs | 13 +- frontend/wasm/src/module/func_translator.rs | 14 +- .../wasm/src/module/function_builder_ext.rs | 213 +++++- frontend/wasm/src/module/mod.rs | 9 + frontend/wasm/src/module/module_env.rs | 17 +- hir-transform/src/dead_debug_ops.rs | 126 ++++ hir-transform/src/lib.rs | 2 + hir/src/attributes/debug.rs | 254 +++++++ hir/src/dialects/builtin/builders.rs | 21 + hir/src/dialects/builtin/ops.rs | 1 + hir/src/dialects/builtin/ops/debug.rs | 75 ++ midenc-compile/Cargo.toml | 1 + midenc-compile/src/debug_info.rs | 312 ++++++++ midenc-compile/src/lib.rs | 1 + midenc-compile/src/stages/assemble.rs | 59 +- midenc-compile/src/stages/codegen.rs | 20 + 
midenc-compile/src/stages/rewrite.rs | 18 +- .../expected/debug_variable_locations.hir | 47 ++ .../integration/src/rust_masm_tests/debug.rs | 27 + tests/integration/src/rust_masm_tests/mod.rs | 1 + tests/lit/debug/function_metadata.rs | 14 + tests/lit/debug/function_metadata.shtest | 6 + tests/lit/debug/lit.suite.toml | 5 + tests/lit/debug/location_expressions.rs | 25 + tests/lit/debug/location_expressions.shtest | 9 + tests/lit/debug/simple_debug.rs | 14 + tests/lit/debug/simple_debug.shtest | 6 + tests/lit/debug/variable_locations.rs | 21 + tests/lit/debug/variable_locations.shtest | 6 + tests/lit/debugdump/lit.suite.toml | 5 + tests/lit/debugdump/locations-source-loc.wat | 25 + tests/lit/debugdump/locations.wat | 22 + tests/lit/debugdump/simple.wat | 32 + tests/lit/debugdump/summary.wat | 21 + tests/lit/lit.cfg.py | 51 ++ tests/lit/variable_locations.rs | 21 + tools/debugdump/Cargo.toml | 25 + tools/debugdump/src/main.rs | 638 ++++++++++++++++ 47 files changed, 3560 insertions(+), 15 deletions(-) create mode 100644 docs/DebugInfoFormat.md create mode 100644 docs/DebugInfoMetadata.md create mode 100644 frontend/wasm/src/module/debug_info.rs create mode 100644 hir-transform/src/dead_debug_ops.rs create mode 100644 hir/src/attributes/debug.rs create mode 100644 hir/src/dialects/builtin/ops/debug.rs create mode 100644 midenc-compile/src/debug_info.rs create mode 100644 tests/integration/expected/debug_variable_locations.hir create mode 100644 tests/integration/src/rust_masm_tests/debug.rs create mode 100644 tests/lit/debug/function_metadata.rs create mode 100644 tests/lit/debug/function_metadata.shtest create mode 100644 tests/lit/debug/lit.suite.toml create mode 100644 tests/lit/debug/location_expressions.rs create mode 100644 tests/lit/debug/location_expressions.shtest create mode 100644 tests/lit/debug/simple_debug.rs create mode 100644 tests/lit/debug/simple_debug.shtest create mode 100644 tests/lit/debug/variable_locations.rs create mode 100644 
tests/lit/debug/variable_locations.shtest create mode 100644 tests/lit/debugdump/lit.suite.toml create mode 100644 tests/lit/debugdump/locations-source-loc.wat create mode 100644 tests/lit/debugdump/locations.wat create mode 100644 tests/lit/debugdump/simple.wat create mode 100644 tests/lit/debugdump/summary.wat create mode 100644 tests/lit/lit.cfg.py create mode 100644 tests/lit/variable_locations.rs create mode 100644 tools/debugdump/Cargo.toml create mode 100644 tools/debugdump/src/main.rs diff --git a/.gitignore b/.gitignore index c663bcca4..e92412b9b 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ book/ # Ignore Cargo.lock in test projects examples/**/Cargo.lock tests/**/Cargo.lock +**/src/bindings.rs +*.lit_test_times.txt* diff --git a/Cargo.lock b/Cargo.lock index 43a07dbc5..3bd98a720 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2736,6 +2736,16 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "miden-debugdump" +version = "0.8.1" +dependencies = [ + "clap", + "miden-core", + "miden-mast-package", + "miden-thiserror", +] + [[package]] name = "miden-field" version = "0.23.0" @@ -3310,6 +3320,7 @@ dependencies = [ "inventory", "log", "miden-assembly", + "miden-debug-types", "miden-mast-package", "miden-thiserror", "midenc-codegen-masm", diff --git a/Makefile.toml b/Makefile.toml index c5128742d..b5c355d15 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -442,7 +442,10 @@ args = [ "--verbose", "--path", "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/bin", - "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit", + "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/parse", + "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/wasm-translation", + "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/source-location", + "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/debugdump", ] dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt"] diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 
044973b5f..2db93d698 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1262,6 +1262,93 @@ impl HirLowering for arith::Split { } } +impl HirLowering for builtin::DbgValue { + fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + use miden_core::{DebugVarInfo, DebugVarLocation, Felt}; + use midenc_hir::DIExpressionOp; + + // Get the variable info + let var = self.variable(); + + // Build the DebugVarLocation from DIExpression + let expr = self.expression(); + let value = self.value().as_value_ref(); + + // If the value is not on the stack and there's no expression info, + // skip emitting this debug info (the value has been optimized away) + let has_location_expr = expr.operations.first().is_some_and(|op| { + matches!( + op, + DIExpressionOp::WasmStack(_) + | DIExpressionOp::WasmLocal(_) + | DIExpressionOp::ConstU64(_) + | DIExpressionOp::ConstS64(_) + ) + }); + if !has_location_expr && emitter.stack.find(&value).is_none() { + // Value has been dropped and we have no other location info, skip + return Ok(()); + } + let value_location = if let Some(first_op) = expr.operations.first() { + match first_op { + DIExpressionOp::WasmStack(offset) => DebugVarLocation::Stack(*offset as u8), + DIExpressionOp::WasmLocal(idx) => DebugVarLocation::Local(*idx as u16), + DIExpressionOp::WasmGlobal(_) | DIExpressionOp::Deref => { + // For global or dereference, check the stack position of the value + if let Some(pos) = emitter.stack.find(&value) { + DebugVarLocation::Stack(pos as u8) + } else { + DebugVarLocation::Expression(vec![]) + } + } + DIExpressionOp::ConstU64(val) => DebugVarLocation::Const(Felt::new(*val)), + DIExpressionOp::ConstS64(val) => DebugVarLocation::Const(Felt::new(*val as u64)), + _ => { + // For other operations, try to find the value on the stack + if let Some(pos) = emitter.stack.find(&value) { + DebugVarLocation::Stack(pos as u8) + } else { + DebugVarLocation::Expression(vec![]) + } + } + } + } else 
{ + // No expression, try to find the value on the stack + if let Some(pos) = emitter.stack.find(&value) { + DebugVarLocation::Stack(pos as u8) + } else { + // Value not found, use expression + DebugVarLocation::Expression(vec![]) + } + }; + + let mut debug_var = DebugVarInfo::new(var.name.to_string(), value_location); + + // Set arg_index if this is a parameter + if let Some(arg_index) = var.arg_index { + debug_var.set_arg_index(arg_index + 1); // Convert to 1-based + } + + // Set source location + if let Some(line) = core::num::NonZeroU32::new(var.line) { + use miden_assembly::debuginfo::{ColumnNumber, FileLineCol, LineNumber, Uri}; + let uri = Uri::new(var.file.as_str()); + let file_line_col = FileLineCol::new( + uri, + LineNumber::new(line.get()).unwrap_or_default(), + var.column.and_then(ColumnNumber::new).unwrap_or_default(), + ); + debug_var.set_location(file_line_col); + } + + // Emit the instruction + let inst = masm::Instruction::DebugVar(debug_var); + emitter.emit_op(masm::Op::Inst(Span::new(self.span(), inst))); + + Ok(()) + } +} + impl HirLowering for builtin::GlobalSymbol { fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { let context = self.as_operation().context(); diff --git a/docs/DebugInfoFormat.md b/docs/DebugInfoFormat.md new file mode 100644 index 000000000..ae646f71a --- /dev/null +++ b/docs/DebugInfoFormat.md @@ -0,0 +1,360 @@ +# Debug Info Format Specification + +This document describes the `.debug_info` custom section format used in MASP (Miden Assembly Package) files. This section contains source-level debug information that enables debuggers to map between Miden VM execution state and the original source code. + +## Overview + +The debug info section is stored as a custom section in the MASP package with the section ID `debug_info`. 
It is designed to be: + +- **Compact**: Uses index-based references and string deduplication +- **Self-contained**: All information needed for debugging is in this section +- **Extensible**: Version field allows for future format evolution + +## Section Structure + +The `.debug_info` section contains the following logical subsections: + +``` +┌─────────────────────────────────────────┐ +│ Debug Info Header │ +│ - version (u8) │ +├─────────────────────────────────────────┤ +│ .debug_str │ +│ - String table (deduplicated) │ +├─────────────────────────────────────────┤ +│ .debug_types │ +│ - Type definitions │ +├─────────────────────────────────────────┤ +│ .debug_files │ +│ - Source file information │ +├─────────────────────────────────────────┤ +│ .debug_functions │ +│ - Function metadata │ +│ - Variables (nested) │ +│ - Inlined calls (nested) │ +└─────────────────────────────────────────┘ +``` + +## Format Version + +Current version: **1** + +The version byte is the first field in the section and indicates the format version. Readers should reject sections with unsupported versions. + +--- + +## .debug_str - String Table + +The string table contains all strings used in the debug info, deduplicated to save space. Other sections reference strings by their index into this table. + +### Contents + +- File paths +- Function names +- Variable names +- Type names +- Linkage/mangled names + +### Example Output + +``` +.debug_str contents: + [ 0] "/Users/user/project/src/lib.rs" + [ 1] "my_function" + [ 2] "x" + [ 3] "result" +``` + +--- + +## .debug_types - Type Information + +The type table contains definitions for all types referenced by variables and functions. Types can reference other types by index, allowing for complex type hierarchies. 
+ +### Type Kinds + +| Tag | Kind | Description | +|-----|------|-------------| +| 0 | Primitive | Built-in scalar types | +| 1 | Pointer | Pointer to another type | +| 2 | Array | Fixed or dynamic array | +| 3 | Struct | Composite type with fields | +| 4 | Function | Function signature | +| 5 | Unknown | Opaque/unknown type | + +### Primitive Types + +| Value | Type | Size (bytes) | Size (felts) | +|-------|------|--------------|--------------| +| 0 | void | 0 | 0 | +| 1 | bool | 1 | 1 | +| 2 | i8 | 1 | 1 | +| 3 | u8 | 1 | 1 | +| 4 | i16 | 2 | 1 | +| 5 | u16 | 2 | 1 | +| 6 | i32 | 4 | 1 | +| 7 | u32 | 4 | 1 | +| 8 | i64 | 8 | 2 | +| 9 | u64 | 8 | 2 | +| 10 | i128 | 16 | 4 | +| 11 | u128 | 16 | 4 | +| 12 | f32 | 4 | 2 | +| 13 | f64 | 8 | 2 | +| 14 | felt | 8 | 1 | +| 15 | word | 32 | 4 | + +### Example Output + +``` +.debug_types contents: + [ 0] PRIMITIVE: i32 (size: 4 bytes, 1 felts) + [ 1] PRIMITIVE: felt (size: 8 bytes, 1 felts) + [ 2] POINTER -> i32 + [ 3] ARRAY [felt; 4] + [ 4] STRUCT Point (size: 16 bytes) + + 0: x : felt + + 8: y : felt +``` + +--- + +## .debug_files - Source File Information + +The file table contains information about source files referenced by functions and variables. + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| path_idx | u32 | Index into string table for file path | +| directory_idx | Option\<u32\> | Optional index for directory path | +| checksum | Option\<[u8; 32]\> | Optional SHA-256 checksum for verification | + +### Example Output + +``` +.debug_files contents: + [ 0] /Users/user/project/src/lib.rs + [ 1] /rustc/abc123.../library/core/src/panicking.rs + [ 2] unknown +``` + +--- + +## .debug_functions - Function Information + +The function table contains debug metadata for each function in the compiled program.
+ +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| name_idx | u32 | Index into string table for function name | +| linkage_name_idx | Option\<u32\> | Optional mangled/linkage name | +| file_idx | u32 | Index into file table | +| line | u32 | Line number where function is defined | +| column | u32 | Column number | +| type_idx | Option\<u32\> | Optional function type (index into type table) | +| mast_root | Option\<[u8; 32]\> | MAST root digest linking to compiled code | +| variables | Vec | Local variables and parameters | +| inlined_calls | Vec | Inlined function call sites | + +### Variables + +Each function contains a list of variables (parameters and locals): + +| Field | Type | Description | +|-------|------|-------------| +| name_idx | u32 | Index into string table | +| type_idx | u32 | Index into type table | +| arg_index | u32 | 1-based parameter index (0 = local variable) | +| line | u32 | Declaration line | +| column | u32 | Declaration column | +| scope_depth | u32 | Lexical scope depth (0 = function scope) | + +### Inlined Calls + +For tracking inlined function calls: + +| Field | Type | Description | +|-------|------|-------------| +| callee_idx | u32 | Index into function table for inlined function | +| file_idx | u32 | Call site file | +| line | u32 | Call site line | +| column | u32 | Call site column | + +### Example Output + +``` +.debug_functions contents: + [ 0] FUNCTION: my_function + Location: /Users/user/project/src/lib.rs:10:1 + MAST root: 0xabcd1234...
+ Variables (3): + - x (param #1): i32 @ 10:14 + - y (param #2): i32 @ 10:22 + - result (local): i32 @ 11:9 [scope depth: 1] + Inlined calls (1): + - helper_fn inlined at lib.rs:12:5 +``` + +--- + +## Usage + +### Generating Debug Info + +Compile with debug info enabled: + +```bash +midenc input.wasm --exe --debug full -o output.masp +``` + +For projects using `trim-paths`, use the `-Z trim-path-prefix` option to preserve absolute paths: + +```bash +midenc input.wasm --exe --debug full \ + -Z trim-path-prefix="/path/to/project" \ + -o output.masp +``` + +### Inspecting Debug Info + +Use the `miden-debugdump` tool to inspect debug info in a MASP file: + +```bash +# Full dump (includes all sections) +miden-debugdump output.masp + +# Summary only +miden-debugdump output.masp --summary + +# Specific section from .debug_info +miden-debugdump output.masp --section functions +miden-debugdump output.masp --section variables +miden-debugdump output.masp --section types +miden-debugdump output.masp --section files +miden-debugdump output.masp --section strings + +# Show DebugVar decorators from MAST (.debug_loc) +miden-debugdump output.masp --section locations + +# Verbose mode (shows additional details like raw decorator list) +miden-debugdump output.masp --section locations --verbose + +# Raw indices (for debugging the debug info itself) +miden-debugdump output.masp --raw +``` + +--- + +## Design Rationale + +### Index-Based References + +All cross-references use indices rather than embedding data directly. 
This: +- Enables string deduplication (file paths, names appear once) +- Reduces section size +- Allows efficient random access + +### Separation of Concerns + +The section is divided into logical subsections: +- **Strings**: Shared across all other sections +- **Types**: Can be referenced by multiple variables/functions +- **Files**: Shared by multiple functions +- **Functions**: Contains variables and inlined calls inline + +### Compatibility with DWARF + +The format is inspired by DWARF but simplified for Miden's needs: +- No complex DIE tree structure +- No location expressions (handled by `DebugVar` decorators in MAST) +- No line number tables (locations embedded in functions/variables) + +--- + +## Debug Variable Locations + +Debug information in MASP is split between two locations: the `.debug_info` custom section (documented above) and `Decorator::DebugVar` entries embedded in the MAST instruction stream. + +### Architecture Overview + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ MASP Package │ +├──────────────────────────────────────────────────────────────────┤ +│ MAST Forest │ +│ ├── MastNode[] │ +│ │ └── Decorator::DebugVar(DebugVarInfo) ← Runtime locations │ +│ │ • name: "x" │ +│ │ • value_location: Stack(0) / Local(2) / Memory(...) │ +│ │ • source location │ +│ └── String table (for names) │ +├──────────────────────────────────────────────────────────────────┤ +│ .debug_info Section (separate custom section) │ +│ ├── .debug_str (deduplicated strings) │ +│ ├── .debug_types (type definitions) │ +│ ├── .debug_files (source file paths) │ +│ └── .debug_functions (static metadata, variables, inlined) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +### Why Two Locations? 
+ +| Aspect | `Decorator::DebugVar` in MAST | `.debug_info` Section | +|--------|-------------------------------|----------------------| +| **Where stored** | Embedded in instruction stream | Custom section at end of MASP | +| **Purpose** | Runtime value location at specific execution points | Static metadata (types, files, function info) | +| **When used** | During execution, debugger reads variable values | To display type names, source files, etc. | +| **DWARF analog** | Location lists (`.debug_loc`) | `.debug_info` / `.debug_abbrev` | + +The `.debug_info` section tells you **what** variables exist (name, type, scope). The `DebugVar` decorators tell you **where** a variable's value is at a specific point during execution. + +### DebugVarInfo Structure + +Each `Decorator::DebugVar` contains a `DebugVarInfo` with the following fields: + +| Field | Type | Description | +|-------|------|-------------| +| name | String | Variable name | +| value_location | DebugVarLocation | Where to find the value | +| type_id | Option\<u32\> | Index into `.debug_types` | +| arg_index | Option\<u32\> | 1-based parameter index (if parameter) | +| location | Option\<FileLineCol\> | Source location of declaration | + +### DebugVarLocation Variants + +The `value_location` field describes where the variable's value can be found at runtime: + +| Variant | Encoding | Description | +|---------|----------|-------------| +| `Stack(u8)` | Tag 0 + u8 | Value is at stack position N (0 = top) | +| `Memory(u32)` | Tag 1 + u32 | Value is at memory word address | +| `Const(u64)` | Tag 2 + u64 | Value is a constant field element | +| `Local(u16)` | Tag 3 + u16 | Value is in local variable slot N | +| `Expression(Vec<u8>)` | Tag 4 + len + bytes | Complex location (DWARF-style expression) | + +### Example + +For a function like: +```rust +fn add(x: i32, y: i32) -> i32 { + let sum = x + y; + sum +} +``` + +The MAST will contain decorators like: +``` +# At function entry +Decorator::DebugVar { name: "x", value_location: Local(0),
arg_index: Some(1), ... } +Decorator::DebugVar { name: "y", value_location: Local(1), arg_index: Some(2), ... } + +# After computing sum +Decorator::DebugVar { name: "sum", value_location: Stack(0), arg_index: None, ... } +``` + +A debugger pausing at a specific instruction can read these decorators to know where each variable's value is stored at that moment. + +--- diff --git a/docs/DebugInfoMetadata.md b/docs/DebugInfoMetadata.md new file mode 100644 index 000000000..bdf71aa21 --- /dev/null +++ b/docs/DebugInfoMetadata.md @@ -0,0 +1,216 @@ +# Debug Info Metadata Pipeline + +This note describes how the Miden compiler now threads source-level variable +metadata through HIR when compiling Wasm input. The goal is to make every HIR +function carry `DI*` attributes and `dbg.*` intrinsics that mirror the DWARF +records present in the Wasm binary, so downstream passes (or tooling consuming +serialized HIR) can reason about user variables. + +## High-Level Flow + +1. **DWARF ingestion** – while `ModuleEnvironment` parses the module, we retain + the full set of DWARF sections (`.debug_info`, `.debug_line`, etc.) and the + wasm name section. +2. **Metadata extraction** – before we translate functions, we walk the DWARF + using `addr2line` to determine source files and fall back to the wasm module + path when no debug info is present. We also load parameter/local names from + the name section. The result is a `FunctionDebugInfo` record containing a + `DICompileUnitAttr`, `DISubprogramAttr`, and a per-index list of + `DILocalVariableAttr`s. +3. **Translation-time tracking** – every `FuncTranslator` receives the + `FunctionDebugInfo` for the function it is translating. `FunctionBuilderExt` + attaches the compile-unit/subprogram attrs to the function op, records entry + parameters, and emits `builtin.dbg_value` intrinsics whenever locals change. +4. **Span-aware updates** – as each wasm operator is translated we store the + real `SourceSpan`. 
The first non-unknown span is used to retroactively patch + the compile unit, subprogram, and parameter variable records with real file, + line, and column information so the resulting HIR references locations in + the actual user file. + +The emitted HIR therefore contains both the SSA instructions and the debug +intrinsics that map values back to the user program. + +## HIR Metadata Constructs + +The core types live in `hir/src/attributes/debug.rs`: + +- `DICompileUnitAttr` – captures language, primary file, optional directory, + producer string, and optimized flag. Stored once per function/module. +- `DISubprogramAttr` – names the function, file, line/column, optional linkage + name, and flags indicating definition/local status. Does not embed the compile + unit to avoid redundancy - stored once per function. +- `DILocalVariableAttr` – describes parameters or locals, including the source + location, optional argument index, and optional `Type`. Does not embed the + scope to avoid redundancy - the scope is implied by the containing function. +- `DIExpressionAttr` – represents DWARF location expressions that describe how + to compute or locate a variable's value. +- `DIExpressionOp` – individual operations within a DIExpression, including: + - `WasmLocal(u32)` - Variable is in a WebAssembly local + - `WasmGlobal(u32)` - Variable is in a WebAssembly global + - `WasmStack(u32)` - Variable is on the WebAssembly operand stack + - `ConstU64(u64)` - Unsigned constant value + - Additional DWARF operations for complex expressions + +These attrs are exported from `midenc_hir` so clients can construct them +programmatically. The debug intrinsic (`builtin.dbg_value` from +`hir/src/dialects/builtin/ops/debug.rs`) consumes a `Value` plus the +metadata attributes. The `dbg_value` operation includes a `DIExpressionAttr` +field that describes the location or computation of the variable's value.
+ +## Collecting Metadata from Wasm + +`frontend/wasm/src/module/debug_info.rs` is the central collector. The key +steps are: + +1. Iterate over the bodies scheduled for translation (`ParsedModule::function_body_inputs`). +2. For each body, determine the source file and first line using `addr2line` and + store fallbacks (module path or `unknown`) when debug info is missing. +3. Construct `DICompileUnitAttr`/`DISubprogramAttr` and a `Vec<Option<LocalDebugInfo>>` + that covers both signature parameters and wasm locals. Parameter/local names + sourced from the name section are used when available; otherwise we emit + synthesized names (`arg{n}`, `local{n}`). +4. Store the result in a map `FxHashMap<FuncIndex, Rc<RefCell<FunctionDebugInfo>>>` + attached to `ParsedModule`. We use `RefCell` so later stages can patch the + attrs once the translator sees more accurate spans. + +## Using Metadata During Translation + +The translation machinery picks up those records as follows: + +- `build_ir.rs` moves the precomputed map onto the `FuncTranslator` invocation. +- `FuncTranslator::translate_body` installs the debug info on its + `FunctionBuilderExt` before any instructions are emitted. +- `FunctionBuilderExt::set_debug_metadata` attaches compile-unit/subprogram + attrs to the function op and resets its internal bookkeeping. +- Entry parameters are stored via `register_parameter` so we can emit + `dbg.value` instructions after we encounter the first real span (parameters + have no dedicated wasm operator with source ranges). +- Every wasm operator calls `builder.record_debug_span(span)` prior to emission; + the first non-unknown span updates the compile unit/subprogram attrs and + triggers parameter `dbg.value` emission so arguments are tied to the correct + location. +- `def_var_with_dbg` is the canonical entry point for `local.set` and + `local.tee`. It updates the SSA value and immediately emits a + `builtin.dbg_value` with the precise span of the store. +- Decoded `DW_AT_location` ranges are normalized into a per-function schedule.
+ As the translator visits each wasm offset we opportunistically emit extra + `dbg.value` intrinsics so source variables track transitions between Wasm + locals without relying on `builtin.dbg_declare`. +- When present, `DW_AT_decl_line`/`DW_AT_decl_column` on variables override the + default span so we keep the original lexical definition sites instead of + inheriting the statement we first observed during translation. + +Locals declared in the wasm prologue receive an initial value but no debug +intrinsic until they are defined in user code. Subsequent writes insert +additional `dbg.value` ops so consumers can track value changes over time. + +## Example + +In the serialized HIR for the test pipeline you now see: + +```hir +builtin.dbg_value v0 #[expression = di.expression(DW_OP_WASM_local 0)] + #[variable = di.local_variable( + name = arg0, + file = /path/to/lib.rs, + line = 25, + column = 5, + arg = 1, + ty = i32 + )] # /path/to/lib.rs:25:5; +``` + +The `expression` attribute indicates that the variable is stored in WASM local 0. +When a variable moves between locations, additional `dbg_value` operations are +emitted with updated expressions: + +```hir +builtin.dbg_value v22 #[expression = di.expression(DW_OP_WASM_local 3)] + #[variable = di.local_variable(name = sum, ...)] +``` + +Both the attribute and the trailing comment reference the same source location +so downstream tooling can disambiguate the variable regardless of how it parses +HIR. + +## Fallback Behavior and Best-Effort Cases + +- If DWARF lookup fails entirely, we still emit attrs but populate + `file = unknown`, `line = 0`, and omit columns. As soon as a real span is + observed, those fields are patched. +- If the wasm name section lacks parameter/local names, we keep the generated + `arg{n}`/`local{n}` placeholders in the HIR. This mirrors LLVM’s behavior when + debug names are unavailable.
+ +## Next Steps and Limitations + +- **Location expressions** – We now decode `DW_AT_location` records for locals + and parameters, interpret simple Wasm location opcodes (including locals, + globals, and operand-stack slots), and attach them to `dbg.value` operations + as `DIExpressionAttr`. The system emits additional `dbg.value` intrinsics + whenever a variable's storage changes, with each operation containing the + appropriate expression. This allows modeling multi-location lifetimes where + variables move between different storage locations. Support for more complex + composite expressions (pieces, arithmetic operations, etc.) is implemented + but not fully utilized from DWARF parsing yet. +- **Lifetimes** – we reset the compile-unit/subprogram metadata to the first + span we encounter, but we do not track scopes or lexical block DIEs. Extending + the collector to read `DW_TAG_lexical_block` and other scope markers would + allow more precise lifetime modelling. +- **Cross-language inputs** – the language string comes from DWARF or defaults + to `"wasm"`. If the Wasm file was produced by Rust/C compilers we could read + `DW_AT_language` to provide richer values. +- **Incremental spans** – parameter debug entries currently use the first + non-unknown span in the function. For multi-file functions we might wish to + attach per-parameter spans using `DW_AT_decl_file`/`DW_AT_decl_line` if the + DWARF provides them. +- **MASM codegen** – The MASM backend emits `Decorator::DebugVar` entries + containing `DebugVarInfo` with variable names, runtime locations + (`DebugVarLocation::Stack`, `Local`, etc.), source positions, and type + information. These decorators are embedded in the MAST instruction stream, + enabling debuggers to track variable values at specific execution points. + +These refinements can be implemented without changing the public HIR surface; we +would only update the metadata collector and the builder helpers.
+ +## Testing + +The debug info implementation is validated by lit tests in `tests/lit/debug/`: + +- **simple_debug.shtest** – verifies basic debug info for function parameters +- **function_metadata.shtest** – tests debug metadata on multi-parameter functions +- **variable_locations.shtest** – validates debug info tracking for variables in a loop +- additional tests in the same directory (e.g. **location_expressions.shtest**) + +Each test compiles a small Rust snippet with DWARF enabled (`-C debuginfo=2`), +runs it through `midenc compile --emit hir`, and uses `FileCheck` to verify that +`builtin.dbg_value` intrinsics are emitted with the correct `di.local_variable` +attributes containing variable names, file paths, line numbers, and types. + +To run the debug info tests: + +```bash +lit -va tests/lit/debug/ +``` + +Or to run a specific test: + +```bash +lit -va tests/lit/debug/simple_debug.shtest +``` + +## Bottom Line + +- HIR now exposes DWARF-like metadata via reusable `DI*` attributes including + `DIExpressionAttr` for location expressions. +- The wasm frontend precomputes function metadata, keeps it mutable during + translation, and emits `dbg.value` intrinsics with location expressions for + every parameter/variable assignment. +- Location expressions (DW_OP_WASM_local, etc.) are preserved from DWARF and + attached to `dbg.value` operations, enabling accurate tracking of variables + as they move between different storage locations. +- The serialized HIR describes user variables with accurate file/line/column + information and storage locations, providing a foundation for future tooling + (debugging, diagnostics correlation, or IR-level analysis). +- The design avoids redundancy by not embedding scope hierarchies in each variable, + instead relying on structural containment to establish relationships.
diff --git a/frontend/wasm/src/code_translator/mod.rs b/frontend/wasm/src/code_translator/mod.rs index 4cef29cbc..2c9f12a35 100644 --- a/frontend/wasm/src/code_translator/mod.rs +++ b/frontend/wasm/src/code_translator/mod.rs @@ -57,6 +57,8 @@ pub fn translate_operator( diagnostics: &DiagnosticsHandler, span: SourceSpan, ) -> WasmResult<()> { + builder.record_debug_span(span); + if !state.reachable { translate_unreachable_operator(op, builder, state, mod_types, diagnostics, span)?; return Ok(()); diff --git a/frontend/wasm/src/module/build_ir.rs b/frontend/wasm/src/module/build_ir.rs index 1c055a288..2f545f134 100644 --- a/frontend/wasm/src/module/build_ir.rs +++ b/frontend/wasm/src/module/build_ir.rs @@ -14,7 +14,8 @@ use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic, Severity, use wasmparser::Validator; use super::{ - MemoryIndex, module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, + MemoryIndex, debug_info::collect_function_debug_info, + module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, }; use crate::{ WasmTranslationConfig, @@ -116,6 +117,14 @@ pub fn build_ir_module( ..Default::default() }) .into_diagnostic()?; + parsed_module.function_debug = collect_function_debug_info( + parsed_module, + module_types, + &parsed_module.module, + &addr2line, + context.diagnostics(), + ); + let mut func_translator = FuncTranslator::new(context.clone()); // Although this renders this parsed module invalid(without function // bodies), we don't support multiple module instances. Thus, this @@ -188,8 +197,12 @@ pub fn build_ir_module( continue; } - let FunctionBodyData { validator, body } = body_data; + let FunctionBodyData { + validator, body, .. 
+ } = body_data; let mut func_validator = validator.into_validator(Default::default()); + let debug_info = parsed_module.function_debug.get(&func_index).cloned(); + func_translator.translate_body( &body, function_ref, @@ -200,6 +213,7 @@ pub fn build_ir_module( context.session(), &mut func_validator, _config, + debug_info, )?; } Ok(()) diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs new file mode 100644 index 000000000..9f02d5f7a --- /dev/null +++ b/frontend/wasm/src/module/debug_info.rs @@ -0,0 +1,697 @@ +use alloc::{rc::Rc, vec::Vec}; +use core::cell::RefCell; +use std::path::Path; + +use addr2line::Context; +use cranelift_entity::EntityRef; +use gimli::{self, AttributeValue, read::Operation}; +use log::debug; +use midenc_hir::{ + DICompileUnitAttr, DIExpressionAttr, DIExpressionOp, DILocalVariableAttr, DISubprogramAttr, + FxHashMap, SourceSpan, interner::Symbol, +}; +use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic}; + +use super::{ + FuncIndex, Module, + module_env::{DwarfReader, FunctionBodyData, ParsedModule}, + types::{WasmFuncType, convert_valtype, ir_type}, +}; +use crate::module::types::ModuleTypesBuilder; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LocationDescriptor { + /// Inclusive start offset within the function's code, relative to the Wasm code section. + pub start: u64, + /// Exclusive end offset. `None` indicates the location is valid until the end of the function. 
+ pub end: Option, + pub storage: VariableStorage, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum VariableStorage { + Local(u32), + Global(u32), + Stack(u32), + ConstU64(u64), + Unsupported, +} + +impl VariableStorage { + pub fn as_local(&self) -> Option { + match self { + VariableStorage::Local(index) => Some(*index), + _ => None, + } + } + + pub fn to_expression_op(&self) -> DIExpressionOp { + match self { + VariableStorage::Local(idx) => DIExpressionOp::WasmLocal(*idx), + VariableStorage::Global(idx) => DIExpressionOp::WasmGlobal(*idx), + VariableStorage::Stack(idx) => DIExpressionOp::WasmStack(*idx), + VariableStorage::ConstU64(val) => DIExpressionOp::ConstU64(*val), + VariableStorage::Unsupported => { + DIExpressionOp::Unsupported(Symbol::intern("unsupported")) + } + } + } +} + +#[derive(Clone)] +pub struct LocalDebugInfo { + pub attr: DILocalVariableAttr, + pub locations: Vec, + pub expression: Option, +} + +#[derive(Clone)] +pub struct FunctionDebugInfo { + pub compile_unit: DICompileUnitAttr, + pub subprogram: DISubprogramAttr, + pub locals: Vec>, + pub function_span: Option, + pub location_schedule: Vec, + pub next_location_event: usize, +} + +#[derive(Default, Clone)] +struct DwarfLocalData { + name: Option, + locations: Vec, + decl_line: Option, + decl_column: Option, +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct LocationScheduleEntry { + pub offset: u64, + pub var_index: usize, + pub storage: VariableStorage, +} + +impl FunctionDebugInfo { + pub fn local_attr(&self, index: usize) -> Option<&DILocalVariableAttr> { + self.locals.get(index).and_then(|info| info.as_ref().map(|data| &data.attr)) + } +} + +pub fn collect_function_debug_info( + parsed_module: &ParsedModule, + module_types: &ModuleTypesBuilder, + module: &Module, + addr2line: &Context>, + diagnostics: &DiagnosticsHandler, +) -> FxHashMap>> { + let mut map = FxHashMap::default(); + + let dwarf_locals = collect_dwarf_local_data(parsed_module, module, diagnostics); + + debug!( + 
"Collecting function debug info for {} functions", + parsed_module.function_body_inputs.len() + ); + + for (defined_idx, body) in parsed_module.function_body_inputs.iter() { + let func_index = module.func_index(defined_idx); + let func_name = module.func_name(func_index); + if let Some(info) = build_function_debug_info( + parsed_module, + module_types, + module, + func_index, + body, + addr2line, + diagnostics, + dwarf_locals.get(&func_index), + ) { + debug!( + "Collected debug info for function {}: {} locals", + func_name.as_str(), + info.locals.len() + ); + map.insert(func_index, Rc::new(RefCell::new(info))); + } else { + debug!("No debug info collected for function {}", func_name.as_str()); + } + } + + debug!("Collected debug info for {} functions total", map.len()); + map +} + +#[allow(clippy::too_many_arguments)] +fn build_function_debug_info( + parsed_module: &ParsedModule, + module_types: &ModuleTypesBuilder, + module: &Module, + func_index: FuncIndex, + body: &FunctionBodyData, + addr2line: &Context>, + diagnostics: &DiagnosticsHandler, + dwarf_locals: Option<&FxHashMap>, +) -> Option { + let func_name = module.func_name(func_index); + + let (file_symbol, directory_symbol) = determine_file_symbols(parsed_module, addr2line, body); + let (line, column) = determine_location(addr2line, body.body_offset); + + let mut compile_unit = DICompileUnitAttr::new(Symbol::intern("wasm"), file_symbol); + compile_unit.directory = directory_symbol; + compile_unit.producer = Some(Symbol::intern("midenc-frontend-wasm")); + + let mut subprogram = DISubprogramAttr::new(func_name, compile_unit.file, line, column); + subprogram.is_definition = true; + + let wasm_signature = module_types[module.functions[func_index].signature].clone(); + let locals = build_local_debug_info( + module, + func_index, + &wasm_signature, + body, + &subprogram, + diagnostics, + dwarf_locals, + ); + let location_schedule = build_location_schedule(&locals); + + Some(FunctionDebugInfo { + compile_unit, + 
subprogram, + locals, + function_span: None, + location_schedule, + next_location_event: 0, + }) +} + +fn determine_file_symbols( + parsed_module: &ParsedModule, + addr2line: &Context>, + body: &FunctionBodyData, +) -> (Symbol, Option) { + if let Some(location) = addr2line + .find_location(body.body_offset) + .ok() + .flatten() + .and_then(|loc| loc.file.map(|file| file.to_owned())) + { + let path = Path::new(location.as_str()); + let directory_symbol = path.parent().and_then(|parent| parent.to_str()).map(Symbol::intern); + let file_symbol = Symbol::intern(location.as_str()); + (file_symbol, directory_symbol) + } else if let Some(path) = parsed_module.wasm_file.path.as_ref() { + let file_symbol = Symbol::intern(path.to_string_lossy().as_ref()); + let directory_symbol = path.parent().and_then(|parent| parent.to_str()).map(Symbol::intern); + (file_symbol, directory_symbol) + } else { + (Symbol::intern("unknown"), None) + } +} + +fn determine_location(addr2line: &Context>, offset: u64) -> (u32, Option) { + match addr2line.find_location(offset).ok().flatten() { + Some(location) => { + let line = location.line.unwrap_or_default(); + let column = location.column; + (line, column) + } + None => (0, None), + } +} + +fn build_local_debug_info( + module: &Module, + func_index: FuncIndex, + wasm_signature: &WasmFuncType, + body: &FunctionBodyData, + subprogram: &DISubprogramAttr, + diagnostics: &DiagnosticsHandler, + dwarf_locals: Option<&FxHashMap>, +) -> Vec> { + let param_count = wasm_signature.params().len(); + let mut local_entries = Vec::new(); + if let Ok(mut locals_reader) = body.body.get_locals_reader().into_diagnostic() { + let decl_count = locals_reader.get_count(); + for _ in 0..decl_count { + if let Ok((count, ty)) = locals_reader.read().into_diagnostic() { + local_entries.push((count, ty)); + } + } + } + let local_count: usize = local_entries.iter().map(|(count, _)| *count as usize).sum(); + + let total = param_count + local_count; + let mut locals = vec![None; 
total]; + + for (param_idx, wasm_ty) in wasm_signature.params().iter().enumerate() { + let index_u32 = param_idx as u32; + let dwarf_entry = dwarf_locals.and_then(|map| map.get(&index_u32)); + let mut name_symbol = module + .local_name(func_index, index_u32) + .unwrap_or_else(|| Symbol::intern(format!("arg{param_idx}"))); + if let Some(info) = dwarf_entry + && let Some(symbol) = info.name + { + name_symbol = symbol; + } + let mut attr = DILocalVariableAttr::new( + name_symbol, + subprogram.file, + subprogram.line, + subprogram.column, + ); + attr.arg_index = Some((param_idx + 1) as u32); + if let Ok(ty) = ir_type(*wasm_ty, diagnostics) { + attr.ty = Some(ty); + } + let dwarf_info = dwarf_entry.cloned(); + if let Some(info) = dwarf_info.as_ref() { + if let Some(line) = info.decl_line + && line != 0 + { + attr.line = line; + } + if info.decl_column.is_some() { + attr.column = info.decl_column; + } + } + let locations = dwarf_info.as_ref().map(|info| info.locations.clone()).unwrap_or_default(); + + // Create expression from the first location if available + let expression = if !locations.is_empty() { + let ops = vec![locations[0].storage.to_expression_op()]; + Some(DIExpressionAttr::with_ops(ops)) + } else { + None + }; + + locals[param_idx] = Some(LocalDebugInfo { + attr, + locations, + expression, + }); + } + + let mut next_local_index = param_count; + for (count, ty) in local_entries { + for _ in 0..count { + let index_u32 = next_local_index as u32; + let dwarf_entry = dwarf_locals.and_then(|map| map.get(&index_u32)); + let mut name_symbol = module + .local_name(func_index, index_u32) + .unwrap_or_else(|| Symbol::intern(format!("local{next_local_index}"))); + if let Some(info) = dwarf_entry + && let Some(symbol) = info.name + { + name_symbol = symbol; + } + let mut attr = DILocalVariableAttr::new( + name_symbol, + subprogram.file, + subprogram.line, + subprogram.column, + ); + let wasm_ty = convert_valtype(ty); + if let Ok(ir_ty) = ir_type(wasm_ty, diagnostics) { + 
attr.ty = Some(ir_ty); + } + let dwarf_info = dwarf_entry.cloned(); + if let Some(info) = dwarf_info.as_ref() { + if let Some(line) = info.decl_line + && line != 0 + { + attr.line = line; + } + if info.decl_column.is_some() { + attr.column = info.decl_column; + } + } + let locations = + dwarf_info.as_ref().map(|info| info.locations.clone()).unwrap_or_default(); + + // Create expression from the first location if available + let expression = if !locations.is_empty() { + let ops = vec![locations[0].storage.to_expression_op()]; + Some(DIExpressionAttr::with_ops(ops)) + } else { + None + }; + + locals[next_local_index] = Some(LocalDebugInfo { + attr, + locations, + expression, + }); + next_local_index += 1; + } + } + + locals +} + +fn build_location_schedule(locals: &[Option]) -> Vec { + let mut schedule = Vec::new(); + for (var_index, info_opt) in locals.iter().enumerate() { + let Some(info) = info_opt else { + continue; + }; + for descriptor in &info.locations { + if descriptor.storage.as_local().is_none() { + continue; + } + schedule.push(LocationScheduleEntry { + offset: descriptor.start, + var_index, + storage: descriptor.storage.clone(), + }); + } + } + schedule.sort_by(|a, b| a.offset.cmp(&b.offset)); + schedule +} + +fn collect_dwarf_local_data( + parsed_module: &ParsedModule, + module: &Module, + diagnostics: &DiagnosticsHandler, +) -> FxHashMap> { + let _ = diagnostics; + let dwarf = &parsed_module.debuginfo.dwarf; + + let mut func_by_name = FxHashMap::default(); + for (func_index, _) in module.functions.iter() { + let name = module.func_name(func_index).as_str().to_owned(); + func_by_name.insert(name, func_index); + } + + let mut low_pc_map = FxHashMap::default(); + let code_section_offset = parsed_module.wasm_file.code_section_offset; + for (defined_idx, body) in parsed_module.function_body_inputs.iter() { + let func_index = module.func_index(defined_idx); + let adjusted = body.body_offset.saturating_sub(code_section_offset); + low_pc_map.insert(adjusted, 
func_index); + } + + let mut results: FxHashMap> = FxHashMap::default(); + let mut units = dwarf.units(); + loop { + let header = match units.next() { + Ok(Some(header)) => header, + Ok(None) => break, + Err(err) => { + debug!("failed to iterate DWARF units: {err:?}"); + break; + } + }; + let unit = match dwarf.unit(header) { + Ok(unit) => unit, + Err(err) => { + debug!("failed to load DWARF unit: {err:?}"); + continue; + } + }; + + let mut entries = unit.entries(); + loop { + let next = match entries.next_dfs() { + Ok(Some(data)) => data, + Ok(None) => break, + Err(err) => { + debug!("error while traversing DWARF entries: {err:?}"); + break; + } + }; + let (delta, entry) = next; + let _ = delta; // we don't need depth deltas explicitly. + + if entry.tag() == gimli::DW_TAG_subprogram { + let resolved = + resolve_subprogram_target(dwarf, &unit, &func_by_name, &low_pc_map, entry); + let Some((func_index, low_pc, high_pc)) = resolved else { + continue; + }; + + if let Err(err) = collect_subprogram_variables( + dwarf, + &unit, + entry.offset(), + func_index, + low_pc, + high_pc, + &mut results, + ) { + debug!("failed to gather variables for function {:?}: {err:?}", func_index); + } + } + } + } + + results +} + +fn resolve_subprogram_target>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + func_by_name: &FxHashMap, + low_pc_map: &FxHashMap, + entry: &gimli::DebuggingInformationEntry, +) -> Option<(FuncIndex, u64, Option)> { + let mut maybe_name: Option = None; + let mut low_pc = None; + let mut high_pc = None; + + let mut attrs = entry.attrs(); + while let Ok(Some(attr)) = attrs.next() { + match attr.name() { + gimli::DW_AT_name => { + if let Ok(raw) = dwarf.attr_string(unit, attr.value()) + && let Ok(name) = raw.to_string_lossy() + { + maybe_name = Some(name.into_owned()); + } + } + gimli::DW_AT_linkage_name => { + if maybe_name.is_none() + && let Ok(raw) = dwarf.attr_string(unit, attr.value()) + && let Ok(name) = raw.to_string_lossy() + { + maybe_name = 
Some(name.into_owned()); + } + } + gimli::DW_AT_low_pc => match attr.value() { + AttributeValue::Addr(addr) => low_pc = Some(addr), + AttributeValue::Udata(val) => low_pc = Some(val), + _ => {} + }, + gimli::DW_AT_high_pc => match attr.value() { + AttributeValue::Addr(addr) => high_pc = Some(addr), + AttributeValue::Udata(size) => { + if let Some(base) = low_pc { + high_pc = Some(base.saturating_add(size)); + } + } + _ => {} + }, + _ => {} + } + } + + if let Some(name) = maybe_name + && let Some(&func_index) = func_by_name.get(&name) + { + return Some((func_index, low_pc.unwrap_or_default(), high_pc)); + } + + if let Some(base) = low_pc + && let Some(&func_index) = low_pc_map.get(&base) + { + return Some((func_index, base, high_pc)); + } + + None +} + +fn collect_subprogram_variables>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + offset: gimli::UnitOffset, + func_index: FuncIndex, + low_pc: u64, + high_pc: Option, + results: &mut FxHashMap>, +) -> gimli::Result<()> { + let mut tree = unit.entries_tree(Some(offset))?; + let root = tree.root()?; + let mut children = root.children(); + while let Some(child) = children.next()? { + walk_variable_nodes(dwarf, unit, child, func_index, low_pc, high_pc, results)?; + } + Ok(()) +} + +fn walk_variable_nodes>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + node: gimli::EntriesTreeNode, + func_index: FuncIndex, + low_pc: u64, + high_pc: Option, + results: &mut FxHashMap>, +) -> gimli::Result<()> { + let entry = node.entry(); + match entry.tag() { + gimli::DW_TAG_formal_parameter | gimli::DW_TAG_variable => { + if let Some((local_index, mut data)) = + decode_variable_entry(dwarf, unit, entry, low_pc, high_pc)? 
+ { + let local_map = results.entry(func_index).or_default(); + let entry = local_map.entry(local_index).or_insert_with(DwarfLocalData::default); + entry.name = entry.name.or(data.name); + entry.decl_line = entry.decl_line.or(data.decl_line); + entry.decl_column = entry.decl_column.or(data.decl_column); + if !data.locations.is_empty() { + entry.locations.append(&mut data.locations); + } + } + } + _ => {} + } + + let mut children = node.children(); + while let Some(child) = children.next()? { + walk_variable_nodes(dwarf, unit, child, func_index, low_pc, high_pc, results)?; + } + Ok(()) +} + +fn decode_variable_entry>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + entry: &gimli::DebuggingInformationEntry<'_, '_, R>, + low_pc: u64, + high_pc: Option, +) -> gimli::Result> { + let mut name_symbol = None; + let mut location_attr = None; + let mut decl_line = None; + let mut decl_column = None; + + let mut attrs = entry.attrs(); + while let Some(attr) = attrs.next()? { + match attr.name() { + gimli::DW_AT_name => { + if let Ok(raw) = dwarf.attr_string(unit, attr.value()) + && let Ok(text) = raw.to_string_lossy() + { + name_symbol = Some(Symbol::intern(text.as_ref())); + } + } + gimli::DW_AT_location => location_attr = Some(attr.value()), + gimli::DW_AT_decl_line => { + if let Some(line) = attr.udata_value() { + decl_line = Some(line as u32); + } + } + gimli::DW_AT_decl_column => { + if let Some(column) = attr.udata_value() { + decl_column = Some(column as u32); + } + } + _ => {} + } + } + + let Some(location_value) = location_attr else { + return Ok(None); + }; + + let mut locations = Vec::new(); + + match location_value { + AttributeValue::Exprloc(expr) => { + if let Some(storage) = decode_storage_from_expression(&expr, unit)? 
+ && let Some(local_index) = storage.as_local() + { + locations.push(LocationDescriptor { + start: low_pc, + end: high_pc, + storage, + }); + let data = DwarfLocalData { + name: name_symbol, + locations, + decl_line, + decl_column, + }; + return Ok(Some((local_index, data))); + } + return Ok(None); + } + AttributeValue::LocationListsRef(offset) => { + let mut iter = dwarf.locations.locations( + offset, + unit.encoding(), + low_pc, + &dwarf.debug_addr, + unit.addr_base, + )?; + while let Some(entry) = iter.next()? { + let storage_expr = entry.data; + if let Some(storage) = decode_storage_from_expression(&storage_expr, unit)? + && storage.as_local().is_some() + { + locations.push(LocationDescriptor { + start: entry.range.begin, + end: Some(entry.range.end), + storage, + }); + continue; + } + } + if locations.is_empty() { + return Ok(None); + } + let Some(local_index) = locations.iter().find_map(|desc| desc.storage.as_local()) + else { + return Ok(None); + }; + let data = DwarfLocalData { + name: name_symbol, + locations, + decl_line, + decl_column, + }; + return Ok(Some((local_index, data))); + } + _ => {} + } + + Ok(None) +} + +fn decode_storage_from_expression>( + expr: &gimli::Expression, + unit: &gimli::Unit, +) -> gimli::Result> { + let mut operations = expr.clone().operations(unit.encoding()); + let mut storage = None; + while let Some(op) = operations.next()? 
{ + match op { + Operation::WasmLocal { index } => storage = Some(VariableStorage::Local(index)), + Operation::WasmGlobal { index } => storage = Some(VariableStorage::Global(index)), + Operation::WasmStack { index } => storage = Some(VariableStorage::Stack(index)), + Operation::UnsignedConstant { value } => { + storage = Some(VariableStorage::ConstU64(value)) + } + Operation::StackValue => {} + _ => {} + } + } + + Ok(storage) +} + +fn func_local_index(func_index: FuncIndex, module: &Module) -> Option { + module.defined_func_index(func_index).map(|idx| idx.index()) +} diff --git a/frontend/wasm/src/module/func_translation_state.rs b/frontend/wasm/src/module/func_translation_state.rs index bb0f96113..b0826a2e1 100644 --- a/frontend/wasm/src/module/func_translation_state.rs +++ b/frontend/wasm/src/module/func_translation_state.rs @@ -5,13 +5,16 @@ //! //! Based on Cranelift's Wasm -> CLIF translator v11.0.0 +use alloc::rc::Rc; +use core::cell::RefCell; + use midenc_dialect_hir::HirOpBuilder; use midenc_hir::{ BlockRef, Builder, OperationRef, SourceSpan, Type, ValueRef, dialects::builtin::attributes::Signature, }; -use super::function_builder_ext::FunctionBuilderExt; +use super::{debug_info::FunctionDebugInfo, function_builder_ext::FunctionBuilderExt}; use crate::{error::WasmResult, module::types::BlockType}; /// Information about the presence of an associated `else` for an `if`, or the @@ -232,6 +235,8 @@ pub struct FuncTranslationState { /// Is the current translation state still reachable? This is false when translating operators /// like End, Return, or Unreachable. pub(crate) reachable: bool, + /// Optional debug metadata for the current function. 
+ pub(crate) debug_info: Option>>, } impl FuncTranslationState { @@ -241,6 +246,7 @@ impl FuncTranslationState { stack: Vec::new(), control_stack: Vec::new(), reachable: true, + debug_info: None, } } @@ -248,6 +254,7 @@ impl FuncTranslationState { debug_assert!(self.stack.is_empty()); debug_assert!(self.control_stack.is_empty()); self.reachable = true; + self.debug_info = None; } /// Initialize the state for compiling a function with the given signature. @@ -259,6 +266,10 @@ impl FuncTranslationState { self.push_block(exit_block, 0, sig.results().len()); } + pub(crate) fn set_debug_info(&mut self, info: Option>>) { + self.debug_info = info; + } + /// Push a value. pub(crate) fn push1(&mut self, val: ValueRef) { self.stack.push(val); diff --git a/frontend/wasm/src/module/func_translator.rs b/frontend/wasm/src/module/func_translator.rs index 7693431b9..aa54f3bfb 100644 --- a/frontend/wasm/src/module/func_translator.rs +++ b/frontend/wasm/src/module/func_translator.rs @@ -21,8 +21,9 @@ use midenc_session::{ use wasmparser::{FuncValidator, FunctionBody, WasmModuleResources}; use super::{ - function_builder_ext::SSABuilderListener, module_env::ParsedModule, - module_translation_state::ModuleTranslationState, types::ModuleTypesBuilder, + debug_info::FunctionDebugInfo, function_builder_ext::SSABuilderListener, + module_env::ParsedModule, module_translation_state::ModuleTranslationState, + types::ModuleTypesBuilder, }; use crate::{ code_translator::translate_operator, @@ -69,12 +70,19 @@ impl FuncTranslator { session: &Session, func_validator: &mut FuncValidator, config: &crate::WasmTranslationConfig, + debug_info: Option>>, ) -> WasmResult<()> { let context = func.borrow().as_operation().context_rc(); let mut op_builder = midenc_hir::OpBuilder::new(context) .with_listener(SSABuilderListener::new(self.func_ctx.clone())); let mut builder = FunctionBuilderExt::new(func, &mut op_builder); + if let Some(info) = debug_info.clone() { + builder.set_debug_metadata(info); + } + + 
self.state.set_debug_info(debug_info); + let entry_block = builder.current_block(); builder.seal_block(entry_block); // Declare all predecessors known. @@ -313,6 +321,8 @@ fn parse_function_body( &session.diagnostics, effective_span, )?; + + builder.apply_location_schedule(offset, span); } // The final `End` operator left us in the exit block where we need to manually add a return diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index eee1e55ce..3e2d081c9 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -1,7 +1,9 @@ -use alloc::rc::Rc; +use alloc::{rc::Rc, vec::Vec}; use core::cell::RefCell; +use std::path::Path; -use cranelift_entity::SecondaryMap; +use cranelift_entity::{EntityRef as _, SecondaryMap}; +use log::warn; use midenc_dialect_arith::ArithOpBuilder; use midenc_dialect_cf::ControlFlowOpBuilder; use midenc_dialect_hir::HirOpBuilder; @@ -17,7 +19,10 @@ use midenc_hir::{ traits::{BranchOpInterface, Terminator}, }; -use crate::ssa::{SSABuilder, SideEffects, Variable}; +use crate::{ + module::debug_info::{FunctionDebugInfo, LocationScheduleEntry}, + ssa::{SSABuilder, SideEffects, Variable}, +}; /// Tracking variables and blocks for SSA construction. 
pub struct FunctionBuilderContext { @@ -127,6 +132,9 @@ impl Listener for SSABuilderListener { pub struct FunctionBuilderExt<'c, B: ?Sized + Builder> { inner: FunctionBuilder<'c, B>, func_ctx: Rc>, + debug_info: Option>>, + param_values: Vec<(Variable, ValueRef)>, + param_dbg_emitted: bool, } impl<'c> FunctionBuilderExt<'c, OpBuilder> { @@ -136,11 +144,153 @@ impl<'c> FunctionBuilderExt<'c, OpBuilder> { let inner = FunctionBuilder::new(func, builder); - Self { inner, func_ctx } + Self { + inner, + func_ctx, + debug_info: None, + param_values: Vec::new(), + param_dbg_emitted: false, + } } } impl FunctionBuilderExt<'_, B> { + const DI_COMPILE_UNIT_ATTR: &'static str = "di.compile_unit"; + const DI_SUBPROGRAM_ATTR: &'static str = "di.subprogram"; + + pub fn set_debug_metadata(&mut self, info: Rc>) { + self.debug_info = Some(info); + self.param_dbg_emitted = false; + self.refresh_function_debug_attrs(); + } + + fn emit_dbg_value_for_var(&mut self, var: Variable, value: ValueRef, span: SourceSpan) { + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let idx = var.index(); + let (attr_opt, expr_opt) = { + let info = info.borrow(); + let local_info = info.locals.get(idx).and_then(|l| l.as_ref()); + match local_info { + Some(l) => (Some(l.attr.clone()), l.expression.clone()), + None => (None, None), + } + }; + let Some(mut attr) = attr_opt else { + return; + }; + + if let Some((file_symbol, _directory, line, column)) = self.span_to_location(span) { + attr.file = file_symbol; + if line != 0 { + attr.line = line; + } + attr.column = column; + } + + if let Err(err) = + BuiltinOpBuilder::builder_mut(self).dbg_value_with_expr(value, attr, expr_opt, span) + { + warn!("failed to emit dbg.value for local {idx}: {err:?}"); + } + } + + pub fn def_var_with_dbg(&mut self, var: Variable, val: ValueRef, span: SourceSpan) { + self.def_var(var, val); + self.emit_dbg_value_for_var(var, val, span); + } + + pub fn register_parameter(&mut self, var: Variable, value: ValueRef) 
{ + self.param_values.push((var, value)); + } + + pub fn record_debug_span(&mut self, span: SourceSpan) { + if span == SourceSpan::UNKNOWN { + return; + } + let Some(info_rc) = self.debug_info.as_ref() else { + return; + }; + + if let Some((file_symbol, directory_symbol, line, column)) = self.span_to_location(span) { + { + let mut info = info_rc.borrow_mut(); + info.compile_unit.file = file_symbol; + info.compile_unit.directory = directory_symbol; + info.subprogram.file = file_symbol; + info.subprogram.line = line; + info.subprogram.column = column; + info.function_span.get_or_insert(span); + } + self.refresh_function_debug_attrs(); + self.emit_parameter_dbg_if_needed(span); + } + } + + pub fn apply_location_schedule(&mut self, offset: u64, span: SourceSpan) { + let Some(info_rc) = self.debug_info.as_ref() else { + return; + }; + + let updates = { + let mut info = info_rc.borrow_mut(); + let mut pending = Vec::new(); + while info.next_location_event < info.location_schedule.len() { + let entry = &info.location_schedule[info.next_location_event]; + if entry.offset > offset { + break; + } + pending.push(entry.clone()); + info.next_location_event += 1; + } + pending + }; + + for entry in updates { + self.emit_scheduled_dbg_value(entry, span); + } + } + + fn emit_scheduled_dbg_value(&mut self, entry: LocationScheduleEntry, span: SourceSpan) { + let var = Variable::new(entry.var_index); + let Ok(value) = self.try_use_var(var) else { + return; + }; + + // Create expression from the scheduled location + let expression = { + let ops = vec![entry.storage.to_expression_op()]; + Some(midenc_hir::DIExpressionAttr::with_ops(ops)) + }; + + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let idx = entry.var_index; + let attr_opt = { + let info = info.borrow(); + info.local_attr(idx).cloned() + }; + let Some(mut attr) = attr_opt else { + return; + }; + + if let Some((file_symbol, _directory, line, column)) = self.span_to_location(span) { + attr.file = 
file_symbol; + if line != 0 { + attr.line = line; + } + attr.column = column; + } + + if let Err(err) = + BuiltinOpBuilder::builder_mut(self).dbg_value_with_expr(value, attr, expression, span) + { + warn!("failed to emit scheduled dbg.value for local {idx}: {err:?}"); + } + } + pub fn name(&self) -> Ident { *self.inner.func.borrow().get_name() } @@ -437,6 +587,61 @@ impl FunctionBuilderExt<'_, B> { inst_branch.change_branch_destination(old_block, new_block); self.func_ctx.borrow_mut().ssa.declare_block_predecessor(new_block, branch_inst); } + + fn refresh_function_debug_attrs(&mut self) { + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let info = info.borrow(); + let mut func = self.inner.func.borrow_mut(); + let op = func.as_operation_mut(); + op.set_intrinsic_attribute(Self::DI_COMPILE_UNIT_ATTR, Some(info.compile_unit.clone())); + op.set_intrinsic_attribute(Self::DI_SUBPROGRAM_ATTR, Some(info.subprogram.clone())); + } + + fn emit_parameter_dbg_if_needed(&mut self, span: SourceSpan) { + if self.param_dbg_emitted { + return; + } + self.param_dbg_emitted = true; + let params: Vec<_> = self.param_values.to_vec(); + for (var, value) in params { + let skip_due_to_schedule = if let Some(info_rc) = self.debug_info.as_ref() { + let info = info_rc.borrow(); + info.locals + .get(var.index()) + .and_then(|entry| entry.as_ref()) + .is_some_and(|entry| !entry.locations.is_empty()) + } else { + false + }; + if skip_due_to_schedule { + continue; + } + self.emit_dbg_value_for_var(var, value, span); + } + } + + fn span_to_location( + &self, + span: SourceSpan, + ) -> Option<(Symbol, Option, u32, Option)> { + if span == SourceSpan::UNKNOWN { + return None; + } + + let context = self.inner.builder().context(); + let session = context.session(); + let source_file = session.source_manager.get(span.source_id()).ok()?; + let uri = source_file.uri().as_str(); + let path = Path::new(uri); + let file_symbol = Symbol::intern(uri); + let directory_symbol = 
path.parent().and_then(|parent| parent.to_str()).map(Symbol::intern); + let location = source_file.location(span); + let line = location.line.to_u32(); + let column = location.column.to_u32(); + Some((file_symbol, directory_symbol, line, Some(column))) + } } impl<'f, B: ?Sized + Builder> ArithOpBuilder<'f, B> for FunctionBuilderExt<'f, B> { diff --git a/frontend/wasm/src/module/mod.rs b/frontend/wasm/src/module/mod.rs index 69b6ca55d..7b97e6723 100644 --- a/frontend/wasm/src/module/mod.rs +++ b/frontend/wasm/src/module/mod.rs @@ -12,6 +12,7 @@ use self::types::*; use crate::{component::SignatureIndex, error::WasmResult, unsupported_diag}; pub mod build_ir; +pub mod debug_info; pub mod func_translation_state; pub mod func_translator; pub mod function_builder_ext; @@ -336,6 +337,14 @@ impl Module { .unwrap_or_else(|| Symbol::intern(format!("data{}", index.as_u32()))) } + /// Returns the name of the given local (including parameters) if available in the name section. + pub fn local_name(&self, func: FuncIndex, index: u32) -> Option { + self.name_section + .locals_names + .get(&func) + .and_then(|locals| locals.get(&index).copied()) + } + /// Sets the fallback name of this module, used if there is no module name in the name section pub fn set_name_fallback(&mut self, name_fallback: Cow<'static, str>) { self.name_fallback = Some(Ident::from(name_fallback.as_ref())); diff --git a/frontend/wasm/src/module/module_env.rs b/frontend/wasm/src/module/module_env.rs index 0399ae6ca..a897e0a86 100644 --- a/frontend/wasm/src/module/module_env.rs +++ b/frontend/wasm/src/module/module_env.rs @@ -1,10 +1,10 @@ -use alloc::sync::Arc; +use alloc::{rc::Rc, sync::Arc}; use core::ops::Range; use std::path::PathBuf; use cranelift_entity::{PrimaryMap, packed_option::ReservedValue}; use midenc_frontend_wasm_metadata::{FrontendMetadata, WASM_FRONTEND_METADATA_CUSTOM_SECTION_NAME}; -use midenc_hir::{FxHashSet, Ident, interner::Symbol}; +use midenc_hir::{FxHashMap, FxHashSet, Ident, 
interner::Symbol}; use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic, Report, Severity}; use wasmparser::{ CustomSectionReader, DataKind, ElementItems, ElementKind, Encoding, ExternalKind, @@ -67,6 +67,10 @@ pub struct ParsedModule<'data> { /// DWARF debug information, if enabled, parsed from the module. pub debuginfo: DebugInfoData<'data>, + /// Precomputed debug metadata for functions + pub function_debug: + FxHashMap>>, + /// Set if debuginfo was found but it was not parsed due to `Tunables` /// configuration. pub has_unparsed_debuginfo: bool, @@ -186,6 +190,8 @@ pub struct FunctionBodyData<'a> { pub body: FunctionBody<'a>, /// Validator for the function body pub validator: FuncToValidate, + /// Offset in the original wasm binary where this function body starts + pub body_offset: u64, } #[cfg(test)] @@ -704,7 +710,12 @@ impl<'a, 'data> ModuleEnvironment<'a, 'data> { params: sig.params().into(), }); } - self.result.function_body_inputs.push(FunctionBodyData { validator, body }); + let body_offset = body.range().start as u64; + self.result.function_body_inputs.push(FunctionBodyData { + validator, + body, + body_offset, + }); self.result.code_index += 1; Ok(()) } diff --git a/hir-transform/src/dead_debug_ops.rs b/hir-transform/src/dead_debug_ops.rs new file mode 100644 index 000000000..a695dd682 --- /dev/null +++ b/hir-transform/src/dead_debug_ops.rs @@ -0,0 +1,126 @@ +//! This pass removes debug operations (DbgValue) whose operands are no longer +//! live. This prevents issues during codegen where the operand stack state +//! becomes inconsistent due to debug ops referencing dropped values. + +use alloc::vec::Vec; + +use midenc_hir::{ + EntityMut, Operation, OperationName, OperationRef, Report, + dialects::builtin, + pass::{Pass, PassExecutionState, PostPassStatus}, +}; +use midenc_hir_analysis::analyses::LivenessAnalysis; + +/// Removes debug operations whose operands are dead. 
+/// +/// Debug operations like `DbgValue` reference SSA values to provide debug +/// information. However, these operations don't actually consume their operands; +/// they just observe them. This can cause issues during codegen when the +/// referenced value has been dropped from the operand stack. +/// +/// This pass removes debug ops whose operands are not live after the debug op. +/// If a value is live after the debug op, it will still be available on the +/// operand stack during codegen and can be safely observed. +pub struct RemoveDeadDebugOps; + +impl Pass for RemoveDeadDebugOps { + type Target = Operation; + + fn name(&self) -> &'static str { + "remove-dead-debug-ops" + } + + fn argument(&self) -> &'static str { + "remove-dead-debug-ops" + } + + fn description(&self) -> &'static str { + "Removes debug operations whose operands are dead" + } + + fn can_schedule_on(&self, _name: &OperationName) -> bool { + true + } + + fn run_on_operation( + &mut self, + op: EntityMut<'_, Self::Target>, + state: &mut PassExecutionState, + ) -> Result<(), Report> { + let op_ref = op.as_operation_ref(); + drop(op); + + // Collect all debug ops to potentially remove + let mut debug_ops_to_check: Vec = Vec::new(); + + collect_debug_ops(&op_ref, &mut debug_ops_to_check); + + if debug_ops_to_check.is_empty() { + state.set_post_pass_status(PostPassStatus::Unchanged); + return Ok(()); + } + + // Get liveness analysis + let analysis_manager = state.analysis_manager(); + let liveness = analysis_manager.get_analysis::()?; + + let mut removed_any = false; + + // Check each debug op and remove if its operand will be dead by codegen time + for mut debug_op in debug_ops_to_check { + let should_remove = { + let debug_op_borrowed = debug_op.borrow(); + + // Get the operand (first operand for DbgValue) + let operands = debug_op_borrowed.operands(); + if operands.is_empty() { + continue; + } + + let operand = operands.iter().next().unwrap(); + let operand_value = 
operand.borrow().as_value_ref(); + + // Only remove debug ops if their operand is not live after the debug op. + // If the value is live after, it will still be on the operand stack + // during codegen and can be safely observed by the debug op. + // + // Note: We previously also removed debug ops if the value had other uses, + // but this was too aggressive - if the value is live after the debug op, + // it doesn't matter how many uses it has; it's still available. + !liveness.is_live_after(operand_value, &debug_op_borrowed) + }; + + if should_remove { + debug_op.borrow_mut().erase(); + removed_any = true; + } + } + + state.set_post_pass_status(if removed_any { + PostPassStatus::Changed + } else { + PostPassStatus::Unchanged + }); + + Ok(()) + } +} + +/// Recursively collect all debug operations in the given operation +fn collect_debug_ops(op: &OperationRef, debug_ops: &mut Vec) { + let op = op.borrow(); + + // Check if this is a debug op + if op.is::() { + debug_ops.push(op.as_operation_ref()); + } + + // Recurse into regions + for region in op.regions() { + for block in region.body() { + for inner_op in block.body() { + collect_debug_ops(&inner_op.as_operation_ref(), debug_ops); + } + } + } +} diff --git a/hir-transform/src/lib.rs b/hir-transform/src/lib.rs index 1394fd948..139c44c41 100644 --- a/hir-transform/src/lib.rs +++ b/hir-transform/src/lib.rs @@ -9,6 +9,7 @@ mod canonicalization; mod cfg_to_scf; mod cse; mod dce; +mod dead_debug_ops; //mod inliner; mod sccp; mod sink; @@ -20,6 +21,7 @@ pub use self::{ canonicalization::Canonicalizer, cfg_to_scf::{CFGToSCFInterface, transform_cfg_to_scf}, cse::CommonSubexpressionElimination, + dead_debug_ops::RemoveDeadDebugOps, sccp::SparseConditionalConstantPropagation, sink::{ControlFlowSink, SinkOperandDefs}, spill::{ReloadLike, SpillLike, TransformSpillsInterface, transform_spills}, diff --git a/hir/src/attributes/debug.rs b/hir/src/attributes/debug.rs new file mode 100644 index 000000000..0dee8171d --- /dev/null 
+++ b/hir/src/attributes/debug.rs @@ -0,0 +1,254 @@ +use alloc::{format, vec::Vec}; + +use crate::{ + Type, define_attr_type, + formatter::{Document, PrettyPrint, const_text, text}, + interner::Symbol, +}; + +/// Represents the compilation unit associated with debug information. +/// +/// The fields in this struct are intentionally aligned with the subset of +/// DWARF metadata we currently care about when tracking variable locations. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct DICompileUnitAttr { + pub language: Symbol, + pub file: Symbol, + pub directory: Option, + pub producer: Option, + pub optimized: bool, +} + +define_attr_type!(DICompileUnitAttr); + +impl DICompileUnitAttr { + pub fn new(language: Symbol, file: Symbol) -> Self { + Self { + language, + file, + directory: None, + producer: None, + optimized: false, + } + } +} + +impl PrettyPrint for DICompileUnitAttr { + fn render(&self) -> Document { + let mut doc = const_text("di.compile_unit(") + + text(format!("language = {}", self.language.as_str())) + + const_text(", file = ") + + text(self.file.as_str()); + + if let Some(directory) = self.directory { + doc = doc + const_text(", directory = ") + text(directory.as_str()); + } + if let Some(producer) = self.producer { + doc = doc + const_text(", producer = ") + text(producer.as_str()); + } + if self.optimized { + doc += const_text(", optimized"); + } + + doc + const_text(")") + } +} + +/// Represents a subprogram (function) scope for debug information. +/// The compile unit is not embedded but typically stored separately on the module. 
+#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct DISubprogramAttr { + pub name: Symbol, + pub linkage_name: Option, + pub file: Symbol, + pub line: u32, + pub column: Option, + pub is_definition: bool, + pub is_local: bool, +} + +define_attr_type!(DISubprogramAttr); + +impl DISubprogramAttr { + pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { + Self { + name, + linkage_name: None, + file, + line, + column, + is_definition: true, + is_local: false, + } + } +} + +impl PrettyPrint for DISubprogramAttr { + fn render(&self) -> Document { + let mut doc = const_text("di.subprogram(") + + text(format!("name = {}", self.name.as_str())) + + const_text(", file = ") + + text(self.file.as_str()) + + const_text(", line = ") + + text(format!("{}", self.line)); + + if let Some(column) = self.column { + doc = doc + const_text(", column = ") + text(format!("{}", column)); + } + if let Some(linkage) = self.linkage_name { + doc = doc + const_text(", linkage = ") + text(linkage.as_str()); + } + if self.is_definition { + doc += const_text(", definition"); + } + if self.is_local { + doc += const_text(", local"); + } + + doc + const_text(")") + } +} + +/// Represents a local variable debug record. +/// The scope (DISubprogramAttr) is not embedded but instead stored on the containing function. 
+#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct DILocalVariableAttr { + pub name: Symbol, + pub arg_index: Option, + pub file: Symbol, + pub line: u32, + pub column: Option, + pub ty: Option, +} + +define_attr_type!(DILocalVariableAttr); + +impl DILocalVariableAttr { + pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { + Self { + name, + arg_index: None, + file, + line, + column, + ty: None, + } + } +} + +impl PrettyPrint for DILocalVariableAttr { + fn render(&self) -> Document { + let mut doc = const_text("di.local_variable(") + + text(format!("name = {}", self.name.as_str())) + + const_text(", file = ") + + text(self.file.as_str()) + + const_text(", line = ") + + text(format!("{}", self.line)); + + if let Some(column) = self.column { + doc = doc + const_text(", column = ") + text(format!("{}", column)); + } + if let Some(arg_index) = self.arg_index { + doc = doc + const_text(", arg = ") + text(format!("{}", arg_index)); + } + if let Some(ty) = &self.ty { + doc = doc + const_text(", ty = ") + ty.render(); + } + + doc + const_text(")") + } +} + +/// Represents DWARF expression operations for describing variable locations +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum DIExpressionOp { + /// DW_OP_WASM_location 0x00 - Variable is in a WebAssembly local + WasmLocal(u32), + /// DW_OP_WASM_location 0x01 - Variable is in a WebAssembly global + WasmGlobal(u32), + /// DW_OP_WASM_location 0x02 - Variable is on the WebAssembly operand stack + WasmStack(u32), + /// DW_OP_constu - Unsigned constant value + ConstU64(u64), + /// DW_OP_consts - Signed constant value + ConstS64(i64), + /// DW_OP_plus_uconst - Add unsigned constant to top of stack + PlusUConst(u64), + /// DW_OP_minus - Subtract top two stack values + Minus, + /// DW_OP_plus - Add top two stack values + Plus, + /// DW_OP_deref - Dereference the address at top of stack + Deref, + /// DW_OP_stack_value - The value on the stack is the value of the variable + StackValue, + /// 
DW_OP_piece - Describes a piece of a variable + Piece(u64), + /// DW_OP_bit_piece - Describes a piece of a variable in bits + BitPiece { size: u64, offset: u64 }, + /// Placeholder for unsupported operations + Unsupported(Symbol), +} + +/// Represents a DWARF expression that describes how to compute or locate a variable's value +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct DIExpressionAttr { + pub operations: Vec, +} + +define_attr_type!(DIExpressionAttr); + +impl DIExpressionAttr { + pub fn new() -> Self { + Self { + operations: Vec::new(), + } + } + + pub fn with_ops(operations: Vec) -> Self { + Self { operations } + } + + pub fn is_empty(&self) -> bool { + self.operations.is_empty() + } +} + +impl Default for DIExpressionAttr { + fn default() -> Self { + Self::new() + } +} + +impl PrettyPrint for DIExpressionAttr { + fn render(&self) -> Document { + if self.operations.is_empty() { + return const_text("di.expression()"); + } + + let mut doc = const_text("di.expression("); + for (i, op) in self.operations.iter().enumerate() { + if i > 0 { + doc += const_text(", "); + } + doc += match op { + DIExpressionOp::WasmLocal(idx) => text(format!("DW_OP_WASM_local {}", idx)), + DIExpressionOp::WasmGlobal(idx) => text(format!("DW_OP_WASM_global {}", idx)), + DIExpressionOp::WasmStack(idx) => text(format!("DW_OP_WASM_stack {}", idx)), + DIExpressionOp::ConstU64(val) => text(format!("DW_OP_constu {}", val)), + DIExpressionOp::ConstS64(val) => text(format!("DW_OP_consts {}", val)), + DIExpressionOp::PlusUConst(val) => text(format!("DW_OP_plus_uconst {}", val)), + DIExpressionOp::Minus => const_text("DW_OP_minus"), + DIExpressionOp::Plus => const_text("DW_OP_plus"), + DIExpressionOp::Deref => const_text("DW_OP_deref"), + DIExpressionOp::StackValue => const_text("DW_OP_stack_value"), + DIExpressionOp::Piece(size) => text(format!("DW_OP_piece {}", size)), + DIExpressionOp::BitPiece { size, offset } => { + text(format!("DW_OP_bit_piece {} {}", size, offset)) + } + 
DIExpressionOp::Unsupported(name) => text(name.as_str()), + }; + } + doc + const_text(")") + } +} diff --git a/hir/src/dialects/builtin/builders.rs b/hir/src/dialects/builtin/builders.rs index b9fbb36fd..64673ddeb 100644 --- a/hir/src/dialects/builtin/builders.rs +++ b/hir/src/dialects/builtin/builders.rs @@ -86,6 +86,27 @@ pub trait BuiltinOpBuilder<'f, B: ?Sized + Builder> { op_builder(arg) } + fn dbg_value( + &mut self, + value: ValueRef, + variable: DILocalVariableAttr, + span: SourceSpan, + ) -> Result { + self.dbg_value_with_expr(value, variable, None, span) + } + + fn dbg_value_with_expr( + &mut self, + value: ValueRef, + variable: DILocalVariableAttr, + expression: Option, + span: SourceSpan, + ) -> Result { + let expr = expression.unwrap_or_default(); + let op_builder = self.builder_mut().create::(span); + op_builder(value, variable, expr) + } + fn builder(&self) -> &B; fn builder_mut(&mut self) -> &mut B; } diff --git a/hir/src/dialects/builtin/ops.rs b/hir/src/dialects/builtin/ops.rs index ee9b9cbd5..113658d6e 100644 --- a/hir/src/dialects/builtin/ops.rs +++ b/hir/src/dialects/builtin/ops.rs @@ -1,5 +1,6 @@ mod cast; mod component; +mod debug; mod function; mod global_variable; mod interface; diff --git a/hir/src/dialects/builtin/ops/debug.rs b/hir/src/dialects/builtin/ops/debug.rs new file mode 100644 index 000000000..a6cecaf91 --- /dev/null +++ b/hir/src/dialects/builtin/ops/debug.rs @@ -0,0 +1,75 @@ +use crate::{ + UnsafeIntrusiveEntityRef, + attributes::{DIExpressionAttr, DILocalVariableAttr}, + derive::operation, + dialects::builtin::BuiltinDialect, + traits::AnyType, +}; + +pub type DbgValueRef = UnsafeIntrusiveEntityRef; +pub type DbgDeclareRef = UnsafeIntrusiveEntityRef; + +/// Records the value of an SSA operand for debug information consumers. 
+#[operation(dialect = BuiltinDialect)] +pub struct DbgValue { + #[operand] + value: AnyType, + #[attr] + variable: DILocalVariableAttr, + #[attr] + expression: DIExpressionAttr, +} + +/// Records the storage location of a source-level variable. +#[operation(dialect = BuiltinDialect)] +pub struct DbgDeclare { + #[operand] + address: AnyType, + #[attr] + variable: DILocalVariableAttr, +} + +#[cfg(test)] +mod tests { + use alloc::{rc::Rc, string::ToString}; + + use crate::{ + Builder, Context, OpPrinter, OpPrintingFlags, SourceSpan, Type, + attributes::DILocalVariableAttr, + dialects::builtin::{BuiltinDialect, BuiltinOpBuilder}, + interner::Symbol, + }; + + fn make_variable() -> DILocalVariableAttr { + let mut variable = + DILocalVariableAttr::new(Symbol::intern("x"), Symbol::intern("main.rs"), 12, Some(7)); + variable.arg_index = Some(0); + variable.ty = Some(Type::I32); + variable + } + + #[test] + fn dbg_value_carries_metadata() { + let context = Rc::new(Context::default()); + context.get_or_register_dialect::(); + + let block = context.create_block_with_params([Type::I32]); + let arg = block.borrow().arguments()[0]; + let value = arg.borrow().as_value_ref(); + + let mut builder = context.clone().builder(); + builder.set_insertion_point_to_end(block); + + let variable = make_variable(); + let dbg_value = builder + .dbg_value(value, variable.clone(), SourceSpan::UNKNOWN) + .expect("failed to create dbg.value op"); + + assert_eq!(dbg_value.borrow().variable(), &variable); + assert_eq!(block.borrow().back(), Some(dbg_value.as_operation_ref())); + + let op = dbg_value.as_operation_ref(); + let printed = op.borrow().print(&OpPrintingFlags::default(), context.as_ref()).to_string(); + assert!(printed.contains("di.local_variable")); + } +} diff --git a/midenc-compile/Cargo.toml b/midenc-compile/Cargo.toml index 470c0bd0f..60097dc9a 100644 --- a/midenc-compile/Cargo.toml +++ b/midenc-compile/Cargo.toml @@ -32,6 +32,7 @@ log.workspace = true inventory.workspace = true 
midenc-codegen-masm.workspace = true miden-assembly.workspace = true +miden-debug-types.workspace = true miden-mast-package.workspace = true midenc-frontend-wasm.workspace = true midenc-dialect-scf.workspace = true diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs new file mode 100644 index 000000000..4902f9c2e --- /dev/null +++ b/midenc-compile/src/debug_info.rs @@ -0,0 +1,312 @@ +//! Debug info section builder for MASP packages. +//! +//! This module provides utilities for collecting debug information from the HIR +//! and building a `DebugInfoSection` that can be serialized into the `.debug_info` +//! custom section of a MASP package. + +use alloc::{collections::BTreeMap, string::ToString}; + +use miden_debug_types::{ColumnNumber, LineNumber}; +use miden_mast_package::debug_info::{ + DebugFileInfo, DebugFunctionInfo, DebugInfoSection, DebugPrimitiveType, DebugTypeInfo, + DebugVariableInfo, +}; +use midenc_hir::{DILocalVariableAttr, DISubprogramAttr, OpExt, Type, dialects::builtin}; + +/// Builder for constructing a `DebugInfoSection` from HIR components. +pub struct DebugInfoBuilder { + section: DebugInfoSection, + /// Maps source file paths to their indices in the file table + file_indices: BTreeMap, + /// Maps type hashes to their indices in the type table + type_indices: BTreeMap, +} + +/// A key for deduplicating types +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum TypeKey { + Primitive(u8), // Use discriminant instead of the enum directly + Pointer(u32), + Array(u32, Option), + Unknown, +} + +impl Default for DebugInfoBuilder { + fn default() -> Self { + Self::new() + } +} + +impl DebugInfoBuilder { + /// Creates a new debug info builder. + pub fn new() -> Self { + Self { + section: DebugInfoSection::new(), + file_indices: BTreeMap::new(), + type_indices: BTreeMap::new(), + } + } + + /// Adds a string to the string table and returns its index. 
+ pub fn add_string(&mut self, s: impl Into) -> u32 { + self.section.add_string(s) + } + + /// Adds a file to the file table and returns its index. + pub fn add_file(&mut self, path: &str, directory: Option<&str>) -> u32 { + if let Some(&idx) = self.file_indices.get(path) { + return idx; + } + + let path_idx = self.section.add_string(path); + let directory_idx = directory.map(|d| self.section.add_string(d)); + + let mut file = DebugFileInfo::new(path_idx); + if let Some(dir_idx) = directory_idx { + file = file.with_directory(dir_idx); + } + + let idx = self.section.add_file(file); + self.file_indices.insert(path.to_string(), idx); + idx + } + + /// Adds a type to the type table and returns its index. + pub fn add_type(&mut self, ty: &Type) -> u32 { + let debug_type = hir_type_to_debug_type(ty, self); + let key = type_to_key(&debug_type); + + if let Some(&idx) = self.type_indices.get(&key) { + return idx; + } + + let idx = self.section.add_type(debug_type); + self.type_indices.insert(key, idx); + idx + } + + /// Adds a primitive type and returns its index. + pub fn add_primitive_type(&mut self, prim: DebugPrimitiveType) -> u32 { + let key = TypeKey::Primitive(prim as u8); + if let Some(&idx) = self.type_indices.get(&key) { + return idx; + } + + let idx = self.section.add_type(DebugTypeInfo::Primitive(prim)); + self.type_indices.insert(key, idx); + idx + } + + /// Collects debug information from an HIR component. 
+ pub fn collect_from_component(&mut self, component: &builtin::Component) { + // Traverse the component and collect debug info from all functions + let region = component.body(); + let block = region.entry(); + + for op in block.body() { + if let Some(module) = op.downcast_ref::() { + self.collect_from_module(module); + } else if let Some(interface) = op.downcast_ref::() { + self.collect_from_interface(interface); + } else if let Some(function) = op.downcast_ref::() { + self.collect_from_function(function); + } + } + } + + fn collect_from_module(&mut self, module: &builtin::Module) { + let region = module.body(); + let block = region.entry(); + + for op in block.body() { + if let Some(function) = op.downcast_ref::() { + self.collect_from_function(function); + } + } + } + + fn collect_from_interface(&mut self, interface: &builtin::Interface) { + let region = interface.body(); + let block = region.entry(); + + for op in block.body() { + if let Some(function) = op.downcast_ref::() { + self.collect_from_function(function); + } + } + } + + fn collect_from_function(&mut self, function: &builtin::Function) { + // Get function debug info from attributes + // Try to get DISubprogramAttr from the function's attributes + let subprogram: Option = function + .get_attribute(midenc_hir::interner::Symbol::intern("di.subprogram")) + .and_then(|attr| attr.downcast_ref::().cloned()); + + let Some(subprogram) = subprogram else { + // No debug info for this function, just collect from body + self.collect_variables_from_function_body(function, None); + return; + }; + + // Add file + let file_idx = self.add_file(subprogram.file.as_str(), None); + + // Add function name + let name_idx = self.add_string(subprogram.name.as_str()); + let linkage_name_idx = subprogram.linkage_name.map(|s| self.add_string(s.as_str())); + + // Create function info + let line = LineNumber::new(subprogram.line).unwrap_or_default(); + let column = 
ColumnNumber::new(subprogram.column.unwrap_or(1)).unwrap_or_default(); + + let mut func_info = DebugFunctionInfo::new(name_idx, file_idx, line, column); + if let Some(linkage_idx) = linkage_name_idx { + func_info = func_info.with_linkage_name(linkage_idx); + } + + // Collect local variables from function body + self.collect_variables_from_function_body(function, Some(&mut func_info)); + + self.section.add_function(func_info); + } + + fn collect_variables_from_function_body( + &mut self, + function: &builtin::Function, + func_info: Option<&mut DebugFunctionInfo>, + ) { + // Walk through the function body to find DbgValue operations + let entry = function.entry_block(); + let entry_block = entry.borrow(); + + if let Some(func_info) = func_info { + self.collect_variables_from_block(&entry_block, func_info); + } + } + + fn collect_variables_from_block( + &mut self, + block: &midenc_hir::Block, + func_info: &mut DebugFunctionInfo, + ) { + for op in block.body() { + // Check if this is a DbgValue operation + if let Some(dbg_value) = op.downcast_ref::() + && let Some(var_info) = self.extract_variable_info(dbg_value.variable()) + { + func_info.add_variable(var_info); + } + + // Recursively process nested regions + for region_idx in 0..op.num_regions() { + let region = op.region(region_idx); + let entry = region.entry(); + self.collect_variables_from_block(&entry, func_info); + } + } + } + + fn extract_variable_info(&mut self, var: &DILocalVariableAttr) -> Option { + let name_idx = self.add_string(var.name.as_str()); + + // Add type if available + let type_idx = if let Some(ref ty) = var.ty { + self.add_type(ty) + } else { + self.add_primitive_type(DebugPrimitiveType::Felt) // Default to felt + }; + + let line = LineNumber::new(var.line).unwrap_or_default(); + let column = ColumnNumber::new(var.column.unwrap_or(1)).unwrap_or_default(); + + let mut var_info = DebugVariableInfo::new(name_idx, type_idx, line, column); + + if let Some(arg_index) = var.arg_index { + var_info = 
var_info.with_arg_index(arg_index); + } + + Some(var_info) + } + + /// Builds and returns the final `DebugInfoSection`. + pub fn build(self) -> DebugInfoSection { + self.section + } + + /// Returns whether any debug info has been collected. + pub fn is_empty(&self) -> bool { + self.section.is_empty() + } +} + +/// Converts an HIR Type to a DebugTypeInfo. +fn hir_type_to_debug_type(ty: &Type, builder: &mut DebugInfoBuilder) -> DebugTypeInfo { + match ty { + Type::Unknown => DebugTypeInfo::Unknown, + Type::Never => DebugTypeInfo::Primitive(DebugPrimitiveType::Void), + Type::I1 => DebugTypeInfo::Primitive(DebugPrimitiveType::Bool), + Type::I8 => DebugTypeInfo::Primitive(DebugPrimitiveType::I8), + Type::U8 => DebugTypeInfo::Primitive(DebugPrimitiveType::U8), + Type::I16 => DebugTypeInfo::Primitive(DebugPrimitiveType::I16), + Type::U16 => DebugTypeInfo::Primitive(DebugPrimitiveType::U16), + Type::I32 => DebugTypeInfo::Primitive(DebugPrimitiveType::I32), + Type::U32 => DebugTypeInfo::Primitive(DebugPrimitiveType::U32), + Type::I64 => DebugTypeInfo::Primitive(DebugPrimitiveType::I64), + Type::U64 => DebugTypeInfo::Primitive(DebugPrimitiveType::U64), + Type::I128 => DebugTypeInfo::Primitive(DebugPrimitiveType::I128), + Type::U128 => DebugTypeInfo::Primitive(DebugPrimitiveType::U128), + Type::U256 => DebugTypeInfo::Unknown, // No direct mapping for U256 + Type::F64 => DebugTypeInfo::Primitive(DebugPrimitiveType::F64), + Type::Felt => DebugTypeInfo::Primitive(DebugPrimitiveType::Felt), + Type::Ptr(ptr_type) => { + let pointee_idx = builder.add_type(ptr_type.pointee()); + DebugTypeInfo::Pointer { + pointee_type_idx: pointee_idx, + } + } + Type::Array(array_type) => { + let element_idx = builder.add_type(array_type.element_type()); + DebugTypeInfo::Array { + element_type_idx: element_idx, + count: Some(array_type.len() as u32), + } + } + // For types we don't have direct mappings for, use Unknown + Type::Struct(_) | Type::List(_) | Type::Function(_) => DebugTypeInfo::Unknown, 
+ } +} + +/// Creates a key for type deduplication. +fn type_to_key(ty: &DebugTypeInfo) -> TypeKey { + match ty { + DebugTypeInfo::Primitive(p) => TypeKey::Primitive(*p as u8), + DebugTypeInfo::Pointer { pointee_type_idx } => TypeKey::Pointer(*pointee_type_idx), + DebugTypeInfo::Array { + element_type_idx, + count, + } => TypeKey::Array(*element_type_idx, *count), + DebugTypeInfo::Unknown => TypeKey::Unknown, + // For complex types like structs and functions, we don't deduplicate + _ => TypeKey::Unknown, + } +} + +/// Builds a `DebugInfoSection` from an HIR component if debug info is enabled. +pub fn build_debug_info_section( + component: &builtin::Component, + emit_debug_decorators: bool, +) -> Option { + if !emit_debug_decorators { + return None; + } + + let mut builder = DebugInfoBuilder::new(); + builder.collect_from_component(component); + + if builder.is_empty() { + None + } else { + Some(builder.build()) + } +} diff --git a/midenc-compile/src/lib.rs b/midenc-compile/src/lib.rs index b08d01d34..4ddbffa4a 100644 --- a/midenc-compile/src/lib.rs +++ b/midenc-compile/src/lib.rs @@ -6,6 +6,7 @@ extern crate alloc; extern crate std; mod compiler; +pub mod debug_info; mod stage; mod stages; diff --git a/midenc-compile/src/stages/assemble.rs b/midenc-compile/src/stages/assemble.rs index 21d499832..c05a9cbb2 100644 --- a/midenc-compile/src/stages/assemble.rs +++ b/midenc-compile/src/stages/assemble.rs @@ -1,4 +1,9 @@ -use miden_mast_package::Package; +use alloc::{string::ToString, vec::Vec}; + +use miden_mast_package::{ + Dependency, Package, PackageManifest, Section, SectionId, TargetType, Version, +}; +use midenc_session::Session; use super::*; @@ -52,3 +57,55 @@ impl Stage for AssembleStage { } } } + +fn build_package( + artifact: midenc_codegen_masm::AssemblyArtifact, + outputs: &CodegenOutput, + session: &Session, +) -> Package { + let name = session.name.clone().into(); + + let mut dependencies = Vec::new(); + for (link_lib, lib) in 
session.options.link_libraries.iter().zip(outputs.link_libraries.iter()) + { + let dependency = Dependency { + name: link_lib.name.to_string().into(), + kind: TargetType::Library, + // proper version will be implemented in https://github.com/0xMiden/compiler/issues/1069 + version: Version::new(0, 0, 0), + digest: *lib.digest(), + }; + dependencies.push(dependency); + } + + let kind = artifact.kind(); + let mast = artifact.into_mast(); + let manifest = PackageManifest::from_library(&mast) + .with_dependencies(dependencies) + .expect("package dependencies should be unique"); + + let account_component_metadata_bytes = outputs.account_component_metadata_bytes.clone(); + let debug_info_bytes = outputs.debug_info_bytes.clone(); + + let mut sections = Vec::new(); + + if let Some(bytes) = account_component_metadata_bytes { + sections.push(Section::new(SectionId::ACCOUNT_COMPONENT_METADATA, bytes)); + } + + if let Some(bytes) = debug_info_bytes { + log::debug!("adding .debug_info section to package ({} bytes)", bytes.len()); + sections.push(Section::new(SectionId::DEBUG_INFO, bytes)); + } + + Package { + name, + // proper version will be implemented in https://github.com/0xMiden/compiler/issues/1068 + version: Version::new(0, 0, 0), + description: None, + kind, + mast: mast.into(), + manifest, + sections, + } +} diff --git a/midenc-compile/src/stages/codegen.rs b/midenc-compile/src/stages/codegen.rs index 909b24140..659d89617 100644 --- a/midenc-compile/src/stages/codegen.rs +++ b/midenc-compile/src/stages/codegen.rs @@ -20,6 +20,8 @@ pub struct CodegenOutput { pub link_packages: BTreeMap>, /// The serialized AccountComponentMetadata (name, description, storage layout, etc.) 
pub account_component_metadata_bytes: Option>, + /// The serialized DebugInfoSection for the .debug_info custom section + pub debug_info_bytes: Option>, } /// Perform code generation on the possibly-linked output of previous stages @@ -73,11 +75,29 @@ impl Stage for CodegenStage { session.emit(OutputMode::Text, masm_component.as_ref()).into_diagnostic()?; } + // Build debug info section if debug decorators are enabled + let debug_info_bytes = if session.options.emit_debug_decorators() { + use miden_assembly::utils::Serializable; + + log::debug!("collecting debug info for .debug_info section"); + let debug_section = + crate::debug_info::build_debug_info_section(&component.borrow(), true); + debug_section.map(|section| { + let mut bytes = alloc::vec::Vec::new(); + section.write_into(&mut bytes); + log::debug!("built debug_info section: {} bytes", bytes.len()); + bytes + }) + } else { + None + }; + Ok(CodegenOutput { component: Arc::from(masm_component), link_libraries, link_packages, account_component_metadata_bytes: linker_output.account_component_metadata_bytes, + debug_info_bytes, }) } } diff --git a/midenc-compile/src/stages/rewrite.rs b/midenc-compile/src/stages/rewrite.rs index 637d3529e..a24a42cb5 100644 --- a/midenc-compile/src/stages/rewrite.rs +++ b/midenc-compile/src/stages/rewrite.rs @@ -8,7 +8,7 @@ use midenc_hir::{ patterns::{GreedyRewriteConfig, RegionSimplificationLevel}, }; use midenc_hir_transform::{ - Canonicalizer, CommonSubexpressionElimination, SinkOperandDefs, + Canonicalizer, CommonSubexpressionElimination, RemoveDeadDebugOps, SinkOperandDefs, SparseConditionalConstantPropagation, }; @@ -79,6 +79,8 @@ impl Stage for ApplyRewritesStage { func_pm.add_pass(Box::new(TransformSpills)); //func_pm.add_pass(Box::new(ControlFlowSink)); //func_pm.add_pass(Box::new(DeadCodeElimination)); + // Remove debug ops whose operands are dead to prevent codegen issues + func_pm.add_pass(Box::new(RemoveDeadDebugOps)); } // Function passes for component-level 
functions { @@ -97,6 +99,8 @@ impl Stage for ApplyRewritesStage { func_pm.add_pass(Box::new(TransformSpills)); //func_pm.add_pass(Box::new(ControlFlowSink)); //func_pm.add_pass(Box::new(DeadCodeElimination)); + // Remove debug ops whose operands are dead to prevent codegen issues + func_pm.add_pass(Box::new(RemoveDeadDebugOps)); } } @@ -108,6 +112,18 @@ impl Stage for ApplyRewritesStage { log::trace!(target: "driver", "after rewrites: {}", input.world.borrow().as_operation()); log::debug!(target: "driver", "rewrites successful"); + // Emit HIR if requested + let session = context.session(); + if session.should_emit(midenc_session::OutputType::Hir) { + log::debug!(target: "driver", "emitting HIR component"); + session + .emit(midenc_session::OutputMode::Text, &*input.component.borrow()) + .into_diagnostic()?; + log::debug!(target: "driver", "HIR component emitted successfully"); + } else { + log::debug!(target: "driver", "HIR emission not requested"); + } + if context.session().rewrite_only() { log::debug!(target: "driver", "stopping compiler early (rewrite-only=true)"); Err(CompilerStopped.into()) diff --git a/tests/integration/expected/debug_variable_locations.hir b/tests/integration/expected/debug_variable_locations.hir new file mode 100644 index 000000000..7c01fbfa4 --- /dev/null +++ b/tests/integration/expected/debug_variable_locations.hir @@ -0,0 +1,47 @@ +builtin.component root_ns:root@1.0.0 { + builtin.module public @test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b { + public builtin.function @entrypoint(v0: i32) -> i32 { + ^block6(v0: i32): + v2 = arith.constant 0 : i32; + v3 = arith.constant 0 : i32; + builtin.dbg_value v3 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; + v4 = arith.constant 0 : i32; + builtin.dbg_value v4 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; + cf.br ^block8(v3, v0, 
v4); + ^block7(v1: i32): + + ^block8(v6: i32, v7: i32, v15: i32): + v8 = hir.bitcast v6 : u32; + v9 = hir.bitcast v7 : u32; + v10 = arith.lte v8, v9 : i1; + v11 = arith.zext v10 : u32; + v12 = hir.bitcast v11 : i32; + v13 = arith.constant 0 : i32; + v14 = arith.neq v12, v13 : i1; + cf.cond_br v14 ^block10, ^block11; + ^block9(v5: i32): + + ^block10: + v16 = arith.add v15, v6 : i32 #[overflow = wrapping]; + builtin.dbg_value v16 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; + v17 = arith.constant 1 : i32; + v18 = arith.add v6, v17 : i32 #[overflow = wrapping]; + builtin.dbg_value v18 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; + cf.br ^block8(v18, v7, v16); + ^block11: + builtin.ret v15; + }; + + builtin.global_variable private @#__stack_pointer : i32 { + builtin.ret_imm 1048576; + }; + + builtin.global_variable public @#gv1 : i32 { + builtin.ret_imm 1048576; + }; + + builtin.global_variable public @#gv2 : i32 { + builtin.ret_imm 1048576; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/src/rust_masm_tests/debug.rs b/tests/integration/src/rust_masm_tests/debug.rs new file mode 100644 index 000000000..4676a3d07 --- /dev/null +++ b/tests/integration/src/rust_masm_tests/debug.rs @@ -0,0 +1,27 @@ +use std::borrow::Cow; + +use midenc_expect_test::expect_file; + +use crate::{CompilerTestBuilder, testing::setup}; + +#[test] +fn variable_locations_schedule() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum += i; + i += 1; + } + sum + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["../../expected/debug_variable_locations.hir"]); 
+} diff --git a/tests/integration/src/rust_masm_tests/mod.rs b/tests/integration/src/rust_masm_tests/mod.rs index b4283e4fb..89bc1e43b 100644 --- a/tests/integration/src/rust_masm_tests/mod.rs +++ b/tests/integration/src/rust_masm_tests/mod.rs @@ -12,6 +12,7 @@ use crate::testing::eval_package; mod abi_transform; mod apps; +mod debug; mod debug_source_locations; mod examples; mod instructions; diff --git a/tests/lit/debug/function_metadata.rs b/tests/lit/debug/function_metadata.rs new file mode 100644 index 000000000..7bf99b7dc --- /dev/null +++ b/tests/lit/debug/function_metadata.rs @@ -0,0 +1,14 @@ +#![no_std] +#![no_main] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn multiply(x: u32, y: u32) -> u32 { + x * y +} diff --git a/tests/lit/debug/function_metadata.shtest b/tests/lit/debug/function_metadata.shtest new file mode 100644 index 000000000..23434d6fc --- /dev/null +++ b/tests/lit/debug/function_metadata.shtest @@ -0,0 +1,6 @@ +# Test that HIR includes source locations for function parameters +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/function_metadata.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/function_metadata.wasm\" && bin/midenc \"\$TMPDIR/function_metadata.wasm\" --entrypoint=multiply -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function @multiply +# CHECK: #loc("{{.*}}function_metadata.rs":{{[0-9]+}} diff --git a/tests/lit/debug/lit.suite.toml b/tests/lit/debug/lit.suite.toml new file mode 100644 index 000000000..f95cc52c0 --- /dev/null +++ b/tests/lit/debug/lit.suite.toml @@ -0,0 +1,5 @@ +name = "debug" +patterns = ["*.shtest"] +working_dir = "../../../" + +[format.shtest] diff --git a/tests/lit/debug/location_expressions.rs 
b/tests/lit/debug/location_expressions.rs new file mode 100644 index 000000000..c899970e3 --- /dev/null +++ b/tests/lit/debug/location_expressions.rs @@ -0,0 +1,25 @@ +// Test file to verify location expressions in debug info +// Using no_std to avoid runtime overhead + +#![no_std] +#![no_main] + +#[panic_handler] +fn panic(_: &core::panic::PanicInfo) -> ! { + loop {} +} + +#[no_mangle] +pub extern "C" fn test_expressions(p0: i32, p1: i32, p2: i32, p3: i32) -> i32 { + // These parameters should be in WASM locals 0, 1, 2, 3 + // The debug info expressions should show: + // p0 -> DW_OP_WASM_local 0 + // p1 -> DW_OP_WASM_local 1 + // p2 -> DW_OP_WASM_local 2 + // p3 -> DW_OP_WASM_local 3 + + // Simple arithmetic using all parameters + let sum1 = p0.wrapping_add(p1); + let sum2 = p2.wrapping_add(p3); + sum1.wrapping_add(sum2) +} \ No newline at end of file diff --git a/tests/lit/debug/location_expressions.shtest b/tests/lit/debug/location_expressions.shtest new file mode 100644 index 000000000..867d6e1b4 --- /dev/null +++ b/tests/lit/debug/location_expressions.shtest @@ -0,0 +1,9 @@ +# Test that debug info with source locations is properly represented in HIR +# This test verifies that operations include source location annotations +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/location_expressions.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/location_expressions.wasm\" && bin/midenc \"\$TMPDIR/location_expressions.wasm\" --entrypoint=test_expressions -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Test that the function exists with 4 parameters +# CHECK-LABEL: builtin.function @test_expressions({{.*}}: i32, {{.*}}: i32, {{.*}}: i32, {{.*}}: i32) -> i32 + +# Test that operations have source location annotations +# CHECK: #loc("{{.*}}location_expressions.rs":{{[0-9]+}} diff --git a/tests/lit/debug/simple_debug.rs b/tests/lit/debug/simple_debug.rs new file mode 
100644 index 000000000..342241ecb --- /dev/null +++ b/tests/lit/debug/simple_debug.rs @@ -0,0 +1,14 @@ +#![no_std] +#![no_main] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn add(a: u32, b: u32) -> u32 { + a + b +} diff --git a/tests/lit/debug/simple_debug.shtest b/tests/lit/debug/simple_debug.shtest new file mode 100644 index 000000000..4df8ec78f --- /dev/null +++ b/tests/lit/debug/simple_debug.shtest @@ -0,0 +1,6 @@ +# Test that basic debug info source locations are emitted for a simple function +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/simple_debug.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/simple_debug.wasm\" && bin/midenc \"\$TMPDIR/simple_debug.wasm\" --entrypoint=add -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function @add +# CHECK: #loc("{{.*}}simple_debug.rs":{{[0-9]+}} diff --git a/tests/lit/debug/variable_locations.rs b/tests/lit/debug/variable_locations.rs new file mode 100644 index 000000000..623d268de --- /dev/null +++ b/tests/lit/debug/variable_locations.rs @@ -0,0 +1,21 @@ +#![no_std] +#![no_main] +#![allow(unused_unsafe)] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! 
{ + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn entrypoint(n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum = sum + i; + i = i + 1; + } + sum +} diff --git a/tests/lit/debug/variable_locations.shtest b/tests/lit/debug/variable_locations.shtest new file mode 100644 index 000000000..5e6861546 --- /dev/null +++ b/tests/lit/debug/variable_locations.shtest @@ -0,0 +1,6 @@ +# Test that debug info tracks source locations in a loop +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/variable_locations.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/variable_locations.wasm\" && bin/midenc \"\$TMPDIR/variable_locations.wasm\" --entrypoint=entrypoint -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s + +# Check that function has source location annotations +# CHECK-LABEL: builtin.function @entrypoint +# CHECK: #loc("{{.*}}variable_locations.rs":{{[0-9]+}} diff --git a/tests/lit/debugdump/lit.suite.toml b/tests/lit/debugdump/lit.suite.toml new file mode 100644 index 000000000..162db014a --- /dev/null +++ b/tests/lit/debugdump/lit.suite.toml @@ -0,0 +1,5 @@ +name = "debugdump" +patterns = ["*.wat"] +working_dir = "../../../" + +[format.shtest] diff --git a/tests/lit/debugdump/locations-source-loc.wat b/tests/lit/debugdump/locations-source-loc.wat new file mode 100644 index 000000000..9b9441af3 --- /dev/null +++ b/tests/lit/debugdump/locations-source-loc.wat @@ -0,0 +1,25 @@ +;; Test that .debug_loc section shows DebugVar decorators with source locations +;; from a real Rust project compiled with debug info. 
+;; +;; RUN: cargo build --release --target wasm32-unknown-unknown --manifest-path tests/lit/source-location/test-project/Cargo.toml 2>&1 +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc 'tests/lit/source-location/test-project/target/wasm32-unknown-unknown/release/source_location_test.wasm' --lib --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s + +;; Check header +;; CHECK: .debug_loc contents (DebugVar decorators from MAST): +;; CHECK: Total DebugVar decorators: 3 +;; CHECK: Unique variable names: 3 + +;; Check variable "arg0" - parameter from test_assertion function +;; CHECK: Variable: "arg0" +;; CHECK: 1 location entries: +;; CHECK: local[0] (param #2) + +;; Check variable "local3" - from panic handler +;; CHECK: Variable: "local3" +;; CHECK: 1 location entries: +;; CHECK: stack[0] + +;; Check variable "x" - parameter from entrypoint function +;; CHECK: Variable: "x" +;; CHECK: 1 location entries: +;; CHECK: local[0] (param #2) diff --git a/tests/lit/debugdump/locations.wat b/tests/lit/debugdump/locations.wat new file mode 100644 index 000000000..aead42315 --- /dev/null +++ b/tests/lit/debugdump/locations.wat @@ -0,0 +1,22 @@ +;; Test that .debug_loc section is present and handles empty case +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s + +;; Check header for .debug_loc section +;; CHECK: .debug_loc contents (DebugVar decorators from MAST): +;; For raw WAT files without debug info, we expect no decorators +;; CHECK: (no DebugVar decorators found) + +(module + (func $add (export "add") (param i32 i32) (result i32) + local.get 0 + local.get 1 + i32.add + ) + + (func $entrypoint (export "entrypoint") + i32.const 5 + i32.const 3 + call $add + drop + ) +) diff --git a/tests/lit/debugdump/simple.wat 
b/tests/lit/debugdump/simple.wat new file mode 100644 index 000000000..3d9312e19 --- /dev/null +++ b/tests/lit/debugdump/simple.wat @@ -0,0 +1,32 @@ +;; Test that miden-debugdump correctly parses and displays debug info from a .masp file +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\"" | filecheck %s + +;; Check header +;; CHECK: DEBUG INFO DUMP: +;; CHECK: Debug info version: 1 + +;; Check summary section is present +;; CHECK: .debug_info summary: +;; CHECK: Strings: +;; CHECK: Types: +;; CHECK: Files: +;; CHECK: Functions: + +;; Check that we have functions from the WAT +;; CHECK: .debug_functions contents: +;; CHECK: FUNCTION: add +;; CHECK: FUNCTION: multiply + +(module + (func $add (export "add") (param $a i32) (param $b i32) (result i32) + local.get $a + local.get $b + i32.add + ) + + (func $multiply (export "multiply") (param $x i32) (param $y i32) (result i32) + local.get $x + local.get $y + i32.mul + ) +) diff --git a/tests/lit/debugdump/summary.wat b/tests/lit/debugdump/summary.wat new file mode 100644 index 000000000..a127c8d07 --- /dev/null +++ b/tests/lit/debugdump/summary.wat @@ -0,0 +1,21 @@ +;; Test that miden-debugdump --summary shows only summary output +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --summary" | filecheck %s + +;; Check summary is present +;; CHECK: .debug_info summary: +;; CHECK: Strings:{{.*}}entries +;; CHECK: Types:{{.*}}entries +;; CHECK: Files:{{.*}}entries +;; CHECK: Functions:{{.*}}entries + +;; Make sure full dump sections are NOT present with --summary +;; CHECK-NOT: .debug_str contents: +;; CHECK-NOT: .debug_types contents: +;; CHECK-NOT: .debug_files contents: +;; CHECK-NOT: .debug_functions contents: + +(module + (func $test (export "test") (param i32) (result i32) + 
local.get 0 + ) +) diff --git a/tests/lit/lit.cfg.py b/tests/lit/lit.cfg.py new file mode 100644 index 000000000..75043fcf0 --- /dev/null +++ b/tests/lit/lit.cfg.py @@ -0,0 +1,51 @@ +import os +import sys +import shlex + +from lit.formats import ShTest +import lit.util + +config.name = "miden-lit" +config.test_format = ShTest() +config.suffixes = [".shtest", ".hir", ".wat"] + +source_root = os.path.dirname(__file__) +repo_root = os.path.abspath(os.path.join(source_root, os.pardir, os.pardir)) +config.test_source_root = source_root +config.test_exec_root = repo_root +bin_dir = os.path.join(repo_root, "bin") +config.environment["PATH"] = bin_dir + os.pathsep + config.environment.get("PATH", "") +# Use cargo run to ensure proper runtime environment +# NOTE: `2>/dev/null` applies to the whole `cargo run` process, so it hides cargo's build warnings and also midenc's own stderr +midenc_cmd = f"cargo run --manifest-path {shlex.quote(os.path.join(repo_root, 'Cargo.toml'))} --bin midenc 2>/dev/null --" +config.substitutions.append(("%midenc", midenc_cmd)) + +# Try to find FileCheck in common locations +filecheck = ( + lit.util.which("FileCheck") + or lit.util.which("filecheck") + or lit.util.which("llvm-filecheck") +) + +# Check homebrew LLVM locations if not found +if not filecheck: + homebrew_paths = [ + "/opt/homebrew/opt/llvm@20/bin/FileCheck", + "/opt/homebrew/opt/llvm/bin/FileCheck", + "/usr/local/opt/llvm/bin/FileCheck", + ] + for path in homebrew_paths: + if os.path.exists(path): + filecheck = path + break + +# Fall back to simple_filecheck.py only if system FileCheck not found +if not filecheck: + script = os.path.join(source_root, 'tools', 'simple_filecheck.py') + filecheck = f"{shlex.quote(sys.executable)} {shlex.quote(script)}" + +config.substitutions.append(("%filecheck", filecheck)) + +config.substitutions.append(("%S", source_root)) + +config.environment.setdefault("RUSTFLAGS", "") diff --git a/tests/lit/variable_locations.rs b/tests/lit/variable_locations.rs new file mode 100644 index 000000000..623d268de
--- /dev/null +++ b/tests/lit/variable_locations.rs @@ -0,0 +1,21 @@ +#![no_std] +#![no_main] +#![allow(unused_unsafe)] + +use core::panic::PanicInfo; + +#[panic_handler] +fn panic(_info: &PanicInfo) -> ! { + unsafe { core::arch::wasm32::unreachable() } +} + +#[no_mangle] +pub extern "C" fn entrypoint(n: u32) -> u32 { + let mut sum = 0u32; + let mut i = 0u32; + while i <= n { + sum = sum + i; + i = i + 1; + } + sum +} diff --git a/tools/debugdump/Cargo.toml b/tools/debugdump/Cargo.toml new file mode 100644 index 000000000..930d9812a --- /dev/null +++ b/tools/debugdump/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "miden-debugdump" +version.workspace = true +rust-version.workspace = true +authors.workspace = true +description = "A tool to dump debug information from MASP packages" +repository.workspace = true +homepage.workspace = true +documentation.workspace = true +categories = ["development-tools", "command-line-utilities"] +keywords = ["debug", "miden", "dwarfdump"] +license.workspace = true +readme.workspace = true +edition.workspace = true +publish.workspace = true + +[[bin]] +name = "miden-debugdump" +path = "src/main.rs" + +[dependencies] +miden-mast-package.workspace = true +miden-core.workspace = true +clap.workspace = true +thiserror.workspace = true diff --git a/tools/debugdump/src/main.rs b/tools/debugdump/src/main.rs new file mode 100644 index 000000000..b9df9be93 --- /dev/null +++ b/tools/debugdump/src/main.rs @@ -0,0 +1,638 @@ +//! miden-debugdump - A tool to dump debug information from MASP packages +//! +//! Similar to llvm-dwarfdump, this tool parses the `.debug_info` section +//! from compiled MASP packages and displays the debug metadata in a +//! human-readable format. 
+ +use std::{ + collections::BTreeMap, + fs::File, + io::{BufReader, Read}, + path::PathBuf, +}; + +use clap::{Parser, ValueEnum}; +use miden_core::{ + Decorator, + utils::{Deserializable, SliceReader}, +}; +use miden_mast_package::{ + MastForest, Package, SectionId, + debug_info::{ + DebugFileInfo, DebugFunctionInfo, DebugInfoSection, DebugPrimitiveType, DebugTypeInfo, + DebugVariableInfo, + }, +}; + +#[derive(Debug, thiserror::Error)] +enum Error { + #[error("failed to read file: {0}")] + Io(#[from] std::io::Error), + #[error("failed to parse package: {0}")] + Parse(String), + #[error("no debug_info section found in package")] + NoDebugInfo, +} + +/// A tool to dump debug information from MASP packages +#[derive(Parser, Debug)] +#[command( + name = "miden-debugdump", + about = "Dump debug information from MASP packages (similar to llvm-dwarfdump)", + version, + rename_all = "kebab-case" +)] +struct Cli { + /// Input MASP file to analyze + #[arg(required = true)] + input: PathBuf, + + /// Filter output to specific section + #[arg(short, long, value_enum)] + section: Option<DumpSection>, + + /// Show all available information (verbose) + #[arg(short, long)] + verbose: bool, + + /// Show raw indices instead of resolved names + #[arg(long)] + raw: bool, + + /// Only show summary statistics + #[arg(long)] + summary: bool, +} + +#[derive(Debug, Clone, Copy, ValueEnum)] +enum DumpSection { + /// Show string table + Strings, + /// Show type information + Types, + /// Show source file information + Files, + /// Show function debug information + Functions, + /// Show variable information within functions + Variables, + /// Show variable location decorators from MAST (similar to DWARF .debug_loc) + Locations, +} + +fn main() { + if let Err(e) = run() { + eprintln!("error: {e}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Error> { + let cli = Cli::parse(); + + // Read the MASP file + let file = File::open(&cli.input)?; + let mut reader = BufReader::new(file); + let mut
bytes = Vec::new(); + reader.read_to_end(&mut bytes)?; + + // Parse the package + let package = Package::read_from(&mut SliceReader::new(&bytes)) + .map_err(|e| Error::Parse(e.to_string()))?; + + // Get the MAST forest for location decorators + let mast_forest = package.mast.mast_forest(); + + // Find the debug_info section + let debug_section = package + .sections + .iter() + .find(|s| s.id == SectionId::DEBUG_INFO) + .ok_or(Error::NoDebugInfo)?; + + // Parse the debug info + let debug_info = DebugInfoSection::read_from(&mut SliceReader::new(&debug_section.data)) + .map_err(|e| Error::Parse(e.to_string()))?; + + // Print header + println!("{}", "=".repeat(80)); + println!("DEBUG INFO DUMP: {}", cli.input.display()); + println!( + "Package: {} (version: {})", + package.name, + package + .version + .as_ref() + .map(|v| v.to_string()) + .unwrap_or_else(|| "unknown".into()) + ); + println!("Debug info version: {}", debug_info.version); + println!("{}", "=".repeat(80)); + println!(); + + if cli.summary { + print_summary(&debug_info, mast_forest); + return Ok(()); + } + + match cli.section { + Some(DumpSection::Strings) => print_strings(&debug_info), + Some(DumpSection::Types) => print_types(&debug_info, cli.raw), + Some(DumpSection::Files) => print_files(&debug_info, cli.raw), + Some(DumpSection::Functions) => print_functions(&debug_info, cli.raw, cli.verbose), + Some(DumpSection::Variables) => print_variables(&debug_info, cli.raw), + Some(DumpSection::Locations) => print_locations(mast_forest, &debug_info, cli.verbose), + None => { + // Print everything + print_summary(&debug_info, mast_forest); + println!(); + print_strings(&debug_info); + println!(); + print_types(&debug_info, cli.raw); + println!(); + print_files(&debug_info, cli.raw); + println!(); + print_functions(&debug_info, cli.raw, cli.verbose); + println!(); + print_locations(mast_forest, &debug_info, cli.verbose); + } + } + + Ok(()) +} + +fn print_summary(debug_info: &DebugInfoSection, mast_forest: 
&MastForest) { + println!(".debug_info summary:"); + println!(" Strings: {} entries", debug_info.strings.len()); + println!(" Types: {} entries", debug_info.types.len()); + println!(" Files: {} entries", debug_info.files.len()); + println!(" Functions: {} entries", debug_info.functions.len()); + + let total_vars: usize = debug_info.functions.iter().map(|f| f.variables.len()).sum(); + let total_inlined: usize = debug_info.functions.iter().map(|f| f.inlined_calls.len()).sum(); + println!(" Variables: {} total (across all functions)", total_vars); + println!(" Inlined: {} call sites", total_inlined); + + // Count DebugVar decorators in MAST + let debug_var_count = mast_forest + .decorators() + .iter() + .filter(|d| matches!(d, Decorator::DebugVar(_))) + .count(); + println!(" DebugVar decorators: {} in MAST", debug_var_count); +} + +fn print_strings(debug_info: &DebugInfoSection) { + println!(".debug_str contents:"); + println!("{:-<80}", ""); + for (idx, s) in debug_info.strings.iter().enumerate() { + println!(" [{:4}] \"{}\"", idx, s); + } +} + +fn print_types(debug_info: &DebugInfoSection, raw: bool) { + println!(".debug_types contents:"); + println!("{:-<80}", ""); + for (idx, ty) in debug_info.types.iter().enumerate() { + print!(" [{:4}] ", idx); + print_type(ty, debug_info, raw, 0); + println!(); + } +} + +fn print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, indent: usize) { + let pad = " ".repeat(indent); + match ty { + DebugTypeInfo::Primitive(prim) => { + print!("{}PRIMITIVE: {}", pad, primitive_name(*prim)); + print!(" (size: {} bytes, {} felts)", prim.size_in_bytes(), prim.size_in_felts()); + } + DebugTypeInfo::Pointer { pointee_type_idx } => { + if raw { + print!("{}POINTER -> type[{}]", pad, pointee_type_idx); + } else { + print!("{}POINTER -> ", pad); + if let Some(pointee) = debug_info.get_type(*pointee_type_idx) { + print_type_brief(pointee, debug_info); + } else { + print!("", pointee_type_idx); + } + } + } + 
DebugTypeInfo::Array { + element_type_idx, + count, + } => { + if raw { + print!("{}ARRAY [{}; {:?}]", pad, element_type_idx, count); + } else { + print!("{}ARRAY [", pad); + if let Some(elem) = debug_info.get_type(*element_type_idx) { + print_type_brief(elem, debug_info); + } else { + print!(""); + } + match count { + Some(n) => print!("; {}]", n), + None => print!("; ?]"), + } + } + } + DebugTypeInfo::Struct { + name_idx, + size, + fields, + } => { + let name = if raw { + format!("str[{}]", name_idx) + } else { + debug_info.get_string(*name_idx).unwrap_or("").to_string() + }; + print!("{}STRUCT {} (size: {} bytes)", pad, name, size); + if !fields.is_empty() { + println!(); + for field in fields { + let field_name = if raw { + format!("str[{}]", field.name_idx) + } else { + debug_info.get_string(field.name_idx).unwrap_or("").to_string() + }; + print!("{} +{:4}: {} : ", pad, field.offset, field_name); + if let Some(fty) = debug_info.get_type(field.type_idx) { + print_type_brief(fty, debug_info); + } else { + print!(""); + } + println!(); + } + } + } + DebugTypeInfo::Function { + return_type_idx, + param_type_indices, + } => { + print!("{}FUNCTION (", pad); + for (i, param_idx) in param_type_indices.iter().enumerate() { + if i > 0 { + print!(", "); + } + if raw { + print!("type[{}]", param_idx); + } else if let Some(pty) = debug_info.get_type(*param_idx) { + print_type_brief(pty, debug_info); + } else { + print!(""); + } + } + print!(") -> "); + match return_type_idx { + Some(idx) => { + if raw { + print!("type[{}]", idx); + } else if let Some(rty) = debug_info.get_type(*idx) { + print_type_brief(rty, debug_info); + } else { + print!(""); + } + } + None => print!("void"), + } + } + DebugTypeInfo::Unknown => { + print!("{}UNKNOWN", pad); + } + } +} + +fn print_type_brief(ty: &DebugTypeInfo, debug_info: &DebugInfoSection) { + match ty { + DebugTypeInfo::Primitive(prim) => print!("{}", primitive_name(*prim)), + DebugTypeInfo::Pointer { pointee_type_idx } => { + 
print!("*"); + if let Some(p) = debug_info.get_type(*pointee_type_idx) { + print_type_brief(p, debug_info); + } + } + DebugTypeInfo::Array { + element_type_idx, + count, + } => { + print!("["); + if let Some(e) = debug_info.get_type(*element_type_idx) { + print_type_brief(e, debug_info); + } + match count { + Some(n) => print!("; {}]", n), + None => print!("]"), + } + } + DebugTypeInfo::Struct { name_idx, .. } => { + print!("struct {}", debug_info.get_string(*name_idx).unwrap_or("?")); + } + DebugTypeInfo::Function { .. } => print!("fn(...)"), + DebugTypeInfo::Unknown => print!("?"), + } +} + +fn primitive_name(prim: DebugPrimitiveType) -> &'static str { + match prim { + DebugPrimitiveType::Void => "void", + DebugPrimitiveType::Bool => "bool", + DebugPrimitiveType::I8 => "i8", + DebugPrimitiveType::U8 => "u8", + DebugPrimitiveType::I16 => "i16", + DebugPrimitiveType::U16 => "u16", + DebugPrimitiveType::I32 => "i32", + DebugPrimitiveType::U32 => "u32", + DebugPrimitiveType::I64 => "i64", + DebugPrimitiveType::U64 => "u64", + DebugPrimitiveType::I128 => "i128", + DebugPrimitiveType::U128 => "u128", + DebugPrimitiveType::F32 => "f32", + DebugPrimitiveType::F64 => "f64", + DebugPrimitiveType::Felt => "felt", + DebugPrimitiveType::Word => "word", + } +} + +fn print_files(debug_info: &DebugInfoSection, raw: bool) { + println!(".debug_files contents:"); + println!("{:-<80}", ""); + for (idx, file) in debug_info.files.iter().enumerate() { + print_file(idx, file, debug_info, raw); + } +} + +fn print_file(idx: usize, file: &DebugFileInfo, debug_info: &DebugInfoSection, raw: bool) { + let path = if raw { + format!("str[{}]", file.path_idx) + } else { + debug_info.get_string(file.path_idx).unwrap_or("").to_string() + }; + + print!(" [{:4}] {}", idx, path); + + if let Some(dir_idx) = file.directory_idx { + let dir = if raw { + format!("str[{}]", dir_idx) + } else { + debug_info.get_string(dir_idx).unwrap_or("").to_string() + }; + print!(" (dir: {})", dir); + } + + if let 
Some(checksum) = &file.checksum { + print!(" [checksum: "); + for byte in &checksum[..4] { + print!("{:02x}", byte); + } + print!("...]"); + } + + println!(); +} + +fn print_functions(debug_info: &DebugInfoSection, raw: bool, verbose: bool) { + println!(".debug_functions contents:"); + println!("{:-<80}", ""); + for (idx, func) in debug_info.functions.iter().enumerate() { + print_function(idx, func, debug_info, raw, verbose); + println!(); + } +} + +fn print_function( + idx: usize, + func: &DebugFunctionInfo, + debug_info: &DebugInfoSection, + raw: bool, + verbose: bool, +) { + let name = if raw { + format!("str[{}]", func.name_idx) + } else { + debug_info.get_string(func.name_idx).unwrap_or("").to_string() + }; + + println!(" [{:4}] FUNCTION: {}", idx, name); + + // Linkage name + if let Some(linkage_idx) = func.linkage_name_idx { + let linkage = if raw { + format!("str[{}]", linkage_idx) + } else { + debug_info.get_string(linkage_idx).unwrap_or("").to_string() + }; + println!(" Linkage name: {}", linkage); + } + + // Location + let file_path = if raw { + format!("file[{}]", func.file_idx) + } else { + debug_info + .get_file(func.file_idx) + .and_then(|f| debug_info.get_string(f.path_idx)) + .unwrap_or("") + .to_string() + }; + println!(" Location: {}:{}:{}", file_path, func.line, func.column); + + // Type + if let Some(type_idx) = func.type_idx { + print!(" Type: "); + if raw { + println!("type[{}]", type_idx); + } else if let Some(ty) = debug_info.get_type(type_idx) { + print_type_brief(ty, debug_info); + println!(); + } else { + println!(""); + } + } + + // MAST root + if let Some(root) = &func.mast_root { + print!(" MAST root: 0x"); + for byte in root { + print!("{:02x}", byte); + } + println!(); + } + + // Variables + if !func.variables.is_empty() { + println!(" Variables ({}):", func.variables.len()); + for var in &func.variables { + print_variable(var, debug_info, raw, verbose); + } + } + + // Inlined calls + if !func.inlined_calls.is_empty() && verbose { + 
println!(" Inlined calls ({}):", func.inlined_calls.len()); + for call in &func.inlined_calls { + let callee = if raw { + format!("func[{}]", call.callee_idx) + } else { + debug_info + .functions + .get(call.callee_idx as usize) + .and_then(|f| debug_info.get_string(f.name_idx)) + .unwrap_or("") + .to_string() + }; + let call_file = if raw { + format!("file[{}]", call.file_idx) + } else { + debug_info + .get_file(call.file_idx) + .and_then(|f| debug_info.get_string(f.path_idx)) + .unwrap_or("") + .to_string() + }; + println!( + " - {} inlined at {}:{}:{}", + callee, call_file, call.line, call.column + ); + } + } +} + +fn print_variable( + var: &DebugVariableInfo, + debug_info: &DebugInfoSection, + raw: bool, + _verbose: bool, +) { + let name = if raw { + format!("str[{}]", var.name_idx) + } else { + debug_info.get_string(var.name_idx).unwrap_or("").to_string() + }; + + let kind = if var.is_parameter() { + format!("param #{}", var.arg_index) + } else { + "local".to_string() + }; + + print!(" - {} ({}): ", name, kind); + + if raw { + print!("type[{}]", var.type_idx); + } else if let Some(ty) = debug_info.get_type(var.type_idx) { + print_type_brief(ty, debug_info); + } else { + print!(""); + } + + print!(" @ {}:{}", var.line, var.column); + + if var.scope_depth > 0 { + print!(" [scope depth: {}]", var.scope_depth); + } + + println!(); +} + +fn print_variables(debug_info: &DebugInfoSection, raw: bool) { + println!(".debug_variables contents (all functions):"); + println!("{:-<80}", ""); + + for func in &debug_info.functions { + if func.variables.is_empty() { + continue; + } + + let func_name = debug_info.get_string(func.name_idx).unwrap_or(""); + println!(" Function: {}", func_name); + + for var in &func.variables { + print_variable(var, debug_info, raw, false); + } + println!(); + } +} + +/// Prints the .debug_loc section - variable location decorators from MAST +/// +/// This is analogous to DWARF's .debug_loc section which contains location +/// lists describing 
where a variable's value can be found at runtime. +fn print_locations(mast_forest: &MastForest, debug_info: &DebugInfoSection, verbose: bool) { + println!(".debug_loc contents (DebugVar decorators from MAST):"); + println!("{:-<80}", ""); + + // Collect all DebugVar decorators + let debug_vars: Vec<_> = mast_forest + .decorators() + .iter() + .enumerate() + .filter_map(|(idx, dec)| { + if let Decorator::DebugVar(info) = dec { + Some((idx, info)) + } else { + None + } + }) + .collect(); + + if debug_vars.is_empty() { + println!(" (no DebugVar decorators found)"); + return; + } + + // Group by variable name for a cleaner view + let mut by_name: BTreeMap<&str, Vec<(usize, &miden_core::DebugVarInfo)>> = BTreeMap::new(); + for (idx, info) in &debug_vars { + by_name.entry(info.name()).or_default().push((*idx, *info)); + } + + println!(" Total DebugVar decorators: {}", debug_vars.len()); + println!(" Unique variable names: {}", by_name.len()); + println!(); + + for (name, entries) in &by_name { + println!(" Variable: \"{}\"", name); + println!(" {} location entries:", entries.len()); + + for (decorator_idx, info) in entries { + print!(" [dec#{}] ", decorator_idx); + + // Print value location + print!("{}", info.value_location()); + + // Print argument info if present + if let Some(arg_idx) = info.arg_index() { + print!(" (param #{})", arg_idx); + } + + // Print type info if present and we can resolve it + if let Some(type_id) = info.type_id() { + if let Some(ty) = debug_info.get_type(type_id) { + print!(" : "); + print_type_brief(ty, debug_info); + } else { + print!(" : type[{}]", type_id); + } + } + + // Print source location if present + if let Some(loc) = info.location() { + print!(" @ {}:{}:{}", loc.uri, loc.line, loc.column); + } + + println!(); + } + println!(); + } + + // In verbose mode, also show raw decorator list + if verbose { + println!(" Raw decorator list (in order):"); + println!(" {:-<76}", ""); + for (idx, info) in &debug_vars { + println!(" [{:4}] {}", 
idx, info); + } + } +} From 02f9163de62017776bb602fc18d891a75fe69955 Mon Sep 17 00:00:00 2001 From: djole Date: Fri, 16 Jan 2026 20:09:19 +0100 Subject: [PATCH 02/32] Align with latest debug_info in miden-vm --- midenc-compile/src/debug_info.rs | 30 ++++++++++++++++++++---------- tools/debugdump/src/main.rs | 9 --------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs index 4902f9c2e..36b6fac32 100644 --- a/midenc-compile/src/debug_info.rs +++ b/midenc-compile/src/debug_info.rs @@ -4,7 +4,7 @@ //! and building a `DebugInfoSection` that can be serialized into the `.debug_info` //! custom section of a MASP package. -use alloc::{collections::BTreeMap, string::ToString}; +use alloc::{collections::BTreeMap, format, string::ToString}; use miden_debug_types::{ColumnNumber, LineNumber}; use miden_mast_package::debug_info::{ @@ -53,21 +53,31 @@ impl DebugInfoBuilder { } /// Adds a file to the file table and returns its index. + /// + /// The `directory` parameter, if provided, is joined with the path to create + /// a full path. The debug info section stores full paths only. 
pub fn add_file(&mut self, path: &str, directory: Option<&str>) -> u32 { - if let Some(&idx) = self.file_indices.get(path) { + // Build the full path + let full_path = if let Some(dir) = directory { + if path.starts_with('/') || path.starts_with("\\\\") { + // Already absolute + path.to_string() + } else { + format!("{}/{}", dir.trim_end_matches('/'), path) + } + } else { + path.to_string() + }; + + if let Some(&idx) = self.file_indices.get(&full_path) { return idx; } - let path_idx = self.section.add_string(path); - let directory_idx = directory.map(|d| self.section.add_string(d)); - - let mut file = DebugFileInfo::new(path_idx); - if let Some(dir_idx) = directory_idx { - file = file.with_directory(dir_idx); - } + let path_idx = self.section.add_string(&full_path); + let file = DebugFileInfo::new(path_idx); let idx = self.section.add_file(file); - self.file_indices.insert(path.to_string(), idx); + self.file_indices.insert(full_path, idx); idx } diff --git a/tools/debugdump/src/main.rs b/tools/debugdump/src/main.rs index b9df9be93..6fd896156 100644 --- a/tools/debugdump/src/main.rs +++ b/tools/debugdump/src/main.rs @@ -372,15 +372,6 @@ fn print_file(idx: usize, file: &DebugFileInfo, debug_info: &DebugInfoSection, r print!(" [{:4}] {}", idx, path); - if let Some(dir_idx) = file.directory_idx { - let dir = if raw { - format!("str[{}]", dir_idx) - } else { - debug_info.get_string(dir_idx).unwrap_or("").to_string() - }; - print!(" (dir: {})", dir); - } - if let Some(checksum) = &file.checksum { print!(" [checksum: "); for byte in &checksum[..4] { From 781565d043edf6a88d84b9351ecad026efea33a8 Mon Sep 17 00:00:00 2001 From: djole Date: Sat, 17 Jan 2026 17:44:11 +0100 Subject: [PATCH 03/32] fix: compute correct FMP offset for debug variable locations - Check if WasmLocal value is on Miden operand stack before assuming it's in local memory (emit Stack(pos) if on stack) - Add post-processing in MasmFunctionBuilder::build() to patch Local(idx) with correct FMP offset - 
Compute num_wasm_locals = params_in_felts + hir_locals for correct offset calculation (DWARF WasmLocal uses WASM indexing where params come first) - Add patch_debug_var_locals_in_block() to recursively patch all DebugVar decorators in the MASM body --- codegen/masm/src/lower/component.rs | 66 ++++++++++++++++++++++++++++- codegen/masm/src/lower/lowering.rs | 13 +++++- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index c2d5566d7..da8272001 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -1,7 +1,8 @@ -use alloc::{collections::BTreeSet, sync::Arc}; +use alloc::{collections::BTreeSet, sync::Arc, vec::Vec}; use miden_assembly::{PathBuf as LibraryPath, ast::InvocationTarget}; use miden_assembly_syntax::{ast::Attribute, parser::WordValue}; +use miden_core::DebugVarLocation; use midenc_hir::{ FunctionIdent, Op, OpExt, SourceSpan, Span, Symbol, TraceTarget, ValueRef, diagnostics::IntoDiagnostic, dialects::builtin, pass::AnalysisManager, @@ -646,6 +647,23 @@ impl MasmFunctionBuilder { num_locals, } = self; + // Compute total WASM locals count for FMP offset calculation. + // WASM locals = params (in felts) + local variables (in felts). + // This is needed because DWARF's WasmLocal(idx) uses WASM indexing where + // params come first, while num_locals only counts HIR spilled values. + let num_params_in_felts: u16 = function + .signature() + .params + .iter() + .map(|p| p.ty.size_in_felts() as u16) + .sum(); + let num_wasm_locals = num_params_in_felts + num_locals; + + // Patch DebugVar Local locations to compute FMP offset. + // During lowering, Local(idx) stores the raw WASM local index. 
+ // Now convert to FMP offset: idx - num_wasm_locals + patch_debug_var_locals_in_block(&mut body, num_wasm_locals); + let mut procedure = masm::Procedure::new(span, visibility, name, num_locals, body); procedure.set_signature(signature); for attribute in ["auth_script", "note_script"] { @@ -660,3 +678,49 @@ impl MasmFunctionBuilder { Ok(procedure) } } + +/// Recursively patch DebugVar Local locations in a block. +/// +/// Converts `Local(idx)` where idx is the raw WASM local index to `Local(offset)` +/// where offset = idx - num_locals (the FMP offset, typically negative). +fn patch_debug_var_locals_in_block(block: &mut masm::Block, num_locals: u16) { + for op in block.iter_mut() { + match op { + masm::Op::Inst(span_inst) => { + // Use DerefMut to get mutable access to the inner Instruction + if let masm::Instruction::DebugVar(info) = &mut **span_inst { + if let DebugVarLocation::Local(idx) = info.value_location() { + // Convert raw WASM local index to FMP offset + let fmp_offset = *idx - (num_locals as i16); + + // Create new info with patched location, preserving all fields + let mut new_info = miden_core::DebugVarInfo::new( + info.name(), + DebugVarLocation::Local(fmp_offset), + ); + if let Some(type_id) = info.type_id() { + new_info.set_type_id(type_id); + } + if let Some(arg_index) = info.arg_index() { + new_info.set_arg_index(arg_index.get()); + } + if let Some(loc) = info.location() { + new_info.set_location(loc.clone()); + } + *info = new_info; + } + } + } + masm::Op::If { then_blk, else_blk, .. } => { + patch_debug_var_locals_in_block(then_blk, num_locals); + patch_debug_var_locals_in_block(else_blk, num_locals); + } + masm::Op::While { body: while_body, .. } => { + patch_debug_var_locals_in_block(while_body, num_locals); + } + masm::Op::Repeat { body: repeat_body, .. 
} => { + patch_debug_var_locals_in_block(repeat_body, num_locals); + } + } + } +} diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 2db93d698..85e36e84c 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1292,7 +1292,18 @@ impl HirLowering for builtin::DbgValue { let value_location = if let Some(first_op) = expr.operations.first() { match first_op { DIExpressionOp::WasmStack(offset) => DebugVarLocation::Stack(*offset as u8), - DIExpressionOp::WasmLocal(idx) => DebugVarLocation::Local(*idx as u16), + DIExpressionOp::WasmLocal(idx) => { + // First check if the value is on the Miden operand stack. + // WASM locals might stay on the stack in Miden if not spilled. + if let Some(pos) = emitter.stack.find(&value) { + DebugVarLocation::Stack(pos as u8) + } else { + // Value is not on stack, assume it's in local memory. + // Store raw WASM local index temporarily. The FMP offset will be + // computed later in MasmFunctionBuilder::build() when num_locals is known. 
+ DebugVarLocation::Local(*idx as i16) + } + } DIExpressionOp::WasmGlobal(_) | DIExpressionOp::Deref => { // For global or dereference, check the stack position of the value if let Some(pos) = emitter.stack.find(&value) { From ac5faaf67edd0e876b4359444a5268ee73afddba Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 3 Feb 2026 23:03:36 +0100 Subject: [PATCH 04/32] Introduce Debug Info dialect Confirm that cfg --> scf preserves debug info --- Cargo.lock | 14 + Cargo.toml | 2 + codegen/masm/Cargo.toml | 1 + codegen/masm/src/lib.rs | 8 + codegen/masm/src/lower/lowering.rs | 3 +- dialects/debuginfo/Cargo.toml | 21 + dialects/debuginfo/src/builders.rs | 99 + dialects/debuginfo/src/lib.rs | 127 ++ dialects/debuginfo/src/ops.rs | 142 ++ dialects/debuginfo/src/transform.rs | 268 +++ dialects/scf/Cargo.toml | 2 + dialects/scf/src/transforms/cfg_to_scf.rs | 95 + ..._to_scf_debug_value_preservation_after.hir | 19 + ...to_scf_debug_value_preservation_before.hir | 18 + docs/DebugInfoMetadata.md | 177 +- .../counter-contract/counter_contract.masm | 1698 +++++++++++++++++ frontend/wasm/Cargo.toml | 1 + .../wasm/src/module/function_builder_ext.rs | 17 +- hir-transform/src/dead_debug_ops.rs | 126 -- hir/src/dialects/builtin/builders.rs | 22 +- hir/src/dialects/builtin/ops.rs | 1 - hir/src/dialects/builtin/ops/debug.rs | 75 - midenc-compile/Cargo.toml | 1 + midenc-compile/src/debug_info.rs | 3 +- .../expected/debug_variable_locations.hir | 8 +- 25 files changed, 2685 insertions(+), 263 deletions(-) create mode 100644 dialects/debuginfo/Cargo.toml create mode 100644 dialects/debuginfo/src/builders.rs create mode 100644 dialects/debuginfo/src/lib.rs create mode 100644 dialects/debuginfo/src/ops.rs create mode 100644 dialects/debuginfo/src/transform.rs create mode 100644 dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir create mode 100644 dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir create mode 100644 
examples/counter-contract/counter_contract.masm delete mode 100644 hir-transform/src/dead_debug_ops.rs delete mode 100644 hir/src/dialects/builtin/ops/debug.rs diff --git a/Cargo.lock b/Cargo.lock index 3bd98a720..edb2e332c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3297,6 +3297,7 @@ dependencies = [ "miden-thiserror", "midenc-dialect-arith", "midenc-dialect-cf", + "midenc-dialect-debuginfo", "midenc-dialect-hir", "midenc-dialect-scf", "midenc-dialect-ub", @@ -3324,6 +3325,7 @@ dependencies = [ "miden-mast-package", "miden-thiserror", "midenc-codegen-masm", + "midenc-dialect-debuginfo", "midenc-dialect-hir", "midenc-dialect-scf", "midenc-frontend-wasm", @@ -3350,6 +3352,15 @@ dependencies = [ "midenc-hir", ] +[[package]] +name = "midenc-dialect-debuginfo" +version = "0.7.1" +dependencies = [ + "log", + "midenc-hir", + "paste", +] + [[package]] name = "midenc-dialect-hir" version = "0.8.1" @@ -3371,9 +3382,11 @@ name = "midenc-dialect-scf" version = "0.8.1" dependencies = [ "bitvec", + "env_logger", "log", "midenc-dialect-arith", "midenc-dialect-cf", + "midenc-dialect-debuginfo", "midenc-dialect-ub", "midenc-expect-test", "midenc-hir", @@ -3430,6 +3443,7 @@ dependencies = [ "miden-thiserror", "midenc-dialect-arith", "midenc-dialect-cf", + "midenc-dialect-debuginfo", "midenc-dialect-hir", "midenc-dialect-ub", "midenc-dialect-wasm", diff --git a/Cargo.toml b/Cargo.toml index 3bed97981..ecc30e371 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,6 +75,7 @@ inventory = "0.3" litcheck = { package = "litcheck-core", version = "0.4" } litcheck-filecheck = "0.4" log = { version = "0.4", features = ["kv"] } +env_logger = "0.11" # Miden Dependencies miden-assembly = { version = "0.22", default-features = false } @@ -141,6 +142,7 @@ midenc-dialect-arith = { version = "0.8.1", path = "dialects/arith" } midenc-dialect-hir = { version = "0.8.1", path = "dialects/hir" } midenc-dialect-scf = { version = "0.8.1", path = "dialects/scf" } midenc-dialect-cf = { version = "0.8.1", path 
= "dialects/cf" } +midenc-dialect-debuginfo = { version = "0.8.1", path = "dialects/debuginfo" } midenc-dialect-ub = { version = "0.8.1", path = "dialects/ub" } midenc-dialect-wasm = { version = "0.8.1", path = "dialects/wasm" } midenc-hir = { version = "0.8.1", path = "hir" } diff --git a/codegen/masm/Cargo.toml b/codegen/masm/Cargo.toml index 94b121665..b4b023132 100644 --- a/codegen/masm/Cargo.toml +++ b/codegen/masm/Cargo.toml @@ -36,6 +36,7 @@ midenc-hir.workspace = true midenc-hir-analysis.workspace = true midenc-dialect-arith.workspace = true midenc-dialect-cf.workspace = true +midenc-dialect-debuginfo.workspace = true midenc-dialect-hir.workspace = true midenc-dialect-scf.workspace = true midenc-dialect-ub.workspace = true diff --git a/codegen/masm/src/lib.rs b/codegen/masm/src/lib.rs index 86a7557f1..ec369b674 100644 --- a/codegen/masm/src/lib.rs +++ b/codegen/masm/src/lib.rs @@ -27,6 +27,7 @@ pub mod masm { use midenc_dialect_arith as arith; use midenc_dialect_cf as cf; +use midenc_dialect_debuginfo as debuginfo; use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; @@ -44,6 +45,9 @@ pub use self::{ inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( lower_builtin_ops )); +inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( + lower_debuginfo_ops +)); inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( lower_arith_ops )); @@ -69,6 +73,10 @@ fn lower_builtin_ops(info: &mut midenc_hir::DialectInfo) { info.register_operation_trait::(); } +fn lower_debuginfo_ops(info: &mut midenc_hir::DialectInfo) { + info.register_operation_trait::(); +} + fn lower_arith_ops(info: &mut midenc_hir::DialectInfo) { info.register_operation_trait::(); info.register_operation_trait::(); diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 85e36e84c..d869d5112 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1,5 
+1,6 @@ use midenc_dialect_arith as arith; use midenc_dialect_cf as cf; +use midenc_dialect_debuginfo as debuginfo; use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; @@ -1262,7 +1263,7 @@ impl HirLowering for arith::Split { } } -impl HirLowering for builtin::DbgValue { +impl HirLowering for debuginfo::DebugValue { fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { use miden_core::{DebugVarInfo, DebugVarLocation, Felt}; use midenc_hir::DIExpressionOp; diff --git a/dialects/debuginfo/Cargo.toml b/dialects/debuginfo/Cargo.toml new file mode 100644 index 000000000..8c89f4d1d --- /dev/null +++ b/dialects/debuginfo/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "midenc-dialect-debuginfo" +description = "Miden IR Debug Info Dialect" +version.workspace = true +rust-version.workspace = true +authors.workspace = true +repository.workspace = true +categories.workspace = true +keywords.workspace = true +license.workspace = true +readme.workspace = true +edition.workspace = true + +[features] +default = ["std"] +std = ["midenc-hir/std"] + +[dependencies] +midenc-hir.workspace = true +paste.workspace = true +log.workspace = true diff --git a/dialects/debuginfo/src/builders.rs b/dialects/debuginfo/src/builders.rs new file mode 100644 index 000000000..2a28a642e --- /dev/null +++ b/dialects/debuginfo/src/builders.rs @@ -0,0 +1,99 @@ +use midenc_hir::{ + Builder, BuilderExt, DIExpressionAttr, DILocalVariableAttr, Report, SourceSpan, ValueRef, +}; + +use super::ops::*; + +/// Builder trait for creating debug info operations. +/// +/// This trait follows the same pattern as other dialect builders +/// (`ArithOpBuilder`, `HirOpBuilder`, etc.) and can be implemented +/// for any type that wraps a `Builder`. 
+/// +/// # Usage +/// +/// ```ignore +/// // Emit a debug value tracking where variable 'x' lives: +/// builder.debug_value(ssa_value, variable_attr, span)?; +/// +/// // With a custom expression (e.g., value needs a deref): +/// builder.debug_value_with_expr(ssa_value, variable_attr, Some(expr), span)?; +/// +/// // Mark a variable as dead: +/// builder.debug_kill(variable_attr, span)?; +/// ``` +pub trait DebugInfoOpBuilder<'f, B: ?Sized + Builder> { + /// Emit a `debuginfo.value` operation that records the current value of a + /// source-level variable. + /// + /// This creates an SSA use of `value`, ensuring that transforms cannot + /// silently drop the value without updating the debug info. + fn debug_value( + &mut self, + value: ValueRef, + variable: DILocalVariableAttr, + span: SourceSpan, + ) -> Result { + self.debug_value_with_expr(value, variable, None, span) + } + + /// Emit a `debuginfo.value` operation with an optional expression that + /// describes how to recover the source-level value from the IR value. + /// + /// The expression encodes the *inverse* of whatever transformation was + /// applied to the value. For example, if a value was promoted to a stack + /// allocation (pointer), the expression would contain a `deref` operation + /// to recover the original value. + fn debug_value_with_expr( + &mut self, + value: ValueRef, + variable: DILocalVariableAttr, + expression: Option, + span: SourceSpan, + ) -> Result { + let expr = expression.unwrap_or_default(); + let op_builder = self.builder_mut().create::(span); + op_builder(value, variable, expr) + } + + /// Emit a `debuginfo.declare` operation that records the storage address + /// of a source-level variable. + fn debug_declare( + &mut self, + address: ValueRef, + variable: DILocalVariableAttr, + span: SourceSpan, + ) -> Result { + let op_builder = self.builder_mut().create::(span); + op_builder(address, variable) + } + + /// Emit a `debuginfo.kill` operation that marks a variable as dead. 
+ /// + /// After this point, the debugger should report the variable as unavailable + /// until the next `debug_value` or `debug_declare` for the same variable. + fn debug_kill( + &mut self, + variable: DILocalVariableAttr, + span: SourceSpan, + ) -> Result { + let op_builder = self.builder_mut().create::(span); + op_builder(variable) + } + + fn builder(&self) -> &B; + fn builder_mut(&mut self) -> &mut B; +} + +/// Blanket implementation: any `Builder` can use `DebugInfoOpBuilder` directly. +impl DebugInfoOpBuilder<'_, B> for B { + #[inline(always)] + fn builder(&self) -> &B { + self + } + + #[inline(always)] + fn builder_mut(&mut self) -> &mut B { + self + } +} diff --git a/dialects/debuginfo/src/lib.rs b/dialects/debuginfo/src/lib.rs new file mode 100644 index 000000000..9013d1768 --- /dev/null +++ b/dialects/debuginfo/src/lib.rs @@ -0,0 +1,127 @@ +#![no_std] +#![feature(debug_closure_helpers)] +#![feature(unboxed_closures)] +#![feature(fn_traits)] +#![feature(ptr_metadata)] +#![feature(specialization)] +#![allow(incomplete_features)] +#![deny(warnings)] + +//! # DebugInfo Dialect +//! +//! A first-class dialect for tracking source-level debug information through +//! compiler transformations. Inspired by [Mojo's DebugInfo dialect], this +//! dialect makes debug variable tracking a first-class citizen of the IR, +//! using SSA use-def chains to enforce correctness. +//! +//! ## Motivation +//! +//! Traditional approaches to debug info in MLIR-like compilers (e.g., Flang/FIR) +//! treat debug information as metadata or attributes — second-class citizens that +//! transforms are free to silently drop. The consequences: +//! +//! - Transforms can silently lose debug info with no verifier catching it +//! - No mechanism forces transform authors to update debug info +//! - Debug info quality degrades as the optimizer gets more aggressive +//! +//! ## Approach: SSA-Based Debug Info +//! +//! 
This dialect defines debug operations as real IR operations with SSA operands: +//! +//! - **`debuginfo.value`** — Records the current value of a source variable. +//! Uses an SSA value operand, so deleting the value without updating debug +//! uses is a hard error. +//! +//! - **`debuginfo.declare`** — Records the storage address of a source variable. +//! Similarly uses an SSA operand for the address. +//! +//! - **`debuginfo.kill`** — Marks a variable as dead, giving the debugger precise +//! lifetime boundaries instead of scope-based heuristics. +//! +//! ## Transform Hooks +//! +//! The [`transform`] module provides utilities that make it easy for transform +//! authors to maintain debug info: +//! +//! - **Simple replacements** are handled automatically via `replace_all_uses_with` +//! - **Complex transforms** use [`salvage_debug_info`](transform::salvage_debug_info) +//! where the transform author only describes the *inverse* of their transformation +//! - **Value deletion** without a replacement emits `debuginfo.kill` automatically +//! +//! ## Design Pillars (from Mojo) +//! +//! 1. **SSA use-def chains** — debug values participate in standard use-def tracking +//! 2. **Expression trees** — `DIExpressionAttr` describes how to recover source values +//! from transformed IR values (encode the inverse transformation) +//! 3. **Explicit lifetimes** — `debuginfo.kill` for precise variable death points +//! +//! [Mojo's DebugInfo dialect]: https://llvm.org/devmtg/2024-04/slides/TechnicalTalks/MojoDebugging.pdf + +extern crate alloc; + +#[cfg(any(feature = "std", test))] +extern crate std; + +use alloc::boxed::Box; + +mod builders; +mod ops; +pub mod transform; + +use midenc_hir::{ + AttributeValue, Builder, Dialect, DialectInfo, DialectRegistration, OperationRef, SourceSpan, + Type, +}; + +pub use self::{builders::DebugInfoOpBuilder, ops::*}; + +/// The DebugInfo dialect — first-class debug variable tracking. 
+/// +/// This dialect provides operations for tracking source-level variables through +/// compiler transformations using SSA semantics. Unlike metadata-based approaches, +/// debug info here participates in standard use-def chains, making it impossible +/// for transforms to silently drop debug information. +#[derive(Debug)] +pub struct DebugInfoDialect { + info: DialectInfo, +} + +impl DebugInfoDialect { + #[inline] + pub fn num_registered(&self) -> usize { + self.registered_ops().len() + } +} + +impl DialectRegistration for DebugInfoDialect { + const NAMESPACE: &'static str = "debuginfo"; + + #[inline] + fn init(info: DialectInfo) -> Self { + Self { info } + } + + fn register_operations(info: &mut DialectInfo) { + info.register_operation::(); + info.register_operation::(); + info.register_operation::(); + } +} + +impl Dialect for DebugInfoDialect { + #[inline] + fn info(&self) -> &DialectInfo { + &self.info + } + + fn materialize_constant( + &self, + _builder: &mut dyn Builder, + _attr: Box, + _ty: &Type, + _span: SourceSpan, + ) -> Option { + // Debug info operations don't produce values that can be constants + None + } +} diff --git a/dialects/debuginfo/src/ops.rs b/dialects/debuginfo/src/ops.rs new file mode 100644 index 000000000..0bc8fadf4 --- /dev/null +++ b/dialects/debuginfo/src/ops.rs @@ -0,0 +1,142 @@ +use midenc_hir::{ + DIExpressionAttr, DILocalVariableAttr, UnsafeIntrusiveEntityRef, derive::operation, + traits::AnyType, +}; + +use crate::DebugInfoDialect; + +pub type DebugValueRef = UnsafeIntrusiveEntityRef; +pub type DebugDeclareRef = UnsafeIntrusiveEntityRef; +pub type DebugKillRef = UnsafeIntrusiveEntityRef; + +/// Records the current value of a source-level variable. +/// +/// This is the core operation of the debuginfo dialect. 
It creates a first-class +/// SSA use of the value, which means: +/// +/// - If a transform deletes the value without updating its debug uses, that's a +/// hard error (not a silent drop like with metadata-based approaches). +/// - Standard MLIR-style use-def tracking automatically enforces this — transforms +/// must call `replace_all_uses_with` or explicitly handle debug uses. +/// +/// The `variable` attribute identifies the source variable, and the `expression` +/// attribute describes how to recover the source-level value from the IR value +/// (e.g., "dereference this pointer" if the value was promoted to an alloca). +/// +/// # Example +/// +/// ```text +/// debuginfo.value %0 #[variable = di.local_variable(name = x, ...)] +/// #[expression = di.expression(DW_OP_WASM_local 0)] +/// ``` +#[operation(dialect = DebugInfoDialect)] +pub struct DebugValue { + #[operand] + value: AnyType, + #[attr] + variable: DILocalVariableAttr, + #[attr] + expression: DIExpressionAttr, +} + +/// Records the storage location (address) of a source-level variable. +/// +/// Unlike `DebugValue` which tracks values, `DebugDeclare` tracks the address +/// where a variable is stored. This is useful for variables that live in memory +/// (e.g., stack allocations) where the address itself doesn't change, but the +/// value at that address may be updated through stores. +/// +/// Like `DebugValue`, this creates a real SSA use of the address value, +/// preventing silent drops during transforms. +#[operation(dialect = DebugInfoDialect)] +pub struct DebugDeclare { + #[operand] + address: AnyType, + #[attr] + variable: DILocalVariableAttr, +} + +/// Marks a source-level variable as dead at this program point. +/// +/// This provides explicit lifetime boundaries for variables, giving the debugger +/// precise information about when a variable is no longer valid. Without this, +/// debuggers must rely on scope-based heuristics which can be inaccurate after +/// optimizations. 
+/// +/// After a `debuginfo.kill`, the debugger should report the variable as +/// "optimized out" or "not available" until the next `debuginfo.value` or +/// `debuginfo.declare` for the same variable. +/// +/// # Example +/// +/// ```text +/// debuginfo.kill #[variable = di.local_variable(name = x, ...)] +/// ``` +#[operation(dialect = DebugInfoDialect)] +pub struct DebugKill { + #[attr] + variable: DILocalVariableAttr, +} + +#[cfg(test)] +mod tests { + use alloc::{rc::Rc, string::ToString}; + + use midenc_hir::{ + Builder, Context, DILocalVariableAttr, OpPrinter, OpPrintingFlags, SourceSpan, Type, + interner::Symbol, + }; + + use crate::{DebugInfoDialect, DebugInfoOpBuilder}; + + fn make_variable() -> DILocalVariableAttr { + let mut variable = + DILocalVariableAttr::new(Symbol::intern("x"), Symbol::intern("main.rs"), 12, Some(7)); + variable.arg_index = Some(0); + variable.ty = Some(Type::I32); + variable + } + + #[test] + fn debug_value_carries_metadata() { + let context = Rc::new(Context::default()); + context.get_or_register_dialect::(); + + let block = context.create_block_with_params([Type::I32]); + let arg = block.borrow().arguments()[0]; + let value = arg.borrow().as_value_ref(); + + let mut builder = context.clone().builder(); + builder.set_insertion_point_to_end(block); + + let variable = make_variable(); + let debug_value = builder + .debug_value(value, variable.clone(), SourceSpan::UNKNOWN) + .expect("failed to create debuginfo.value op"); + + assert_eq!(debug_value.borrow().variable(), &variable); + assert_eq!(block.borrow().back(), Some(debug_value.as_operation_ref())); + + let op = debug_value.as_operation_ref(); + let printed = op.borrow().print(&OpPrintingFlags::default(), context.as_ref()).to_string(); + assert!(printed.contains("di.local_variable")); + } + + #[test] + fn debug_kill_carries_variable() { + let context = Rc::new(Context::default()); + context.get_or_register_dialect::(); + + let block = 
context.create_block_with_params([Type::I32]); + + let mut builder = context.clone().builder(); + builder.set_insertion_point_to_end(block); + + let variable = make_variable(); + let debug_kill = builder + .debug_kill(variable.clone(), SourceSpan::UNKNOWN) + .expect("failed to create debuginfo.kill op"); + + assert_eq!(debug_kill.borrow().variable(), &variable); + } +} diff --git a/dialects/debuginfo/src/transform.rs b/dialects/debuginfo/src/transform.rs new file mode 100644 index 000000000..076fa6ecf --- /dev/null +++ b/dialects/debuginfo/src/transform.rs @@ -0,0 +1,268 @@ +//! Transform utilities for maintaining debug info across IR transformations. +//! +//! This module provides the "transformation hooks" that make the debuginfo dialect +//! practical. Following Mojo's approach, these utilities make it easy for transform +//! authors to keep debug info valid — they only need to describe the *inverse* of +//! their transformation. +//! +//! # Design Philosophy +//! +//! The debuginfo dialect uses SSA use-def chains for debug values, which means +//! transforms *cannot* silently drop debug info. When a transform replaces or +//! deletes a value, any `debuginfo.value` operations using that value must be +//! updated. The standard `replace_all_uses_with` already handles this correctly +//! for simple value replacements. +//! +//! For more complex transforms (e.g., promoting a value to memory, splitting a +//! value into pieces), the transform author uses `salvage_debug_info` to describe +//! how the debug expression should be updated to recover the source-level value +//! from the new representation. +//! +//! # Examples +//! +//! ## Simple value replacement (handled automatically) +//! +//! When CSE replaces `%1 = add %a, %b` with an existing `%0 = add %a, %b`: +//! ```text +//! // Before: debuginfo.value %1 #[variable = x] +//! // rewriter.replace_all_uses_with(%1, %0) +//! // After: debuginfo.value %0 #[variable = x] -- automatic! +//! ``` +//! +//! 
## Value promoted to memory (use salvage_debug_info) +//! +//! When a transform promotes a value to a stack allocation: +//! ```text +//! // Before: debuginfo.value %val #[variable = x] +//! // Transform creates: %ptr = alloca T +//! // store %val, %ptr +//! // Call: salvage_debug_info(%val, SalvageAction::Deref { new_value: %ptr }) +//! // After: debuginfo.value %ptr #[variable = x, expression = di.expression(DW_OP_deref)] +//! ``` + +use alloc::vec::Vec; + +use midenc_hir::{Builder, DIExpressionOp, Op, OperationRef, Spanned, ValueRef}; + +use crate::{DebugInfoOpBuilder, ops::DebugValue}; + +/// Describes how to recover the original source-level value after a transformation. +/// +/// When a transform changes a value's representation, it creates a `SalvageAction` +/// describing the inverse operation. The debuginfo framework then updates the +/// `DIExpressionAttr` accordingly so the debugger can still find the variable's value. +/// +/// Transform authors only need to pick the right variant — the framework handles +/// updating all affected `debuginfo.value` operations. +#[derive(Clone, Debug)] +pub enum SalvageAction { + /// The value is now behind a pointer; dereference to recover the original. + /// + /// Use this when a value is promoted to a stack allocation. + /// The expression will have `DW_OP_deref` prepended. + Deref { + /// The new pointer value that replaces the original. + new_value: ValueRef, + }, + + /// A constant offset was added to the value. + /// + /// Use this when a value is relocated by a fixed amount (e.g., frame + /// pointer adjustments). The expression will encode the inverse subtraction. + OffsetBy { + /// The new value (original + offset). + new_value: ValueRef, + /// The offset that was added. + offset: u64, + }, + + /// The value was replaced by a new value with an arbitrary expression. + /// + /// Use this for complex transformations where the simple patterns don't apply. 
+ /// The caller provides the full expression describing how to recover the + /// source-level value from the new IR value. + WithExpression { + /// The new value replacing the original. + new_value: ValueRef, + /// Expression operations describing the inverse transform. + ops: Vec, + }, + + /// The value is now a constant. + /// + /// Use this when constant propagation determines the value at this point. + Constant { + /// The constant value. + value: u64, + }, + + /// The value was completely removed with no recovery possible. + /// + /// Use this as a last resort when the value cannot be recovered. + /// This will emit a `debuginfo.kill` for the affected variable. + Undef, +} + +/// Salvage debug info for all `debuginfo.value` operations that use `old_value`. +/// +/// When a transform is about to delete or replace a value, call this function +/// to update all debug uses. The `action` describes how the debugger can recover +/// the original source-level value from the new representation. +/// +/// This is the main entry point for transform authors who need to update debug +/// info beyond simple `replace_all_uses_with` scenarios. +/// +/// # Example +/// +/// ```ignore +/// // Value was promoted to memory: +/// let ptr = builder.alloca(ty, span)?; +/// builder.store(old_val, ptr, span)?; +/// salvage_debug_info( +/// &old_val, +/// &SalvageAction::Deref { new_value: ptr }, +/// &mut builder, +/// ); +/// ``` +pub fn salvage_debug_info( + old_value: &ValueRef, + action: &SalvageAction, + builder: &mut B, +) { + // Collect all debug value ops that use the old value + let debug_ops: Vec = debug_value_users(old_value); + + for mut debug_op in debug_ops { + apply_salvage_action(&mut debug_op, action, builder); + } +} + +/// Apply a salvage action to a single debug value operation. 
+fn apply_salvage_action( + debug_op: &mut OperationRef, + action: &SalvageAction, + builder: &mut B, +) { + let span = debug_op.borrow().span(); + + match action { + SalvageAction::Deref { new_value } => { + // Get existing expression and prepend deref + let (variable, mut expr) = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + (dv.variable().clone(), dv.expression().clone()) + }; + expr.operations.insert(0, DIExpressionOp::Deref); + + // Erase old op and create new one with updated value and expression + debug_op.borrow_mut().erase(); + let _ = builder.debug_value_with_expr(*new_value, variable, Some(expr), span); + } + + SalvageAction::OffsetBy { new_value, offset } => { + let (variable, mut expr) = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + (dv.variable().clone(), dv.expression().clone()) + }; + // To recover: subtract the offset that was added + expr.operations.push(DIExpressionOp::ConstU64(*offset)); + expr.operations.push(DIExpressionOp::Minus); + + debug_op.borrow_mut().erase(); + let _ = builder.debug_value_with_expr(*new_value, variable, Some(expr), span); + } + + SalvageAction::WithExpression { new_value, ops } => { + let (variable, mut expr) = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + (dv.variable().clone(), dv.expression().clone()) + }; + expr.operations.extend(ops.iter().cloned()); + + debug_op.borrow_mut().erase(); + let _ = builder.debug_value_with_expr(*new_value, variable, Some(expr), span); + } + + SalvageAction::Constant { value } => { + let variable = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + dv.variable().clone() + }; + + debug_op.borrow_mut().erase(); + // Emit a kill since we can't create a debuginfo.value without a live SSA operand + // for constants — the constant value is encoded in the expression + let _ = builder.debug_kill(variable, span); + // TODO: in the future, could emit a debuginfo.value with a materialized 
constant + // and a ConstU64/StackValue expression pair + let _ = value; + } + + SalvageAction::Undef => { + let variable = { + let op = debug_op.borrow(); + let dv = op.downcast_ref::().unwrap(); + dv.variable().clone() + }; + + debug_op.borrow_mut().erase(); + let _ = builder.debug_kill(variable, span); + } + } +} + +/// Check if an operation is a debug info operation. +/// +/// This is useful for transforms that need to skip or handle debug ops +/// differently (e.g., DCE should not consider debug uses as "real" uses +/// that keep a value alive). +pub fn is_debug_info_op(op: &dyn Op) -> bool { + op.as_operation().is::() + || op.as_operation().is::() + || op.as_operation().is::() +} + +/// Collect all `debuginfo.value` operations that reference the given value. +/// +/// Useful for transforms that need to inspect or update debug info for a +/// specific value. +pub fn debug_value_users(value: &ValueRef) -> Vec { + let value = value.borrow(); + let mut ops = Vec::new(); + for use_ in value.iter_uses() { + if use_.owner.borrow().is::() { + ops.push(use_.owner); + } + } + ops +} + +/// Recursively collect all debug info operations within an operation's regions. 
+pub fn collect_debug_ops(op: &OperationRef) -> Vec {
+    let mut debug_ops = Vec::new(); // filled in visit order by the helper below
+    collect_debug_ops_recursive(op, &mut debug_ops);
+    debug_ops
+}
+
+fn collect_debug_ops_recursive(op: &OperationRef, debug_ops: &mut Vec) {
+    let op = op.borrow();
+
+    if op.is::() // `op` itself is tested before any of its nested operations
+        || op.is::()
+        || op.is::()
+    {
+        debug_ops.push(op.as_operation_ref());
+    }
+
+    for region in op.regions() { // descend: region -> block -> operation
+        for block in region.body() {
+            for inner_op in block.body() {
+                collect_debug_ops_recursive(&inner_op.as_operation_ref(), debug_ops);
+            }
+        }
+    }
+}
diff --git a/dialects/scf/Cargo.toml b/dialects/scf/Cargo.toml
index 1adab716c..8e3c13ee4 100644
--- a/dialects/scf/Cargo.toml
+++ b/dialects/scf/Cargo.toml
@@ -28,3 +28,5 @@ bitvec.workspace = true
 # NOTE: Use local paths for dev-only dependency to avoid relying on crates.io during packaging
 midenc-expect-test = { path = "../../tools/expect-test" }
 midenc-hir = { path = "../../hir", features = ["logging"] }
+midenc-dialect-debuginfo = { path = "../debuginfo" }
+env_logger.workspace = true
diff --git a/dialects/scf/src/transforms/cfg_to_scf.rs b/dialects/scf/src/transforms/cfg_to_scf.rs
index d029ad68c..be96eecb3 100644
--- a/dialects/scf/src/transforms/cfg_to_scf.rs
+++ b/dialects/scf/src/transforms/cfg_to_scf.rs
@@ -837,4 +837,99 @@ mod tests {
 
         Ok(())
     }
+
+    /// This test verifies that `debuginfo.debug_value` operations are preserved through the
+    /// CF-to-SCF transformation. The key behavior being tested is that `replace_all_uses_with`
+    /// (used internally by the transform to replace block arguments with `scf.if` results)
+    /// automatically updates the SSA operands of debug value ops.
+    #[test]
+    fn cfg_to_scf_debug_value_preservation() -> Result<(), Report> {
+        use midenc_dialect_debuginfo::{DebugInfoDialect, DebugInfoOpBuilder};
+
+        let context = Rc::new(Context::default());
+        context.get_or_register_dialect::();
+        let mut builder = OpBuilder::new(context.clone());
+
+        let span = SourceSpan::default();
+        let function = {
+            let builder = builder.create::(span);
+            let name = Ident::new("test".into(), span);
+            let signature = Signature::new([AbiParam::new(Type::U32)], [AbiParam::new(Type::U32)]);
+            builder(name, signature).unwrap()
+        };
+
+        // Define function body: entry -> (if_is_zero | if_is_nonzero) -> exit_block(return_val)
+        let mut builder = FunctionBuilder::new(function, &mut builder);
+
+        let if_is_zero = builder.create_block();
+        let if_is_nonzero = builder.create_block();
+        let exit_block = builder.create_block();
+        let return_val = builder.append_block_param(exit_block, Type::U32, span);
+
+        let block = builder.current_block();
+        let input = block.borrow().arguments()[0].upcast(); // the function's single u32 argument
+
+        let input_var = DILocalVariableAttr::new(
+            Symbol::intern("input"),
+            Symbol::intern("test.rs"),
+            1, // printed as `line = 1` in the expected snapshots
+            Some(1), // printed as `column = 1` in the expected snapshots
+        );
+        let result_var = DILocalVariableAttr::new(
+            Symbol::intern("result"),
+            Symbol::intern("test.rs"),
+            2, // printed as `line = 2` in the expected snapshots
+            Some(1), // printed as `column = 1` in the expected snapshots
+        );
+
+        let zero = builder.u32(0, span);
+        let is_zero = builder.eq(input, zero, span)?;
+        // Track the input variable (emitted in the entry block, before the branch)
+        builder
+            .builder_mut()
+            .debug_value(input, input_var.clone(), span)?;
+        builder.cond_br(is_zero, if_is_zero, [], if_is_nonzero, [], span)?;
+
+        builder.switch_to_block(if_is_zero);
+        let a = builder.incr(input, span)?;
+        // Track result in then-branch
+        builder
+            .builder_mut()
+            .debug_value(a, result_var.clone(), span)?;
+        builder.br(exit_block, [a], span)?;
+
+        builder.switch_to_block(if_is_nonzero);
+        let b = builder.mul(input, input, span)?;
+        // Track result in else-branch
+        builder
+            .builder_mut()
+            .debug_value(b, result_var.clone(), span)?;
+        builder.br(exit_block, [b], span)?;
+
+        builder.switch_to_block(exit_block);
+        // KEY: this debug_value uses the block argument `return_val`, which will be
+        // replaced by the scf.if result via replace_all_uses_with
+        builder
+            .builder_mut()
+            .debug_value(return_val, result_var.clone(), span)?;
+        builder.ret(Some(return_val), span)?;
+
+        let operation = function.as_operation_ref();
+
+        // Verify the input IR against the `..._before.hir` snapshot
+        let input_ir = format!("{}", &operation.borrow());
+        expect_file!["expected/cfg_to_scf_debug_value_preservation_before.hir"]
+            .assert_eq(&input_ir);
+
+        // Run transformation (the `LiftControlFlowToSCF` pass) on the function
+        let mut pm = pass::PassManager::on::(context, pass::Nesting::Implicit);
+        pm.add_pass(Box::new(LiftControlFlowToSCF));
+        pm.run(operation)?;
+
+        // Verify that debug values survive with updated SSA operands (`..._after.hir` snapshot)
+        let output = format!("{}", &operation.borrow());
+        expect_file!["expected/cfg_to_scf_debug_value_preservation_after.hir"].assert_eq(&output);
+
+        Ok(())
+    }
 }
diff --git a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir
new file mode 100644
index 000000000..16da7f506
--- /dev/null
+++ b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir
@@ -0,0 +1,19 @@
+public builtin.function @test(v0: u32) -> u32 {
+^block0(v0: u32):
+    v2 = arith.constant 0 : u32;
+    v3 = arith.eq v0, v2 : i1;
+    debuginfo.debug_value v0 #[expression = di.expression()] #[variable = di.local_variable(name = input, file = test.rs, line = 1, column = 1)];
+    v8 = scf.if v3 : u32 {
+    ^block1:
+        v4 = arith.incr v0 : u32;
+        debuginfo.debug_value v4 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)];
+        scf.yield v4;
+    } else {
+    ^block2:
+        v5 = arith.mul v0, v0 : u32 #[overflow = checked];
+        debuginfo.debug_value v5 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)];
+        scf.yield v5;
+    };
+    debuginfo.debug_value v8 #[expression = di.expression()]
#[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; + builtin.ret v8; +}; \ No newline at end of file diff --git a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir new file mode 100644 index 000000000..894e4e9dc --- /dev/null +++ b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir @@ -0,0 +1,18 @@ +public builtin.function @test(v0: u32) -> u32 { +^block0(v0: u32): + v2 = arith.constant 0 : u32; + v3 = arith.eq v0, v2 : i1; + debuginfo.debug_value v0 #[expression = di.expression()] #[variable = di.local_variable(name = input, file = test.rs, line = 1, column = 1)]; + cf.cond_br v3 ^block1, ^block2; +^block1: + v4 = arith.incr v0 : u32; + debuginfo.debug_value v4 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; + cf.br ^block3(v4); +^block2: + v5 = arith.mul v0, v0 : u32 #[overflow = checked]; + debuginfo.debug_value v5 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; + cf.br ^block3(v5); +^block3(v1: u32): + debuginfo.debug_value v1 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; + builtin.ret v1; +}; \ No newline at end of file diff --git a/docs/DebugInfoMetadata.md b/docs/DebugInfoMetadata.md index bdf71aa21..cdf672569 100644 --- a/docs/DebugInfoMetadata.md +++ b/docs/DebugInfoMetadata.md @@ -1,10 +1,62 @@ # Debug Info Metadata Pipeline -This note describes how the Miden compiler now threads source-level variable +This note describes how the Miden compiler threads source-level variable metadata through HIR when compiling Wasm input. 
The goal is to make every HIR -function carry `DI*` attributes and `dbg.*` intrinsics that mirror the DWARF -records present in the Wasm binary, so downstream passes (or tooling consuming -serialized HIR) can reason about user variables. +function carry `DI*` attributes and `debuginfo.*` operations that mirror the +DWARF records present in the Wasm binary, so downstream passes (or tooling +consuming serialized HIR) can reason about user variables. + +## The DebugInfo Dialect + +Debug variable tracking is implemented as a first-class IR dialect +(`midenc-dialect-debuginfo`, namespace `"debuginfo"`), inspired by +[Mojo's DebugInfo dialect](https://llvm.org/devmtg/2024-04/slides/TechnicalTalks/MojoDebugging.pdf). +Unlike metadata-based approaches (e.g., Flang/FIR), debug operations here are +real IR operations with SSA operands, which means: + +- If a transform deletes a value without updating its debug uses, that is a + hard error — not a silent drop. +- Standard `replace_all_uses_with` automatically propagates value replacements + to debug uses. +- The IR verifier catches dangling debug references. + +### Operations + +| Operation | Operands | Purpose | +|-----------|----------|---------| +| `debuginfo.debug_value` | SSA value + `DILocalVariableAttr` + `DIExpressionAttr` | Records the current value of a source variable | +| `debuginfo.debug_declare` | SSA address + `DILocalVariableAttr` | Records the storage location (address) of a variable | +| `debuginfo.debug_kill` | `DILocalVariableAttr` only | Marks a variable as dead at this program point | + +### Design Pillars + +1. **SSA use-def chains** — debug values participate in standard use-def tracking, + making it impossible for transforms to silently lose debug info. +2. **Expression trees** — `DIExpressionAttr` describes how to recover source values + from transformed IR values (encoding the inverse transformation). +3. 
**Explicit lifetimes** — `debuginfo.debug_kill` provides precise variable death + points instead of relying on scope-based heuristics. + +### Builder API + +The `DebugInfoOpBuilder` trait provides a convenient API for emitting debug ops: + +```rust +// Track a variable's value: +builder.debug_value(ssa_value, variable_attr, span)?; + +// Track with a custom expression (e.g., value needs a dereference): +builder.debug_value_with_expr(ssa_value, variable_attr, Some(expr), span)?; + +// Track a variable's storage address: +builder.debug_declare(address_value, variable_attr, span)?; + +// Mark a variable as dead: +builder.debug_kill(variable_attr, span)?; +``` + +The trait has a blanket implementation for all `Builder` types, so any IR builder +can emit debug operations directly. ## High-Level Flow @@ -20,7 +72,7 @@ serialized HIR) can reason about user variables. 3. **Translation-time tracking** – every `FuncTranslator` receives the `FunctionDebugInfo` for the function it is translating. `FunctionBuilderExt` attaches the compile-unit/subprogram attrs to the function op, records entry - parameters, and emits `builtin.dbg_value` intrinsics whenever locals change. + parameters, and emits `debuginfo.debug_value` operations whenever locals change. 4. **Span-aware updates** – as each wasm operator is translated we store the real `SourceSpan`. The first non-unknown span is used to retroactively patch the compile unit, subprogram, and parameter variable records with real file, @@ -28,11 +80,11 @@ serialized HIR) can reason about user variables. the actual user file. The emitted HIR therefore contains both the SSA instructions and the debug -intrinsics that map values back to the user program. +operations that map values back to the user program. 
## HIR Metadata Constructs -The core types live in `hir/src/attributes/debug.rs`: +The core attribute types live in `hir/src/attributes/debug.rs`: - `DICompileUnitAttr` – captures language, primary file, optional directory, producer string, and optimized flag. Stored once per function/module. @@ -52,10 +104,11 @@ The core types live in `hir/src/attributes/debug.rs`: - Additional DWARF operations for complex expressions These attrs are exported from `midenc_hir` so clients can construct them -programmatically. The debug intrinsic (`builtin.dbg_value` from -`hir/src/dialects/builtin/ops/debug.rs`) consume a `Value` plus the -metadata attributes. The `dbg_value` operation includes a `DIExpressionAttr` -field that describes the location or computation of the variable's value. +programmatically. The debug operations (`debuginfo.debug_value`, +`debuginfo.debug_declare`, `debuginfo.debug_kill` from +`dialects/debuginfo/src/ops.rs`) consume SSA values plus the metadata +attributes. The `debug_value` operation includes a `DIExpressionAttr` field +that describes the location or computation of the variable's value. ## Collecting Metadata from Wasm @@ -83,33 +136,33 @@ The translation machinery picks up those records as follows: - `FunctionBuilderExt::set_debug_metadata` attaches compile-unit/subprogram attrs to the function op and resets its internal bookkeeping. - Entry parameters are stored via `register_parameter` so we can emit - `dbg.value` instructions after we encounter the first real span (parameters + `debug_value` operations after we encounter the first real span (parameters have no dedicated wasm operator with source ranges). - Every wasm operator calls `builder.record_debug_span(span)` prior to emission; the first non-unknown span updates the compile unit/subprogram attrs and - triggers parameter `dbg.value` emission so arguments are tied to the correct + triggers parameter `debug_value` emission so arguments are tied to the correct location. 
- `def_var_with_dbg` is the canonical entry point for `local.set` and `local.tee`. It updates the SSA value and immediately emits a - `builtin.dbg_value` with the precise span of the store. + `debuginfo.debug_value` with the precise span of the store. - Decoded `DW_AT_location` ranges are normalized into a per-function schedule. As the translator visits each wasm offset we opportunistically emit extra - `dbg.value` intrinsics so source variables track transitions between Wasm - locals without relying on `builtin.dbg_declare`. + `debug_value` operations so source variables track transitions between Wasm + locals without relying on `debuginfo.debug_declare`. - When present, `DW_AT_decl_line`/`DW_AT_decl_column` on variables override the default span so we keep the original lexical definition sites instead of inheriting the statement we first observed during translation. Locals declared in the wasm prologue receive an initial value but no debug -intrinsic until they are defined in user code. Subsequent writes insert -additional `dbg.value` ops so consumers can track value changes over time. +operation until they are defined in user code. Subsequent writes insert +additional `debug_value` ops so consumers can track value changes over time. ## Example In the serialized HIR for the test pipeline you now see: ```hir -builtin.dbg_value v0 #[expression = di.expression(DW_OP_WASM_local 0)] +debuginfo.debug_value v0 #[expression = di.expression(DW_OP_WASM_local 0)] #[variable = di.local_variable( name = arg0, file = /path/to/lib.rs, @@ -121,11 +174,11 @@ builtin.dbg_value v0 #[expression = di.expression(DW_OP_WASM_local 0)] ``` The `expression` attribute indicates that the variable is stored in WASM local 0. 
-When a variable moves between locations, additional `dbg_value` operations are +When a variable moves between locations, additional `debug_value` operations are emitted with updated expressions: ```hir -builtin.dbg_value v22 #[expression = di.expression(DW_OP_WASM_local 3)] +debuginfo.debug_value v22 #[expression = di.expression(DW_OP_WASM_local 3)] #[variable = di.local_variable(name = sum, ...)] ``` @@ -133,6 +186,57 @@ Both the attribute and the trailing comment reference the same source location so downstream tooling can disambiguate the variable regardless of how it parses HIR. +## Transform Hooks + +The `debuginfo::transform` module (`dialects/debuginfo/src/transform.rs`) +provides utilities that make it straightforward for transform authors to +maintain debug info across IR transformations. + +### Simple Replacements (Automatic) + +When a transform replaces one value with another (e.g., CSE, copy propagation), +the standard `replace_all_uses_with` automatically updates all debug uses: + +```text +// Before: debuginfo.debug_value %1 #[variable = x] +// rewriter.replace_all_uses_with(%1, %0) +// After: debuginfo.debug_value %0 #[variable = x] -- automatic! +``` + +### Complex Transforms (SalvageAction) + +For transforms that change a value's representation (not just replace it), +the `salvage_debug_info()` function takes a `SalvageAction` describing the +inverse transformation. 
Available actions: + +| Action | Use Case | Expression Update | +|--------|----------|-------------------| +| `Deref { new_value }` | Value promoted to stack allocation | Prepends `DW_OP_deref` | +| `OffsetBy { new_value, offset }` | Frame pointer adjustment | Appends `const(offset), minus` | +| `WithExpression { new_value, ops }` | Arbitrary complex transform | Appends custom expression ops | +| `Constant { value }` | Constant propagation | Emits `debuginfo.debug_kill` (future: constant expression) | +| `Undef` | Value completely removed | Emits `debuginfo.debug_kill` | + +Example usage in a transform: + +```rust +use midenc_dialect_debuginfo::transform::{salvage_debug_info, SalvageAction}; + +// Value was promoted to memory: +let ptr = builder.alloca(ty, span)?; +builder.store(old_val, ptr, span)?; +salvage_debug_info(&old_val, &SalvageAction::Deref { new_value: ptr }, &mut builder); +``` + +### Helper Functions + +- `is_debug_info_op(op)` — checks if an operation is a debug info op (useful + for DCE to skip debug uses when determining liveness) +- `debug_value_users(value)` — collects all `debuginfo.debug_value` ops that + reference a given value +- `collect_debug_ops(op)` — recursively collects all debug ops within an + operation's regions + ## Kinda Fallback Behavior/Best Effort cases - If DWARF lookup fails entirely, we still emit attrs but populate @@ -146,13 +250,13 @@ HIR. - **Location expressions** – We now decode `DW_AT_location` records for locals and parameters, interpret simple Wasm location opcodes (including locals, - globals, and operand-stack slots), and attach them to `dbg.value` operations - as `DIExpressionAttr`. The system emits additional `dbg.value` intrinsics - whenever a variable's storage changes, with each operation containing the - appropriate expression. This allows modeling multi-location lifetimes where - variables move between different storage locations. 
Support for more complex - composite expressions (pieces, arithmetic operations, etc.) is implemented - but not fully utilized from DWARF parsing yet. + globals, and operand-stack slots), and attach them to `debuginfo.debug_value` + operations as `DIExpressionAttr`. The system emits additional `debug_value` + operations whenever a variable's storage changes, with each operation + containing the appropriate expression. This allows modeling multi-location + lifetimes where variables move between different storage locations. Support + for more complex composite expressions (pieces, arithmetic operations, etc.) + is implemented but not fully utilized from DWARF parsing yet. - **Lifetimes** – we reset the compile-unit/subprogram metadata to the first span we encounter, but we do not track scopes or lexical block DIEs. Extending the collector to read `DW_TAG_lexical_block` and other scope markers would @@ -184,7 +288,7 @@ The debug info implementation is validated by lit tests in `tests/lit/debug/`: Each test compiles a small Rust snippet with DWARF enabled (`-C debuginfo=2`), runs it through `midenc compile --emit hir`, and uses `FileCheck` to verify that -`builtin.dbg_value` intrinsics are emitted with the correct `di.local_variable` +`debuginfo.debug_value` operations are emitted with the correct `di.local_variable` attributes containing variable names, file paths, line numbers, and types. To run the debug info tests: @@ -201,13 +305,20 @@ Or to run a specific test: ## Bottomline -- HIR now exposes DWARF-like metadata via reusable `DI*` attributes including +- Debug variable tracking uses a dedicated `debuginfo` dialect with SSA-based + operations (`debuginfo.debug_value`, `debuginfo.debug_declare`, + `debuginfo.debug_kill`), making debug info a first-class IR citizen that + transforms cannot silently drop. +- HIR exposes DWARF-like metadata via reusable `DI*` attributes including `DIExpressionAttr` for location expressions. 
- The wasm frontend precomputes function metadata, keeps it mutable during - translation, and emits `dbg.value` intrinsics with location expressions for - every parameter/variable assignment. + translation, and emits `debuginfo.debug_value` operations with location + expressions for every parameter/variable assignment. +- Transform authors maintain debug info via `salvage_debug_info()` — they only + describe the inverse of their transformation, and the framework updates all + affected debug operations automatically. - Location expressions (DW_OP_WASM_local, etc.) are preserved from DWARF and - attached to `dbg.value` operations, enabling accurate tracking of variables + attached to `debug_value` operations, enabling accurate tracking of variables as they move between different storage locations. - The serialized HIR describes user variables with accurate file/line/column information and storage locations, providing a foundation for future tooling diff --git a/examples/counter-contract/counter_contract.masm b/examples/counter-contract/counter_contract.masm new file mode 100644 index 000000000..fe56c288d --- /dev/null +++ b/examples/counter-contract/counter_contract.masm @@ -0,0 +1,1698 @@ +# mod miden:counter-contract/counter-contract@0.1.0 + +@callconv("canon-lift") +pub proc get-count( + +) -> felt + exec.::miden:counter-contract/counter-contract@0.1.0::init + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden:counter-contract/counter-contract@0.1.0#get-count + trace.252 + nop + exec.::std::sys::truncate_stack +end + +@callconv("canon-lift") +pub proc increment-count( + +) -> felt + exec.::miden:counter-contract/counter-contract@0.1.0::init + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden:counter-contract/counter-contract@0.1.0#increment-count + trace.252 + nop + exec.::std::sys::truncate_stack +end + +proc init + push.1179648 + trace.240 + 
exec.::intrinsics::mem::heap_init + trace.252 + push.[7028007876379170725,18060021366771303825,13412364500725888848,14178532912296021363] + adv.push_mapval + push.262144 + push.1 + trace.240 + exec.::std::mem::pipe_preimage_to_memory + trace.252 + drop + push.1048576 + u32assert + mem_store.278536 + push.0 + u32assert + mem_store.278537 +end + +# mod miden:counter-contract/counter-contract@0.1.0::counter_contract + +@callconv("C") +proc __wasm_call_ctors( + +) + nop +end + +@callconv("C") +proc _RNvNtCs2bNbiPwbrt9_16counter_contract8bindings40___link_custom_section_describing_imports( + +) + nop +end + +@callconv("C") +proc _RNvXs2_Cs2bNbiPwbrt9_16counter_contractNtB5_15CounterContractNtNtNtNtNtNtB5_8bindings7exports5miden16counter_contract16counter_contract5Guest15increment_count( + +) -> felt + push.1114144 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_sw + trace.252 + nop + push.160 + u32wrapping_sub + push.1114144 + dup.1 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_sw + trace.252 + nop + push.92 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.1 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.88 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.84 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.80 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.80 + dup.1 + u32wrapping_add + dup.1 + trace.240 + nop + 
exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB4_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromANtNtB6_4felt4Feltj4_E4from + trace.252 + nop + dup.0 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs6_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert5AsRefBW_E6as_ref + trace.252 + nop + push.0 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4feltNtB5_4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromhE4from + trace.252 + nop + push.12 + dup.2 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.8 + dup.3 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.4 + dup.4 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + movup.4 + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.32 + dup.6 + u32wrapping_add + movup.2 + swap.3 + movdn.2 + swap.1 + swap.4 + swap.1 + swap.5 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden::active_account::get_map_item + trace.252 + nop + push.40 + dup.1 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.88 + dup.3 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + 
movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.32 + dup.1 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.80 + dup.3 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.80 + dup.1 + u32wrapping_add + push.144 + dup.2 + u32wrapping_add + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse + trace.252 + nop + push.144 + dup.1 + u32wrapping_add + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs3_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtNtB7_4felt4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromNtB5_4WordE4from + trace.252 + nop + push.1 + add + push.12 + dup.2 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.8 + dup.3 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.4 + dup.4 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + dup.4 + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.64 + dup.6 + u32wrapping_add + dup.5 + swap.1 + trace.240 + nop + 
exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromNtNtB7_4felt4FeltE4from + trace.252 + nop + push.0 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4feltNtB5_4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromhE4from + trace.252 + nop + push.76 + dup.7 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.72 + dup.8 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.68 + dup.9 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.64 + dup.10 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.80 + dup.11 + u32wrapping_add + swap.9 + swap.1 + swap.8 + swap.2 + swap.7 + swap.3 + swap.6 + swap.4 + swap.5 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden::native_account::set_map_item + trace.252 + nop + push.88 + dup.2 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.120 + dup.4 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.80 + dup.2 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + 
trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.112 + dup.4 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.24 + push.80 + dup.3 + u32wrapping_add + u32wrapping_add + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.136 + dup.4 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.96 + dup.2 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.128 + dup.4 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.112 + dup.2 + u32wrapping_add + push.32 + dup.3 + u32wrapping_add + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse + trace.252 + nop + push.128 + dup.2 + u32wrapping_add + push.144 + dup.3 + u32wrapping_add + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse + trace.252 + nop + push.152 + dup.2 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.24 + push.32 + dup.5 + u32wrapping_add + u32wrapping_add + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + dup.2 + dup.2 + movup.2 + u32divmod.4 + 
swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.144 + dup.4 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.48 + dup.6 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + dup.2 + dup.2 + movup.2 + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.16 + dup.6 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.24 + dup.4 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.16 + dup.2 + u32wrapping_add + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs3_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtNtB7_4felt4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromNtB5_4WordE4from + trace.252 + nop + drop + push.160 + movup.2 + u32wrapping_add + push.1114144 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_sw + trace.252 + nop +end + +@callconv("C") +proc _RNvXs2_Cs2bNbiPwbrt9_16counter_contractNtB5_15CounterContractNtNtNtNtNtNtB5_8bindings7exports5miden16counter_contract16counter_contract5Guest9get_count( + +) -> felt + push.1114144 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_sw + trace.252 + nop + push.64 + u32wrapping_sub + push.1114144 + dup.1 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_sw + trace.252 + nop + push.60 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.1 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + 
exec.::intrinsics::mem::store_felt + trace.252 + nop + push.56 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.52 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.48 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.48 + dup.1 + u32wrapping_add + dup.1 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB4_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromANtNtB6_4felt4Feltj4_E4from + trace.252 + nop + dup.0 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs6_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert5AsRefBW_E6as_ref + trace.252 + nop + push.0 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4feltNtB5_4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromhE4from + trace.252 + nop + push.12 + dup.2 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.8 + dup.3 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.4 + dup.4 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop 
+ movup.4 + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop + push.32 + dup.6 + u32wrapping_add + movup.2 + swap.3 + movdn.2 + swap.1 + swap.4 + swap.1 + swap.5 + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden::active_account::get_map_item + trace.252 + nop + push.40 + dup.1 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.56 + dup.3 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.32 + dup.1 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.48 + dup.3 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.48 + dup.1 + u32wrapping_add + push.16 + dup.2 + u32wrapping_add + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse + trace.252 + nop + push.16 + dup.1 + u32wrapping_add + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs3_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtNtB7_4felt4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromNtB5_4WordE4from + trace.252 + nop + push.64 + movup.2 + u32wrapping_add + push.1114144 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_sw + trace.252 + nop +end + +@callconv("C") +proc miden:counter-contract/counter-contract@0.1.0#get-count( + +) -> felt + 
trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvNtCsjM1oOs60QyD_11wit_bindgen2rt14run_ctors_once + trace.252 + nop + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_Cs2bNbiPwbrt9_16counter_contractNtB5_15CounterContractNtNtNtNtNtNtB5_8bindings7exports5miden16counter_contract16counter_contract5Guest9get_count + trace.252 + nop +end + +@callconv("C") +proc miden:counter-contract/counter-contract@0.1.0#increment-count( + +) -> felt + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvNtCsjM1oOs60QyD_11wit_bindgen2rt14run_ctors_once + trace.252 + nop + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_Cs2bNbiPwbrt9_16counter_contractNtB5_15CounterContractNtNtNtNtNtNtB5_8bindings7exports5miden16counter_contract16counter_contract5Guest15increment_count + trace.252 + nop +end + +@callconv("C") +proc _RNvNtCsjM1oOs60QyD_11wit_bindgen2rt14run_ctors_once( + +) + push.1114148 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_sw + trace.252 + nop + push.1048584 + u32wrapping_add + u32divmod.4 + swap.1 + swap.1 + dup.1 + mem_load + swap.1 + push.8 + u32wrapping_mul + u32shr + swap.1 + drop + push.255 + u32and + push.0 + swap.1 + neq + if.true + nop + else + push.1114148 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_sw + trace.252 + nop + trace.240 + nop + exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::__wasm_call_ctors + trace.252 + nop + push.1 + push.1048584 + movup.2 + u32wrapping_add + u32divmod.4 + swap.1 + dup.0 + mem_load + dup.2 + push.8 + u32wrapping_mul + push.255 + swap.1 + u32shl + u32not + swap.1 + u32and + movup.3 + movup.3 + push.8 + u32wrapping_mul + u32shl + u32or + swap.1 + mem_store + end +end + +@callconv("C") +proc 
_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4feltNtB5_4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromhE4from( + i32 +) -> felt + push.255 + u32and +end + +@callconv("C") +proc _RNvXs3_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtNtB7_4felt4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromNtB5_4WordE4from( + i32 +) -> felt + push.12 + swap.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_felt + trace.252 + nop +end + +@callconv("C") +proc _RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse( + i32, + i32 +) + dup.1 + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.8 + dup.3 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + swap.2 + movup.2 + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + push.8 + movup.2 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + movup.2 + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + swap.2 + movup.2 + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop +end + +@callconv("C") +proc _RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromNtNtB7_4felt4FeltE4from( + i32, + felt +) + push.12 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.8 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + 
exec.::intrinsics::mem::store_felt + trace.252 + nop + push.4 + dup.1 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + push.0 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop +end + +@callconv("C") +proc _RNvXs6_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert5AsRefBW_E6as_ref( + i32 +) -> i32 + nop +end + +@callconv("C") +proc _RNvXs_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB4_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromANtNtB6_4felt4Feltj4_E4from( + i32, + i32 +) + push.8 + dup.2 + add + u32assert + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + push.8 + dup.3 + add + u32assert + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop + swap.1 + push.4 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::load_dw + trace.252 + nop + swap.1 + movup.2 + push.8 + dup.1 + swap.1 + u32mod + u32assert + assertz + u32divmod.4 + swap.1 + movup.2 + movdn.3 + trace.240 + nop + exec.::intrinsics::mem::store_dw + trace.252 + nop +end + +@callconv("C") +proc miden::active_account::get_map_item(felt, felt, felt, felt, felt, i32) + trace.240 + nop + exec.::miden::active_account::get_map_item + trace.252 + nop + movup.4 + dup.0 + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.4 + dup.1 + add + u32assert + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + 
push.8 + dup.1 + add + u32assert + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.12 + add + u32assert + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop +end + +@callconv("C") +proc miden::native_account::set_map_item( + felt, + felt, + felt, + felt, + felt, + felt, + felt, + felt, + felt, + i32 +) + trace.240 + nop + exec.::miden::native_account::set_map_item + trace.252 + nop + movup.8 + dup.0 + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.4 + dup.1 + add + u32assert + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.8 + dup.1 + add + u32assert + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.12 + dup.1 + add + u32assert + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.16 + dup.1 + add + u32assert + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.20 + dup.1 + add + u32assert + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.24 + dup.1 + add + u32assert + movup.2 + swap.1 + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop + push.28 + add + u32assert + u32divmod.4 + swap.1 + trace.240 + nop + exec.::intrinsics::mem::store_felt + trace.252 + nop +end + diff --git a/frontend/wasm/Cargo.toml b/frontend/wasm/Cargo.toml index 6dc71310d..7102ceaae 100644 --- a/frontend/wasm/Cargo.toml +++ b/frontend/wasm/Cargo.toml @@ -24,6 +24,7 @@ indexmap = "2.7" log.workspace = true miden-core.workspace = true midenc-hir.workspace = true +midenc-dialect-debuginfo.workspace = true 
midenc-dialect-hir.workspace = true midenc-dialect-cf.workspace = true midenc-dialect-ub.workspace = true diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index 3e2d081c9..adc9045f1 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -6,6 +6,7 @@ use cranelift_entity::{EntityRef as _, SecondaryMap}; use log::warn; use midenc_dialect_arith::ArithOpBuilder; use midenc_dialect_cf::ControlFlowOpBuilder; +use midenc_dialect_debuginfo::DebugInfoOpBuilder; use midenc_dialect_hir::HirOpBuilder; use midenc_dialect_ub::UndefinedBehaviorOpBuilder; use midenc_dialect_wasm::WasmOpBuilder; @@ -190,7 +191,7 @@ impl FunctionBuilderExt<'_, B> { } if let Err(err) = - BuiltinOpBuilder::builder_mut(self).dbg_value_with_expr(value, attr, expr_opt, span) + DebugInfoOpBuilder::builder_mut(self).debug_value_with_expr(value, attr, expr_opt, span) { warn!("failed to emit dbg.value for local {idx}: {err:?}"); } @@ -285,7 +286,7 @@ impl FunctionBuilderExt<'_, B> { } if let Err(err) = - BuiltinOpBuilder::builder_mut(self).dbg_value_with_expr(value, attr, expression, span) + DebugInfoOpBuilder::builder_mut(self).debug_value_with_expr(value, attr, expression, span) { warn!("failed to emit scheduled dbg.value for local {idx}: {err:?}"); } @@ -704,6 +705,18 @@ impl<'f, B: ?Sized + Builder> BuiltinOpBuilder<'f, B> for FunctionBuilderExt<'f, } } +impl<'f, B: ?Sized + Builder> DebugInfoOpBuilder<'f, B> for FunctionBuilderExt<'f, B> { + #[inline(always)] + fn builder(&self) -> &B { + self.inner.builder() + } + + #[inline(always)] + fn builder_mut(&mut self) -> &mut B { + self.inner.builder_mut() + } +} + impl<'f, B: ?Sized + Builder> HirOpBuilder<'f, B> for FunctionBuilderExt<'f, B> { #[inline(always)] fn builder(&self) -> &B { diff --git a/hir-transform/src/dead_debug_ops.rs b/hir-transform/src/dead_debug_ops.rs deleted file mode 100644 index a695dd682..000000000 --- 
a/hir-transform/src/dead_debug_ops.rs +++ /dev/null @@ -1,126 +0,0 @@ -//! This pass removes debug operations (DbgValue) whose operands are no longer -//! live. This prevents issues during codegen where the operand stack state -//! becomes inconsistent due to debug ops referencing dropped values. - -use alloc::vec::Vec; - -use midenc_hir::{ - EntityMut, Operation, OperationName, OperationRef, Report, - dialects::builtin, - pass::{Pass, PassExecutionState, PostPassStatus}, -}; -use midenc_hir_analysis::analyses::LivenessAnalysis; - -/// Removes debug operations whose operands are dead. -/// -/// Debug operations like `DbgValue` reference SSA values to provide debug -/// information. However, these operations don't actually consume their operands; -/// they just observe them. This can cause issues during codegen when the -/// referenced value has been dropped from the operand stack. -/// -/// This pass removes debug ops whose operands are not live after the debug op. -/// If a value is live after the debug op, it will still be available on the -/// operand stack during codegen and can be safely observed. 
-pub struct RemoveDeadDebugOps; - -impl Pass for RemoveDeadDebugOps { - type Target = Operation; - - fn name(&self) -> &'static str { - "remove-dead-debug-ops" - } - - fn argument(&self) -> &'static str { - "remove-dead-debug-ops" - } - - fn description(&self) -> &'static str { - "Removes debug operations whose operands are dead" - } - - fn can_schedule_on(&self, _name: &OperationName) -> bool { - true - } - - fn run_on_operation( - &mut self, - op: EntityMut<'_, Self::Target>, - state: &mut PassExecutionState, - ) -> Result<(), Report> { - let op_ref = op.as_operation_ref(); - drop(op); - - // Collect all debug ops to potentially remove - let mut debug_ops_to_check: Vec = Vec::new(); - - collect_debug_ops(&op_ref, &mut debug_ops_to_check); - - if debug_ops_to_check.is_empty() { - state.set_post_pass_status(PostPassStatus::Unchanged); - return Ok(()); - } - - // Get liveness analysis - let analysis_manager = state.analysis_manager(); - let liveness = analysis_manager.get_analysis::()?; - - let mut removed_any = false; - - // Check each debug op and remove if its operand will be dead by codegen time - for mut debug_op in debug_ops_to_check { - let should_remove = { - let debug_op_borrowed = debug_op.borrow(); - - // Get the operand (first operand for DbgValue) - let operands = debug_op_borrowed.operands(); - if operands.is_empty() { - continue; - } - - let operand = operands.iter().next().unwrap(); - let operand_value = operand.borrow().as_value_ref(); - - // Only remove debug ops if their operand is not live after the debug op. - // If the value is live after, it will still be on the operand stack - // during codegen and can be safely observed by the debug op. - // - // Note: We previously also removed debug ops if the value had other uses, - // but this was too aggressive - if the value is live after the debug op, - // it doesn't matter how many uses it has; it's still available. 
- !liveness.is_live_after(operand_value, &debug_op_borrowed) - }; - - if should_remove { - debug_op.borrow_mut().erase(); - removed_any = true; - } - } - - state.set_post_pass_status(if removed_any { - PostPassStatus::Changed - } else { - PostPassStatus::Unchanged - }); - - Ok(()) - } -} - -/// Recursively collect all debug operations in the given operation -fn collect_debug_ops(op: &OperationRef, debug_ops: &mut Vec) { - let op = op.borrow(); - - // Check if this is a debug op - if op.is::() { - debug_ops.push(op.as_operation_ref()); - } - - // Recurse into regions - for region in op.regions() { - for block in region.body() { - for inner_op in block.body() { - collect_debug_ops(&inner_op.as_operation_ref(), debug_ops); - } - } - } -} diff --git a/hir/src/dialects/builtin/builders.rs b/hir/src/dialects/builtin/builders.rs index 64673ddeb..44c2072f9 100644 --- a/hir/src/dialects/builtin/builders.rs +++ b/hir/src/dialects/builtin/builders.rs @@ -86,26 +86,8 @@ pub trait BuiltinOpBuilder<'f, B: ?Sized + Builder> { op_builder(arg) } - fn dbg_value( - &mut self, - value: ValueRef, - variable: DILocalVariableAttr, - span: SourceSpan, - ) -> Result { - self.dbg_value_with_expr(value, variable, None, span) - } - - fn dbg_value_with_expr( - &mut self, - value: ValueRef, - variable: DILocalVariableAttr, - expression: Option, - span: SourceSpan, - ) -> Result { - let expr = expression.unwrap_or_default(); - let op_builder = self.builder_mut().create::(span); - op_builder(value, variable, expr) - } + // Note: dbg_value / dbg_value_with_expr have moved to DebugInfoOpBuilder + // in the midenc-dialect-debuginfo crate. Use debug_value / debug_value_with_expr there. 
fn builder(&self) -> &B; fn builder_mut(&mut self) -> &mut B; diff --git a/hir/src/dialects/builtin/ops.rs b/hir/src/dialects/builtin/ops.rs index 113658d6e..ee9b9cbd5 100644 --- a/hir/src/dialects/builtin/ops.rs +++ b/hir/src/dialects/builtin/ops.rs @@ -1,6 +1,5 @@ mod cast; mod component; -mod debug; mod function; mod global_variable; mod interface; diff --git a/hir/src/dialects/builtin/ops/debug.rs b/hir/src/dialects/builtin/ops/debug.rs deleted file mode 100644 index a6cecaf91..000000000 --- a/hir/src/dialects/builtin/ops/debug.rs +++ /dev/null @@ -1,75 +0,0 @@ -use crate::{ - UnsafeIntrusiveEntityRef, - attributes::{DIExpressionAttr, DILocalVariableAttr}, - derive::operation, - dialects::builtin::BuiltinDialect, - traits::AnyType, -}; - -pub type DbgValueRef = UnsafeIntrusiveEntityRef; -pub type DbgDeclareRef = UnsafeIntrusiveEntityRef; - -/// Records the value of an SSA operand for debug information consumers. -#[operation(dialect = BuiltinDialect)] -pub struct DbgValue { - #[operand] - value: AnyType, - #[attr] - variable: DILocalVariableAttr, - #[attr] - expression: DIExpressionAttr, -} - -/// Records the storage location of a source-level variable. 
-#[operation(dialect = BuiltinDialect)] -pub struct DbgDeclare { - #[operand] - address: AnyType, - #[attr] - variable: DILocalVariableAttr, -} - -#[cfg(test)] -mod tests { - use alloc::{rc::Rc, string::ToString}; - - use crate::{ - Builder, Context, OpPrinter, OpPrintingFlags, SourceSpan, Type, - attributes::DILocalVariableAttr, - dialects::builtin::{BuiltinDialect, BuiltinOpBuilder}, - interner::Symbol, - }; - - fn make_variable() -> DILocalVariableAttr { - let mut variable = - DILocalVariableAttr::new(Symbol::intern("x"), Symbol::intern("main.rs"), 12, Some(7)); - variable.arg_index = Some(0); - variable.ty = Some(Type::I32); - variable - } - - #[test] - fn dbg_value_carries_metadata() { - let context = Rc::new(Context::default()); - context.get_or_register_dialect::(); - - let block = context.create_block_with_params([Type::I32]); - let arg = block.borrow().arguments()[0]; - let value = arg.borrow().as_value_ref(); - - let mut builder = context.clone().builder(); - builder.set_insertion_point_to_end(block); - - let variable = make_variable(); - let dbg_value = builder - .dbg_value(value, variable.clone(), SourceSpan::UNKNOWN) - .expect("failed to create dbg.value op"); - - assert_eq!(dbg_value.borrow().variable(), &variable); - assert_eq!(block.borrow().back(), Some(dbg_value.as_operation_ref())); - - let op = dbg_value.as_operation_ref(); - let printed = op.borrow().print(&OpPrintingFlags::default(), context.as_ref()).to_string(); - assert!(printed.contains("di.local_variable")); - } -} diff --git a/midenc-compile/Cargo.toml b/midenc-compile/Cargo.toml index 60097dc9a..53552a5a2 100644 --- a/midenc-compile/Cargo.toml +++ b/midenc-compile/Cargo.toml @@ -35,6 +35,7 @@ miden-assembly.workspace = true miden-debug-types.workspace = true miden-mast-package.workspace = true midenc-frontend-wasm.workspace = true +midenc-dialect-debuginfo.workspace = true midenc-dialect-scf.workspace = true midenc-dialect-hir.workspace = true midenc-hir.workspace = true diff --git 
a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs index 36b6fac32..dfd7c54ae 100644 --- a/midenc-compile/src/debug_info.rs +++ b/midenc-compile/src/debug_info.rs @@ -11,6 +11,7 @@ use miden_mast_package::debug_info::{ DebugFileInfo, DebugFunctionInfo, DebugInfoSection, DebugPrimitiveType, DebugTypeInfo, DebugVariableInfo, }; +use midenc_dialect_debuginfo as debuginfo; use midenc_hir::{DILocalVariableAttr, DISubprogramAttr, OpExt, Type, dialects::builtin}; /// Builder for constructing a `DebugInfoSection` from HIR components. @@ -202,7 +203,7 @@ impl DebugInfoBuilder { ) { for op in block.body() { // Check if this is a DbgValue operation - if let Some(dbg_value) = op.downcast_ref::() + if let Some(dbg_value) = op.downcast_ref::() && let Some(var_info) = self.extract_variable_info(dbg_value.variable()) { func_info.add_variable(var_info); diff --git a/tests/integration/expected/debug_variable_locations.hir b/tests/integration/expected/debug_variable_locations.hir index 7c01fbfa4..2711397c4 100644 --- a/tests/integration/expected/debug_variable_locations.hir +++ b/tests/integration/expected/debug_variable_locations.hir @@ -4,9 +4,9 @@ builtin.component root_ns:root@1.0.0 { ^block6(v0: i32): v2 = arith.constant 0 : i32; v3 = arith.constant 0 : i32; - builtin.dbg_value v3 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; + debuginfo.debug_value v3 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; v4 = arith.constant 0 : i32; - builtin.dbg_value v4 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; + debuginfo.debug_value v4 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; cf.br ^block8(v3, v0, v4); ^block7(v1: i32): @@ -23,10 +23,10 @@ builtin.component root_ns:root@1.0.0 { ^block10: v16 = 
arith.add v15, v6 : i32 #[overflow = wrapping]; - builtin.dbg_value v16 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; + debuginfo.debug_value v16 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; v17 = arith.constant 1 : i32; v18 = arith.add v6, v17 : i32 #[overflow = wrapping]; - builtin.dbg_value v18 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; + debuginfo.debug_value v18 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; cf.br ^block8(v18, v7, v16); ^block11: builtin.ret v15; From e0c14a92ba91d617f6ddbea49f3b02150b33c83b Mon Sep 17 00:00:00 2001 From: djole Date: Thu, 5 Feb 2026 00:18:33 +0100 Subject: [PATCH 05/32] fix: handle debug info ops in transforms and MASM codegen Debug info operations (debuginfo.value, debuginfo.kill, debuginfo.declare) are purely observational and were causing panics and assembler validation errors when they survived through optimization passes into codegen. 
- Skip operand scheduling for DebugValue (observational, not consuming) - Gracefully skip debuginfo ops with no HirLowering in the emitter - Make SinkOperandDefs and ControlFlowSink debug-info-aware so debug uses don't block sinking or prevent dead code elimination - Strip DebugVar-only procedure bodies that would be rejected by the Miden assembler (decorators without instructions have no MAST effect) --- codegen/masm/src/emitter.rs | 13 ++- codegen/masm/src/lower/component.rs | 25 +++++ codegen/masm/src/lower/lowering.rs | 12 +++ docs/DebugInfoMetadata.md | 145 ++++++++++++++++++++++++++++ hir-transform/src/sink.rs | 89 +++++++++++++++-- 5 files changed, 271 insertions(+), 13 deletions(-) diff --git a/codegen/masm/src/emitter.rs b/codegen/masm/src/emitter.rs index 12db31481..14c2071ab 100644 --- a/codegen/masm/src/emitter.rs +++ b/codegen/masm/src/emitter.rs @@ -115,9 +115,16 @@ impl BlockEmitter<'_> { // operand stack space on operands that will never be used. //self.drop_unused_operands_at(op); - let lowering = op.as_trait::().unwrap_or_else(|| { - panic!("illegal operation: no lowering has been defined for '{}'", op.name()) - }); + let Some(lowering) = op.as_trait::() else { + // Skip debug info ops that have no lowering (e.g. debuginfo.kill, + // debuginfo.declare) rather than panicking. These ops carry no + // semantic meaning for code generation. 
+ if op.name().dialect().as_str() == "debuginfo" { + log::trace!(target: "codegen", "skipping debug info op with no lowering: {}", op.name()); + return; + } + panic!("illegal operation: no lowering has been defined for '{}'", op.name()); + }; // Schedule operands for this instruction lowering diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index da8272001..e8c78a106 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -664,6 +664,16 @@ impl MasmFunctionBuilder { // Now convert to FMP offset: idx - num_wasm_locals patch_debug_var_locals_in_block(&mut body, num_wasm_locals); + // Strip DebugVar-only procedure bodies. + // The Miden assembler rejects procedures whose bodies contain only decorators + // (like DebugVar) and no real instructions, because decorators don't affect + // MAST digests — two empty procedures with different decorators would be + // indistinguishable. If there are no real instructions, the debug info is + // meaningless anyway, so just drop it. + if !block_has_real_instructions(&body) { + body = masm::Block::new(body.span(), vec![]); + } + let mut procedure = masm::Procedure::new(span, visibility, name, num_locals, body); procedure.set_signature(signature); for attribute in ["auth_script", "note_script"] { @@ -679,6 +689,21 @@ impl MasmFunctionBuilder { } } +/// Returns true if the block contains at least one real (non-decorator) instruction. +/// +/// DebugVar instructions are decorator-only and don't produce MAST nodes. If a procedure +/// body contains only DebugVar ops, the assembler will reject it. +fn block_has_real_instructions(block: &masm::Block) -> bool { + block.iter().any(|op| match op { + masm::Op::Inst(inst) => inst.has_textual_representation(), + masm::Op::If { + then_blk, else_blk, .. + } => block_has_real_instructions(then_blk) || block_has_real_instructions(else_blk), + masm::Op::While { body, .. 
} => block_has_real_instructions(body), + masm::Op::Repeat { body, .. } => block_has_real_instructions(body), + }) +} + /// Recursively patch DebugVar Local locations in a block. /// /// Converts `Local(idx)` where idx is the raw WASM local index to `Local(offset)` diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index d869d5112..729d0ce4d 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1264,6 +1264,18 @@ impl HirLowering for arith::Split { } impl HirLowering for debuginfo::DebugValue { + fn schedule_operands(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + // Debug value operations are purely observational — they do not consume their + // operand from the stack. Skip operand scheduling entirely; the emit() method + // will look up the value's current stack position (if any) on its own. + Ok(()) + } + + fn required_operands(&self) -> ValueRange<'_, 4> { + // No operands need to be scheduled on the stack for debug ops. + ValueRange::Empty + } + fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { use miden_core::{DebugVarInfo, DebugVarLocation, Felt}; use midenc_hir::DIExpressionOp; diff --git a/docs/DebugInfoMetadata.md b/docs/DebugInfoMetadata.md index cdf672569..27cce9496 100644 --- a/docs/DebugInfoMetadata.md +++ b/docs/DebugInfoMetadata.md @@ -237,6 +237,128 @@ salvage_debug_info(&old_val, &SalvageAction::Deref { new_value: ptr }, &mut buil - `collect_debug_ops(op)` — recursively collects all debug ops within an operation's regions +## Guide for Pass Authors + +Debug info operations are **observational** — they observe SSA values but don't +consume them or affect program semantics. This has implications for how passes +should handle them: + +### The Golden Rule + +**Debug uses should never prevent optimizations.** If a value is dead except for +debug uses, treat it as dead. 
If an operation can be sunk/moved except for debug +uses, sink/move it anyway. + +### What You Get for Free + +1. **Value replacements propagate automatically.** When you call + `replace_all_uses_with(old, new)`, debug uses are updated too. No action needed. + +2. **The verifier catches mistakes.** If you delete an operation without handling + its debug uses, the verifier will report dangling references. This is + intentional — silent debug info loss is worse than a loud failure. + +### What You Must Handle + +#### 1. Dead Code Elimination + +When determining if a value is dead, exclude debug uses: + +```rust +use crate::sink::{has_only_debug_uses, erase_debug_users}; + +// Wrong: debug uses keep the value "alive" +if !value.is_used() { ... } + +// Right: only non-debug uses matter +if has_only_debug_uses(&*value.borrow()) { + erase_debug_users(value); // Clean up debug ops first + defining_op.erase(); // Then erase the definition +} +``` + +#### 2. Sinking / Code Motion + +When checking if an operation can be moved, exclude debug uses from the +dominance check: + +```rust +fn can_sink(&self, op: &Operation, target_region: &Region) -> bool { + op.results().iter().all(|result| { + result.borrow().iter_uses().all(|user| { + // Skip debug uses — they're observational + if is_debug_info_op(&user.owner.borrow()) { + return true; + } + self.dominates(target_region, &user.owner) + }) + }) +} +``` + +Before moving an operation, erase debug uses that would violate dominance: + +```rust +// Erase debug uses outside target region before moving +for result in op.borrow().results().iter() { + erase_debug_users(result.borrow().as_value_ref()); +} +move_op_into_region(op, target_region); +``` + +#### 3. 
Value Transformations + +When a transform changes how a value is computed (not just replaces it), use +`salvage_debug_info()` to update the debug expressions: + +```rust +use midenc_dialect_debuginfo::transform::{salvage_debug_info, SalvageAction}; + +// Value was promoted to a stack slot: +let ptr = builder.alloca(ty, span)?; +builder.store(old_val, ptr, span)?; +salvage_debug_info(&old_val, &SalvageAction::Deref { new_value: ptr }, &mut builder); + +// Value was completely optimized away: +salvage_debug_info(&old_val, &SalvageAction::Undef, &mut builder); +``` + +#### 4. Deleting Operations + +Always erase debug users before erasing the defining operation: + +```rust +for result in op.borrow().results().iter() { + erase_debug_users(result.borrow().as_value_ref()); +} +op.borrow_mut().erase(); +``` + +### Quick Reference + +| Scenario | Action | +|----------|--------| +| Replacing value A with B | Just use `replace_all_uses_with` — automatic | +| Checking if value is dead | Use `has_only_debug_uses()`, not `is_used()` | +| Moving/sinking an op | Exclude debug uses from dominance checks | +| Before moving an op | Call `erase_debug_users()` on results | +| Before deleting an op | Call `erase_debug_users()` on results | +| Value computation changed | Use `salvage_debug_info()` with appropriate action | +| Value optimized to constant | Use `SalvageAction::Constant` or `::Undef` | + +### Defense in Depth + +The MASM codegen has additional hardening: + +- `DebugValue::emit()` skips emission if the value is not on the stack and has + no location expression (gracefully handles orphaned debug ops) +- `emit_inst()` silently skips debuginfo-dialect ops that have no `HirLowering` + implementation (e.g., `debuginfo.kill`, `debuginfo.declare`) +- `MasmFunctionBuilder::build()` strips debug-only procedure bodies that would + be rejected by the assembler + +These are safety nets, not substitutes for proper debug info handling in passes. 
+ ## Kinda Fallback Behavior/Best Effort cases - If DWARF lookup fails entirely, we still emit attrs but populate @@ -274,6 +396,29 @@ salvage_debug_info(&old_val, &SalvageAction::Deref { new_value: ptr }, &mut buil information. These decorators are embedded in the MAST instruction stream, enabling debuggers to track variable values at specific execution points. + **Local variable FMP offset handling** uses a two-phase approach: + + 1. **During lowering** (`DebugValue::emit()` in `lowering.rs`): When a value + is not on the operand stack (i.e., it was spilled to memory), the emitter + records `DebugVarLocation::Local(wasm_idx)` using the raw WASM local index + from the `DIExpressionOp::WasmLocal` attribute. This index is stable and + known from DWARF. + + 2. **After body is built** (`patch_debug_var_locals_in_block()` in + `component.rs`): Once the entire procedure body is emitted and `num_locals` + is finalized, a fixup pass converts `Local(wasm_idx)` to `Local(fmp_offset)` + where `fmp_offset = wasm_idx - num_wasm_locals`. The FMP offset is negative, + pointing below the frame pointer where spilled locals reside. + + This separation keeps lowering simple (no need to thread `num_locals` through + the emitter) while ensuring correct FMP-relative offsets in the final output. + + **Debug-only procedure bodies**: If a procedure body contains only `DebugVar` + decorators and no real instructions, the MASM codegen strips the decorators + entirely. The Miden assembler rejects such bodies because decorators don't + affect MAST digests—two empty procedures with different decorators would be + indistinguishable. + These refinements can be implemented without changing the public HIR surface; we would only update the metadata collector and the builder helpers. 
diff --git a/hir-transform/src/sink.rs b/hir-transform/src/sink.rs index f8c1f5edd..0474c2556 100644 --- a/hir-transform/src/sink.rs +++ b/hir-transform/src/sink.rs @@ -3,7 +3,7 @@ use alloc::vec::Vec; use midenc_hir::{ Backward, Builder, EntityMut, Forward, FxHashSet, OpBuilder, Operation, OperationName, OperationRef, ProgramPoint, RawWalk, Region, RegionBranchOpInterface, - RegionBranchTerminatorOpInterface, RegionRef, Report, SmallVec, Usable, ValueRef, + RegionBranchTerminatorOpInterface, RegionRef, Report, SmallVec, Usable, Value, ValueRef, adt::SmallDenseMap, dominance::DominanceInfo, matchers::{self, Matcher}, @@ -11,6 +11,49 @@ use midenc_hir::{ traits::{ConstantLike, Terminator}, }; +/// Returns `true` if the given operation belongs to the debuginfo dialect. +/// +/// Debug info operations (debuginfo.value, debuginfo.kill, etc.) are purely +/// observational and should not prevent optimizations such as sinking or DCE. +#[inline] +fn is_debug_info_op(op: &Operation) -> bool { + op.name().dialect().as_str() == "debuginfo" +} + +/// Check whether `operation` is the sole *non-debug-info* user of `value`. +/// +/// Debug uses are excluded because they are observational and should never +/// prevent value-producing operations from being moved or eliminated. +fn is_sole_non_debug_user(value: &dyn Value, operation: OperationRef) -> bool { + value.iter_uses().all(|user| { + user.owner == operation || is_debug_info_op(&user.owner.borrow()) + }) +} + +/// Returns `true` if the only remaining uses of the given value are debug info uses +/// (or the value is entirely unused). +fn has_only_debug_uses(value: &dyn Value) -> bool { + value.iter_uses().all(|user| is_debug_info_op(&user.owner.borrow())) +} + +/// Erase all debug info operations that reference the given value. +/// +/// This is used before erasing a defining op whose result is only kept alive by +/// debug uses. 
The debug ops are simply removed; the codegen emitter is also +/// hardened to skip orphaned debug ops, so this is a best-effort cleanup. +fn erase_debug_users(value: ValueRef) { + let debug_ops: Vec = { + let v = value.borrow(); + v.iter_uses() + .filter(|user| is_debug_info_op(&user.owner.borrow())) + .map(|user| user.owner) + .collect() + }; + for mut op in debug_ops { + op.borrow_mut().erase(); + } +} + /// This transformation sinks operations as close as possible to their uses, one of two ways: /// /// 1. If there exists only a single use of the operation, move it before it's use so that it is @@ -221,7 +264,9 @@ impl Pass for SinkOperandDefs { for operand in op.operands().iter().rev() { let value = operand.borrow(); let value = value.value(); - let is_sole_user = value.iter_uses().all(|user| user.owner == operation); + // Exclude debug info uses when determining whether this is the sole + // user — debug ops are observational and should not prevent sinking. + let is_sole_user = is_sole_non_debug_user(&*value, operation); let Some(defining_op) = value.get_defining_op() else { // Skip block arguments, nothing to move in that situation @@ -276,10 +321,13 @@ impl Pass for SinkOperandDefs { let mut operation = sink_state.operation; let op = operation.borrow(); - // If this operation is unused, remove it now if it has no side effects + // If this operation is unused (or only has debug info uses), remove it + // now if it has no side effects. 
let is_memory_effect_free = op.is_memory_effect_free() || op.implements::(); - if !op.is_used() + let only_debug_uses = !op.is_used() + || op.results().iter().all(|r| has_only_debug_uses(&*r.borrow())); + if only_debug_uses && is_memory_effect_free && !op.implements::() && !op.implements::() @@ -287,6 +335,10 @@ impl Pass for SinkOperandDefs { { log::debug!(target: Self::NAME, "erasing unused, effect-free, non-terminator op {op}"); drop(op); + // Erase any remaining debug uses before erasing the defining op + for result in operation.borrow().results().iter() { + erase_debug_users(result.borrow().as_value_ref()); + } operation.borrow_mut().erase(); continue; } @@ -320,10 +372,11 @@ impl Pass for SinkOperandDefs { operand.borrow_mut().set(replacement); changed = PostPassStatus::Changed; - // If no other uses of this value remain, then remove the original - // operation, as it is now dead. - if !operand_value.borrow().is_used() { + // If no other non-debug uses of this value remain, then remove + // the original operation, as it is now dead. + if has_only_debug_uses(&*operand_value.borrow()) { log::trace!(target: Self::NAME, " {operand_value} is no longer used, erasing definition"); + erase_debug_users(operand_value); // Replacements are only ever for op results let mut defining_op = operand_value.borrow().get_defining_op().unwrap(); defining_op.borrow_mut().erase(); @@ -333,7 +386,8 @@ impl Pass for SinkOperandDefs { } let value = operand_value.borrow(); - let is_sole_user = value.iter_uses().all(|user| user.owner == operation); + // Exclude debug info uses when determining sole-user status. 
+ let is_sole_user = is_sole_non_debug_user(&*value, operation); let Some(mut defining_op) = value.get_defining_op() else { // Skip block arguments, nothing to move in that situation @@ -477,8 +531,11 @@ where } /// Given a region and an op which dominates the region, returns true if all - /// users of the given op are dominated by the entry block of the region, and - /// thus the operation can be sunk into the region. + /// *non-debug-info* users of the given op are dominated by the entry block + /// of the region, and thus the operation can be sunk into the region. + /// + /// Debug info uses are excluded because they are observational and should + /// not prevent control-flow sinking. fn all_users_dominated_by(&self, op: &Operation, region: &Region) -> bool { assert!( region.find_ancestor_op(op.as_operation_ref()).is_none(), @@ -488,6 +545,11 @@ where op.results().iter().all(|result| { let result = result.borrow(); result.iter_uses().all(|user| { + // Skip debug info users — they are observational and should not + // prevent sinking. + if is_debug_info_op(&user.owner.borrow()) { + return true; + } // The user is dominated by the region if its containing block is dominated // by the region's entry block. self.dominfo.dominates(&region_entry, &user.owner.parent().unwrap()) @@ -528,6 +590,13 @@ where (all_users_dominated_by, should_move_into_region) }; if all_users_dominated_by && should_move_into_region { + // Before moving, erase every debug info op that references a result of + // this op (erase_debug_users does not filter by region) — any left outside + // the target region would violate dominance after the move.
+ for result in op.borrow().results().iter() { + erase_debug_users(result.borrow().as_value_ref()); + } + (self.move_into_region)(op, region); self.num_sunk += 1; From 67fc2e548474683bfb30c5eae66bf0896f93127a Mon Sep 17 00:00:00 2001 From: djole Date: Sat, 21 Mar 2026 11:33:59 +0100 Subject: [PATCH 06/32] debuginfo: migrate DI types to DialectAttribute derive for v0.21.1 compatibility --- .gitignore | 1 + dialects/debuginfo/src/builders.rs | 12 +- dialects/debuginfo/src/lib.rs | 6 +- dialects/debuginfo/src/ops.rs | 19 ++- dialects/debuginfo/src/transform.rs | 10 +- dialects/scf/Cargo.toml | 2 +- dialects/scf/src/transforms/cfg_to_scf.rs | 5 +- frontend/wasm/src/module/debug_info.rs | 26 ++--- .../wasm/src/module/function_builder_ext.rs | 2 +- hir-transform/src/lib.rs | 2 - hir/src/attributes.rs | 3 +- hir/src/attributes/debug.rs | 108 +++++++++++++----- hir/src/lib.rs | 4 +- midenc-compile/src/debug_info.rs | 10 +- midenc-compile/src/stages/rewrite.rs | 6 +- 15 files changed, 134 insertions(+), 82 deletions(-) diff --git a/.gitignore b/.gitignore index e92412b9b..3547a9b09 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ env/ *.out node_modules/ *DS_Store +._* *.iml book/ diff --git a/dialects/debuginfo/src/builders.rs b/dialects/debuginfo/src/builders.rs index 2a28a642e..298493efc 100644 --- a/dialects/debuginfo/src/builders.rs +++ b/dialects/debuginfo/src/builders.rs @@ -1,5 +1,5 @@ use midenc_hir::{ - Builder, BuilderExt, DIExpressionAttr, DILocalVariableAttr, Report, SourceSpan, ValueRef, + Builder, BuilderExt, DIExpression, DILocalVariable, Report, SourceSpan, ValueRef, }; use super::ops::*; @@ -31,7 +31,7 @@ pub trait DebugInfoOpBuilder<'f, B: ?Sized + Builder> { fn debug_value( &mut self, value: ValueRef, - variable: DILocalVariableAttr, + variable: DILocalVariable, span: SourceSpan, ) -> Result { self.debug_value_with_expr(value, variable, None, span) @@ -47,8 +47,8 @@ pub trait DebugInfoOpBuilder<'f, B: ?Sized + Builder> { fn 
debug_value_with_expr( &mut self, value: ValueRef, - variable: DILocalVariableAttr, - expression: Option, + variable: DILocalVariable, + expression: Option, span: SourceSpan, ) -> Result { let expr = expression.unwrap_or_default(); @@ -61,7 +61,7 @@ pub trait DebugInfoOpBuilder<'f, B: ?Sized + Builder> { fn debug_declare( &mut self, address: ValueRef, - variable: DILocalVariableAttr, + variable: DILocalVariable, span: SourceSpan, ) -> Result { let op_builder = self.builder_mut().create::(span); @@ -74,7 +74,7 @@ pub trait DebugInfoOpBuilder<'f, B: ?Sized + Builder> { /// until the next `debug_value` or `debug_declare` for the same variable. fn debug_kill( &mut self, - variable: DILocalVariableAttr, + variable: DILocalVariable, span: SourceSpan, ) -> Result { let op_builder = self.builder_mut().create::(span); diff --git a/dialects/debuginfo/src/lib.rs b/dialects/debuginfo/src/lib.rs index 9013d1768..441cefe1e 100644 --- a/dialects/debuginfo/src/lib.rs +++ b/dialects/debuginfo/src/lib.rs @@ -62,14 +62,12 @@ extern crate alloc; #[cfg(any(feature = "std", test))] extern crate std; -use alloc::boxed::Box; - mod builders; mod ops; pub mod transform; use midenc_hir::{ - AttributeValue, Builder, Dialect, DialectInfo, DialectRegistration, OperationRef, SourceSpan, + AttributeRef, Builder, Dialect, DialectInfo, DialectRegistration, OperationRef, SourceSpan, Type, }; @@ -117,7 +115,7 @@ impl Dialect for DebugInfoDialect { fn materialize_constant( &self, _builder: &mut dyn Builder, - _attr: Box, + _attr: AttributeRef, _ty: &Type, _span: SourceSpan, ) -> Option { diff --git a/dialects/debuginfo/src/ops.rs b/dialects/debuginfo/src/ops.rs index 0bc8fadf4..1c948e24f 100644 --- a/dialects/debuginfo/src/ops.rs +++ b/dialects/debuginfo/src/ops.rs @@ -2,6 +2,8 @@ use midenc_hir::{ DIExpressionAttr, DILocalVariableAttr, UnsafeIntrusiveEntityRef, derive::operation, traits::AnyType, }; +// Note: DILocalVariableAttr and DIExpressionAttr are now the generated wrapper +// types from 
#[derive(DialectAttribute)] on DILocalVariable and DIExpression. use crate::DebugInfoDialect; @@ -80,18 +82,15 @@ pub struct DebugKill { #[cfg(test)] mod tests { - use alloc::{rc::Rc, string::ToString}; + use alloc::rc::Rc; - use midenc_hir::{ - Builder, Context, DILocalVariableAttr, OpPrinter, OpPrintingFlags, SourceSpan, Type, - interner::Symbol, - }; + use midenc_hir::{Builder, Context, DILocalVariable, SourceSpan, Type, interner::Symbol}; use crate::{DebugInfoDialect, DebugInfoOpBuilder}; - fn make_variable() -> DILocalVariableAttr { + fn make_variable() -> DILocalVariable { let mut variable = - DILocalVariableAttr::new(Symbol::intern("x"), Symbol::intern("main.rs"), 12, Some(7)); + DILocalVariable::new(Symbol::intern("x"), Symbol::intern("main.rs"), 12, Some(7)); variable.arg_index = Some(0); variable.ty = Some(Type::I32); variable @@ -114,11 +113,11 @@ mod tests { .debug_value(value, variable.clone(), SourceSpan::UNKNOWN) .expect("failed to create debuginfo.value op"); - assert_eq!(debug_value.borrow().variable(), &variable); + assert_eq!(debug_value.borrow().variable().as_value(), &variable); assert_eq!(block.borrow().back(), Some(debug_value.as_operation_ref())); let op = debug_value.as_operation_ref(); - let printed = op.borrow().print(&OpPrintingFlags::default(), context.as_ref()).to_string(); + let printed = alloc::string::ToString::to_string(&*op.borrow()); assert!(printed.contains("di.local_variable")); } @@ -137,6 +136,6 @@ mod tests { .debug_kill(variable.clone(), SourceSpan::UNKNOWN) .expect("failed to create debuginfo.kill op"); - assert_eq!(debug_kill.borrow().variable(), &variable); + assert_eq!(debug_kill.borrow().variable().as_value(), &variable); } } diff --git a/dialects/debuginfo/src/transform.rs b/dialects/debuginfo/src/transform.rs index 076fa6ecf..b172bee09 100644 --- a/dialects/debuginfo/src/transform.rs +++ b/dialects/debuginfo/src/transform.rs @@ -151,7 +151,7 @@ fn apply_salvage_action( let (variable, mut expr) = { let op = 
debug_op.borrow(); let dv = op.downcast_ref::().unwrap(); - (dv.variable().clone(), dv.expression().clone()) + (dv.variable().as_value().clone(), dv.expression().as_value().clone()) }; expr.operations.insert(0, DIExpressionOp::Deref); @@ -164,7 +164,7 @@ fn apply_salvage_action( let (variable, mut expr) = { let op = debug_op.borrow(); let dv = op.downcast_ref::().unwrap(); - (dv.variable().clone(), dv.expression().clone()) + (dv.variable().as_value().clone(), dv.expression().as_value().clone()) }; // To recover: subtract the offset that was added expr.operations.push(DIExpressionOp::ConstU64(*offset)); @@ -178,7 +178,7 @@ fn apply_salvage_action( let (variable, mut expr) = { let op = debug_op.borrow(); let dv = op.downcast_ref::().unwrap(); - (dv.variable().clone(), dv.expression().clone()) + (dv.variable().as_value().clone(), dv.expression().as_value().clone()) }; expr.operations.extend(ops.iter().cloned()); @@ -190,7 +190,7 @@ fn apply_salvage_action( let variable = { let op = debug_op.borrow(); let dv = op.downcast_ref::().unwrap(); - dv.variable().clone() + dv.variable().as_value().clone() }; debug_op.borrow_mut().erase(); @@ -206,7 +206,7 @@ fn apply_salvage_action( let variable = { let op = debug_op.borrow(); let dv = op.downcast_ref::().unwrap(); - dv.variable().clone() + dv.variable().as_value().clone() }; debug_op.borrow_mut().erase(); diff --git a/dialects/scf/Cargo.toml b/dialects/scf/Cargo.toml index 8e3c13ee4..19985c6ae 100644 --- a/dialects/scf/Cargo.toml +++ b/dialects/scf/Cargo.toml @@ -29,4 +29,4 @@ bitvec.workspace = true midenc-expect-test = { path = "../../tools/expect-test" } midenc-hir = { path = "../../hir", features = ["logging"] } midenc-dialect-debuginfo = { path = "../debuginfo" } -env_logger.workspace = true +env_logger = "0.11" diff --git a/dialects/scf/src/transforms/cfg_to_scf.rs b/dialects/scf/src/transforms/cfg_to_scf.rs index be96eecb3..e6b1c15f9 100644 --- a/dialects/scf/src/transforms/cfg_to_scf.rs +++ 
b/dialects/scf/src/transforms/cfg_to_scf.rs @@ -845,6 +845,7 @@ mod tests { #[test] fn cfg_to_scf_debug_value_preservation() -> Result<(), Report> { use midenc_dialect_debuginfo::{DebugInfoDialect, DebugInfoOpBuilder}; + use midenc_hir::DILocalVariable; let context = Rc::new(Context::default()); context.get_or_register_dialect::(); @@ -869,13 +870,13 @@ mod tests { let block = builder.current_block(); let input = block.borrow().arguments()[0].upcast(); - let input_var = DILocalVariableAttr::new( + let input_var = DILocalVariable::new( Symbol::intern("input"), Symbol::intern("test.rs"), 1, Some(1), ); - let result_var = DILocalVariableAttr::new( + let result_var = DILocalVariable::new( Symbol::intern("result"), Symbol::intern("test.rs"), 2, diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index 9f02d5f7a..50272093f 100644 --- a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -7,7 +7,7 @@ use cranelift_entity::EntityRef; use gimli::{self, AttributeValue, read::Operation}; use log::debug; use midenc_hir::{ - DICompileUnitAttr, DIExpressionAttr, DIExpressionOp, DILocalVariableAttr, DISubprogramAttr, + DICompileUnit, DIExpression, DIExpressionOp, DILocalVariable, DISubprogram, FxHashMap, SourceSpan, interner::Symbol, }; use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic}; @@ -60,15 +60,15 @@ impl VariableStorage { #[derive(Clone)] pub struct LocalDebugInfo { - pub attr: DILocalVariableAttr, + pub attr: DILocalVariable, pub locations: Vec, - pub expression: Option, + pub expression: Option, } #[derive(Clone)] pub struct FunctionDebugInfo { - pub compile_unit: DICompileUnitAttr, - pub subprogram: DISubprogramAttr, + pub compile_unit: DICompileUnit, + pub subprogram: DISubprogram, pub locals: Vec>, pub function_span: Option, pub location_schedule: Vec, @@ -91,7 +91,7 @@ pub struct LocationScheduleEntry { } impl FunctionDebugInfo { - pub fn local_attr(&self, index: usize) -> 
Option<&DILocalVariableAttr> { + pub fn local_attr(&self, index: usize) -> Option<&DILocalVariable> { self.locals.get(index).and_then(|info| info.as_ref().map(|data| &data.attr)) } } @@ -156,11 +156,11 @@ fn build_function_debug_info( let (file_symbol, directory_symbol) = determine_file_symbols(parsed_module, addr2line, body); let (line, column) = determine_location(addr2line, body.body_offset); - let mut compile_unit = DICompileUnitAttr::new(Symbol::intern("wasm"), file_symbol); + let mut compile_unit = DICompileUnit::new(Symbol::intern("wasm"), file_symbol); compile_unit.directory = directory_symbol; compile_unit.producer = Some(Symbol::intern("midenc-frontend-wasm")); - let mut subprogram = DISubprogramAttr::new(func_name, compile_unit.file, line, column); + let mut subprogram = DISubprogram::new(func_name, compile_unit.file, line, column); subprogram.is_definition = true; let wasm_signature = module_types[module.functions[func_index].signature].clone(); @@ -225,7 +225,7 @@ fn build_local_debug_info( func_index: FuncIndex, wasm_signature: &WasmFuncType, body: &FunctionBodyData, - subprogram: &DISubprogramAttr, + subprogram: &DISubprogram, diagnostics: &DiagnosticsHandler, dwarf_locals: Option<&FxHashMap>, ) -> Vec> { @@ -255,7 +255,7 @@ fn build_local_debug_info( { name_symbol = symbol; } - let mut attr = DILocalVariableAttr::new( + let mut attr = DILocalVariable::new( name_symbol, subprogram.file, subprogram.line, @@ -281,7 +281,7 @@ fn build_local_debug_info( // Create expression from the first location if available let expression = if !locations.is_empty() { let ops = vec![locations[0].storage.to_expression_op()]; - Some(DIExpressionAttr::with_ops(ops)) + Some(DIExpression::with_ops(ops)) } else { None }; @@ -306,7 +306,7 @@ fn build_local_debug_info( { name_symbol = symbol; } - let mut attr = DILocalVariableAttr::new( + let mut attr = DILocalVariable::new( name_symbol, subprogram.file, subprogram.line, @@ -333,7 +333,7 @@ fn build_local_debug_info( // Create 
expression from the first location if available let expression = if !locations.is_empty() { let ops = vec![locations[0].storage.to_expression_op()]; - Some(DIExpressionAttr::with_ops(ops)) + Some(DIExpression::with_ops(ops)) } else { None }; diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index adc9045f1..e4f037ffa 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -262,7 +262,7 @@ impl FunctionBuilderExt<'_, B> { // Create expression from the scheduled location let expression = { let ops = vec![entry.storage.to_expression_op()]; - Some(midenc_hir::DIExpressionAttr::with_ops(ops)) + Some(midenc_hir::DIExpression::with_ops(ops)) }; let Some(info) = self.debug_info.as_ref() else { diff --git a/hir-transform/src/lib.rs b/hir-transform/src/lib.rs index 139c44c41..1394fd948 100644 --- a/hir-transform/src/lib.rs +++ b/hir-transform/src/lib.rs @@ -9,7 +9,6 @@ mod canonicalization; mod cfg_to_scf; mod cse; mod dce; -mod dead_debug_ops; //mod inliner; mod sccp; mod sink; @@ -21,7 +20,6 @@ pub use self::{ canonicalization::Canonicalizer, cfg_to_scf::{CFGToSCFInterface, transform_cfg_to_scf}, cse::CommonSubexpressionElimination, - dead_debug_ops::RemoveDeadDebugOps, sccp::SparseConditionalConstantPropagation, sink::{ControlFlowSink, SinkOperandDefs}, spill::{ReloadLike, SpillLike, TransformSpillsInterface, transform_spills}, diff --git a/hir/src/attributes.rs b/hir/src/attributes.rs index 91fb82bdf..52924d175 100644 --- a/hir/src/attributes.rs +++ b/hir/src/attributes.rs @@ -1,5 +1,6 @@ mod attribute; +pub mod debug; mod named_attribute; mod traits; -pub use self::{attribute::*, named_attribute::*, traits::*}; +pub use self::{attribute::*, debug::*, named_attribute::*, traits::*}; diff --git a/hir/src/attributes/debug.rs b/hir/src/attributes/debug.rs index 0dee8171d..16efa5272 100644 --- a/hir/src/attributes/debug.rs +++ 
b/hir/src/attributes/debug.rs @@ -1,17 +1,21 @@ use alloc::{format, vec::Vec}; use crate::{ - Type, define_attr_type, + AttrPrinter, Type, + derive::DialectAttribute, + dialects::builtin::BuiltinDialect, formatter::{Document, PrettyPrint, const_text, text}, interner::Symbol, + print::AsmPrinter, }; /// Represents the compilation unit associated with debug information. /// /// The fields in this struct are intentionally aligned with the subset of /// DWARF metadata we currently care about when tracking variable locations. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct DICompileUnitAttr { +#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] +#[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] +pub struct DICompileUnit { pub language: Symbol, pub file: Symbol, pub directory: Option, @@ -19,9 +23,19 @@ pub struct DICompileUnitAttr { pub optimized: bool, } -define_attr_type!(DICompileUnitAttr); +impl Default for DICompileUnit { + fn default() -> Self { + Self { + language: crate::interner::symbols::Empty, + file: crate::interner::symbols::Empty, + directory: None, + producer: None, + optimized: false, + } + } +} -impl DICompileUnitAttr { +impl DICompileUnit { pub fn new(language: Symbol, file: Symbol) -> Self { Self { language, @@ -33,7 +47,13 @@ impl DICompileUnitAttr { } } -impl PrettyPrint for DICompileUnitAttr { +impl AttrPrinter for DICompileUnitAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + *printer += self.value.render(); + } +} + +impl PrettyPrint for DICompileUnit { fn render(&self) -> Document { let mut doc = const_text("di.compile_unit(") + text(format!("language = {}", self.language.as_str())) @@ -56,8 +76,9 @@ impl PrettyPrint for DICompileUnitAttr { /// Represents a subprogram (function) scope for debug information. /// The compile unit is not embedded but typically stored separately on the module. 
-#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct DISubprogramAttr { +#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] +#[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] +pub struct DISubprogram { pub name: Symbol, pub linkage_name: Option, pub file: Symbol, @@ -67,9 +88,21 @@ pub struct DISubprogramAttr { pub is_local: bool, } -define_attr_type!(DISubprogramAttr); +impl Default for DISubprogram { + fn default() -> Self { + Self { + name: crate::interner::symbols::Empty, + linkage_name: None, + file: crate::interner::symbols::Empty, + line: 0, + column: None, + is_definition: false, + is_local: false, + } + } +} -impl DISubprogramAttr { +impl DISubprogram { pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { Self { name, @@ -83,7 +116,13 @@ impl DISubprogramAttr { } } -impl PrettyPrint for DISubprogramAttr { +impl AttrPrinter for DISubprogramAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + *printer += self.value.render(); + } +} + +impl PrettyPrint for DISubprogram { fn render(&self) -> Document { let mut doc = const_text("di.subprogram(") + text(format!("name = {}", self.name.as_str())) @@ -110,9 +149,10 @@ impl PrettyPrint for DISubprogramAttr { } /// Represents a local variable debug record. -/// The scope (DISubprogramAttr) is not embedded but instead stored on the containing function. -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct DILocalVariableAttr { +/// The scope (DISubprogram) is not embedded but instead stored on the containing function. 
+#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] +#[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] +pub struct DILocalVariable { pub name: Symbol, pub arg_index: Option, pub file: Symbol, @@ -121,9 +161,20 @@ pub struct DILocalVariableAttr { pub ty: Option, } -define_attr_type!(DILocalVariableAttr); +impl Default for DILocalVariable { + fn default() -> Self { + Self { + name: crate::interner::symbols::Empty, + arg_index: None, + file: crate::interner::symbols::Empty, + line: 0, + column: None, + ty: None, + } + } +} -impl DILocalVariableAttr { +impl DILocalVariable { pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { Self { name, @@ -136,7 +187,13 @@ impl DILocalVariableAttr { } } -impl PrettyPrint for DILocalVariableAttr { +impl AttrPrinter for DILocalVariableAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + *printer += self.value.render(); + } +} + +impl PrettyPrint for DILocalVariable { fn render(&self) -> Document { let mut doc = const_text("di.local_variable(") + text(format!("name = {}", self.name.as_str())) @@ -191,14 +248,13 @@ pub enum DIExpressionOp { } /// Represents a DWARF expression that describes how to compute or locate a variable's value -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct DIExpressionAttr { +#[derive(DialectAttribute, Clone, Debug, Default, PartialEq, Eq, Hash)] +#[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] +pub struct DIExpression { pub operations: Vec, } -define_attr_type!(DIExpressionAttr); - -impl DIExpressionAttr { +impl DIExpression { pub fn new() -> Self { Self { operations: Vec::new(), @@ -214,13 +270,13 @@ impl DIExpressionAttr { } } -impl Default for DIExpressionAttr { - fn default() -> Self { - Self::new() +impl AttrPrinter for DIExpressionAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + *printer += self.value.render(); } } -impl PrettyPrint for DIExpressionAttr { +impl PrettyPrint for DIExpression { fn render(&self) -> 
Document { if self.operations.is_empty() { return const_text("di.expression()"); diff --git a/hir/src/lib.rs b/hir/src/lib.rs index 1b01ee544..a440ba7cc 100644 --- a/hir/src/lib.rs +++ b/hir/src/lib.rs @@ -82,7 +82,9 @@ pub use midenc_session::diagnostics; pub use self::{ attributes::{ - Attribute, AttributeName, AttributeRef, AttributeRegistration, NamedAttribute, + Attribute, AttributeName, AttributeRef, AttributeRegistration, AttributeValue, + DICompileUnit, DICompileUnitAttr, DIExpression, DIExpressionAttr, DIExpressionOp, + DILocalVariable, DILocalVariableAttr, DISubprogram, DISubprogramAttr, NamedAttribute, NamedAttributeList, }, dialects::builtin::attributes::{Location, Overflow, Visibility, version}, diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs index dfd7c54ae..0eebd71dc 100644 --- a/midenc-compile/src/debug_info.rs +++ b/midenc-compile/src/debug_info.rs @@ -12,7 +12,7 @@ use miden_mast_package::debug_info::{ DebugVariableInfo, }; use midenc_dialect_debuginfo as debuginfo; -use midenc_hir::{DILocalVariableAttr, DISubprogramAttr, OpExt, Type, dialects::builtin}; +use midenc_hir::{DILocalVariable, DISubprogram, OpExt, Type, dialects::builtin}; /// Builder for constructing a `DebugInfoSection` from HIR components. 
pub struct DebugInfoBuilder { @@ -149,10 +149,10 @@ impl DebugInfoBuilder { fn collect_from_function(&mut self, function: &builtin::Function) { // Get function debug info from attributes - // Try to get DISubprogramAttr from the function's attributes - let subprogram: Option = function + // Try to get DISubprogram from the function's attributes + let subprogram: Option = function .get_attribute(midenc_hir::interner::Symbol::intern("di.subprogram")) - .and_then(|attr| attr.downcast_ref::().cloned()); + .and_then(|attr| attr.downcast_ref::().cloned()); let Some(subprogram) = subprogram else { // No debug info for this function, just collect from body @@ -218,7 +218,7 @@ impl DebugInfoBuilder { } } - fn extract_variable_info(&mut self, var: &DILocalVariableAttr) -> Option { + fn extract_variable_info(&mut self, var: &DILocalVariable) -> Option { let name_idx = self.add_string(var.name.as_str()); // Add type if available diff --git a/midenc-compile/src/stages/rewrite.rs b/midenc-compile/src/stages/rewrite.rs index a24a42cb5..6b9161070 100644 --- a/midenc-compile/src/stages/rewrite.rs +++ b/midenc-compile/src/stages/rewrite.rs @@ -8,7 +8,7 @@ use midenc_hir::{ patterns::{GreedyRewriteConfig, RegionSimplificationLevel}, }; use midenc_hir_transform::{ - Canonicalizer, CommonSubexpressionElimination, RemoveDeadDebugOps, SinkOperandDefs, + Canonicalizer, CommonSubexpressionElimination, SinkOperandDefs, SparseConditionalConstantPropagation, }; @@ -79,8 +79,6 @@ impl Stage for ApplyRewritesStage { func_pm.add_pass(Box::new(TransformSpills)); //func_pm.add_pass(Box::new(ControlFlowSink)); //func_pm.add_pass(Box::new(DeadCodeElimination)); - // Remove debug ops whose operands are dead to prevent codegen issues - func_pm.add_pass(Box::new(RemoveDeadDebugOps)); } // Function passes for component-level functions { @@ -99,8 +97,6 @@ impl Stage for ApplyRewritesStage { func_pm.add_pass(Box::new(TransformSpills)); //func_pm.add_pass(Box::new(ControlFlowSink)); 
//func_pm.add_pass(Box::new(DeadCodeElimination)); - // Remove debug ops whose operands are dead to prevent codegen issues - func_pm.add_pass(Box::new(RemoveDeadDebugOps)); } } From 57cf2a61d5d3022a80e29e4ea7dbb47dcee6ff1f Mon Sep 17 00:00:00 2001 From: djole Date: Mon, 30 Mar 2026 16:01:42 +0200 Subject: [PATCH 07/32] feat: debug variable location tracking with dedup crash fix Add end-to-end debug variable location tracking through the compiler pipeline, and patch miden-vm to fix a crash when basic blocks are deduplicated during assembly. --- Cargo.lock | 129 ++++-- Cargo.toml | 18 +- codegen/masm/src/lower/component.rs | 4 +- codegen/masm/src/lower/lowering.rs | 2 +- frontend/wasm/src/module/build_ir.rs | 18 +- .../wasm/src/module/function_builder_ext.rs | 14 +- midenc-compile/Cargo.toml | 1 + midenc-compile/src/debug_info.rs | 102 +++-- midenc-compile/src/stages/assemble.rs | 13 +- midenc-compile/src/stages/codegen.rs | 35 +- .../expected/debug_conditional_assignment.hir | 36 ++ .../expected/debug_multiple_locals.hir | 27 ++ .../expected/debug_nested_loops.hir | 36 ++ .../expected/debug_simple_params.hir | 23 ++ .../expected/debug_variable_locations.hir | 76 ++-- .../integration/src/rust_masm_tests/debug.rs | 78 ++++ tools/debugdump/src/main.rs | 391 +++++++++++------- 17 files changed, 700 insertions(+), 303 deletions(-) create mode 100644 tests/integration/expected/debug_conditional_assignment.hir create mode 100644 tests/integration/expected/debug_multiple_locals.hir create mode 100644 tests/integration/expected/debug_nested_loops.hir create mode 100644 tests/integration/expected/debug_simple_params.hir diff --git a/Cargo.lock b/Cargo.lock index edb2e332c..731c453da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -741,9 +741,9 @@ dependencies = [ [[package]] name = "const-hex" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "531185e432bb31db1ecda541e9e7ab21468d4d844ad7505e0546a49b4945d49b" 
+checksum = "20d9a563d167a9cce0f94153382b33cb6eded6dfabff03c69ad65a28ea1514e0" dependencies = [ "cfg-if", "cpufeatures 0.2.17", @@ -2398,7 +2398,7 @@ dependencies = [ "miden-crypto", "miden-protocol", "miden-standards", - "miden-utils-sync", + "miden-utils-sync 0.22.2", "primitive-types", "regex", "serde", @@ -2415,33 +2415,40 @@ checksum = "b45551e1417cb2be47064c36fe6e1e69ab10ad7b4b55f0731d8cac109b7738b9" dependencies = [ "miden-core", "miden-crypto", - "miden-utils-indexing", + "miden-utils-indexing 0.22.2", + "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "miden-air" +version = "0.22.3" +dependencies = [ + "miden-core", + "miden-crypto", + "miden-utils-indexing 0.22.3", "thiserror 2.0.18", "tracing", ] [[package]] name = "miden-assembly" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2094e2b943f7bf955a2bc3b44b0ad7c4f45a286f170eaa7e5060871c44847a" +version = "0.22.3" dependencies = [ "env_logger", "log", "miden-assembly-syntax", "miden-core", "miden-mast-package", - "miden-package-registry", - "miden-project", + "miden-package-registry 0.22.3", + "miden-project 0.22.3", "smallvec", "thiserror 2.0.18", ] [[package]] name = "miden-assembly-syntax" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5a3212614ad28399612f39024c1e321dc8cebc8998def06058e60462ddc3856" +version = "0.22.3" dependencies = [ "aho-corasick", "env_logger", @@ -2549,9 +2556,7 @@ dependencies = [ [[package]] name = "miden-core" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39a4a2e2de49213ec899e88fe399d4ec568c8eb9e8c747d6ed58938c40031daa" +version = "0.22.3" dependencies = [ "derive_more", "itertools 0.14.0", @@ -2559,8 +2564,8 @@ dependencies = [ "miden-debug-types", "miden-formatting", "miden-utils-core-derive", - "miden-utils-indexing", - "miden-utils-sync", + "miden-utils-indexing 0.22.3", + "miden-utils-sync 0.22.3", 
"num-derive", "num-traits", "proptest", @@ -2580,9 +2585,9 @@ dependencies = [ "miden-assembly", "miden-core", "miden-crypto", - "miden-package-registry", + "miden-package-registry 0.22.2", "miden-processor", - "miden-utils-sync", + "miden-utils-sync 0.22.2", "thiserror 2.0.18", ] @@ -2720,16 +2725,14 @@ dependencies = [ [[package]] name = "miden-debug-types" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16570786d938b7f795921b3a84890708a7d72708442c622eb58c2fb5480821e9" +version = "0.22.3" dependencies = [ "memchr", "miden-crypto", "miden-formatting", "miden-miette", - "miden-utils-indexing", - "miden-utils-sync", + "miden-utils-indexing 0.22.3", + "miden-utils-sync 0.22.3", "paste", "serde", "serde_spanned 1.1.1", @@ -2867,9 +2870,7 @@ dependencies = [ [[package]] name = "miden-mast-package" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0953396dc5e575b79bccb8b7da6e0d18ce71bcde899901bb4293a433f9003b94" +version = "0.22.3" dependencies = [ "derive_more", "miden-assembly-syntax", @@ -2966,18 +2967,29 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "miden-package-registry" +version = "0.22.3" +dependencies = [ + "miden-assembly-syntax", + "miden-core", + "miden-mast-package", + "pubgrub", + "serde", + "smallvec", + "thiserror 2.0.18", +] + [[package]] name = "miden-processor" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "340c424f9f62b56a808c9a479cef016f25478e227555ce39cb2684e8baf26542" +version = "0.22.3" dependencies = [ "itertools 0.14.0", - "miden-air", + "miden-air 0.22.3", "miden-core", "miden-debug-types", "miden-utils-diagnostics", - "miden-utils-indexing", + "miden-utils-indexing 0.22.3", "paste", "rayon", "thiserror 2.0.18", @@ -2994,7 +3006,19 @@ dependencies = [ "miden-assembly-syntax", "miden-core", "miden-mast-package", - "miden-package-registry", + "miden-package-registry 0.22.2", + 
"thiserror 2.0.18", + "toml 1.1.2+spec-1.1.0", +] + +[[package]] +name = "miden-project" +version = "0.22.3" +dependencies = [ + "miden-assembly-syntax", + "miden-core", + "miden-mast-package", + "miden-package-registry 0.22.3", "serde", "serde-untagged", "thiserror 2.0.18", @@ -3018,7 +3042,7 @@ dependencies = [ "miden-mast-package", "miden-processor", "miden-protocol-macros", - "miden-utils-sync", + "miden-utils-sync 0.22.2", "miden-verifier", "rand 0.9.4", "rand_chacha", @@ -3049,7 +3073,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fadcff2d171f81f2737a35a007c756753a8298d067555c7a556bd72f570b32f9" dependencies = [ "bincode", - "miden-air", + "miden-air 0.22.2", "miden-core", "miden-crypto", "miden-debug-types", @@ -3200,9 +3224,7 @@ dependencies = [ [[package]] name = "miden-utils-core-derive" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdd5103e9b6527ad396dce12c135cea1984dfd77ebbffa76f260f4e139906cc4" +version = "0.22.3" dependencies = [ "proc-macro2", "quote", @@ -3211,9 +3233,7 @@ dependencies = [ [[package]] name = "miden-utils-diagnostics" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72226906c968c2e7c37435d67be9e29aeba05336db30c4e57d290cc6efb1da9d" +version = "0.22.3" dependencies = [ "miden-crypto", "miden-debug-types", @@ -3227,6 +3247,14 @@ name = "miden-utils-indexing" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5cc2e62161113179a370ae0bf1fd33eb8d20b6131e8559d2dc0bead5cffae586" +dependencies = [ + "miden-crypto", + "thiserror 2.0.18", +] + +[[package]] +name = "miden-utils-indexing" +version = "0.22.3" dependencies = [ "miden-crypto", "serde", @@ -3245,6 +3273,16 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "miden-utils-sync" +version = "0.22.3" +dependencies = [ + "lock_api", + "loom", + "once_cell", + "parking_lot", +] + [[package]] name = 
"miden-verifier" version = "0.22.2" @@ -3252,7 +3290,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83f47bf33268ffb31c2fc452debf8e4ba76fbb3175566efbfe850c4886fb5b37" dependencies = [ "bincode", - "miden-air", + "miden-air 0.22.2", "miden-core", "miden-crypto", "serde", @@ -3290,9 +3328,9 @@ dependencies = [ "miden-assembly-syntax", "miden-core", "miden-mast-package", - "miden-package-registry", + "miden-package-registry 0.22.2", "miden-processor", - "miden-project", + "miden-project 0.22.2", "miden-protocol", "miden-thiserror", "midenc-dialect-arith", @@ -3321,6 +3359,7 @@ dependencies = [ "inventory", "log", "miden-assembly", + "miden-core", "miden-debug-types", "miden-mast-package", "miden-thiserror", @@ -3354,7 +3393,7 @@ dependencies = [ [[package]] name = "midenc-dialect-debuginfo" -version = "0.7.1" +version = "0.8.1" dependencies = [ "log", "midenc-hir", diff --git a/Cargo.toml b/Cargo.toml index ecc30e371..51c48a1b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,18 +163,12 @@ midenc-expect-test = { path = "tools/expect-test" } miden-field = { version = "^0.24" } [patch.crates-io] -#miden-assembly = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } -#miden-assembly = { path = "../miden-vm/assembly" } -#miden-assembly-syntax = { path = "../miden-vm/assembly-syntax" } -#miden-core = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } -#miden-core = { path = "../miden-vm/core" } -# miden-client = { git = "https://github.com/0xMiden/miden-client", rev = "0a5add565d1388f77cd182f3639c16aa8f7ec674" } -# miden-debug = { git = "https://github.com/0xMiden/miden-debug", branch = "main" } -#miden-debug-types = { path = "../miden-vm/crates/debug/types" } -#miden-processor = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } -#miden-processor = { path = "../miden-vm/processor" } 
-#miden-mast-package = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } -#miden-mast-package = { path = "../miden-vm/package" } +miden-assembly = { path = "../miden-vm/crates/assembly" } +miden-assembly-syntax = { path = "../miden-vm/crates/assembly-syntax" } +miden-core = { path = "../miden-vm/core" } +miden-debug-types = { path = "../miden-vm/crates/debug-types" } +miden-mast-package = { path = "../miden-vm/crates/mast-package" } +miden-processor = { path = "../miden-vm/processor" } # miden-protocol = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } # miden-standards = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } # miden-tx = { tag = "v0.14.0-beta.4", git = "https://github.com/0xMiden/miden-base" } diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index e8c78a106..c7d419946 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -2,7 +2,7 @@ use alloc::{collections::BTreeSet, sync::Arc, vec::Vec}; use miden_assembly::{PathBuf as LibraryPath, ast::InvocationTarget}; use miden_assembly_syntax::{ast::Attribute, parser::WordValue}; -use miden_core::DebugVarLocation; +use miden_core::operations::{DebugVarInfo, DebugVarLocation}; use midenc_hir::{ FunctionIdent, Op, OpExt, SourceSpan, Span, Symbol, TraceTarget, ValueRef, diagnostics::IntoDiagnostic, dialects::builtin, pass::AnalysisManager, @@ -719,7 +719,7 @@ fn patch_debug_var_locals_in_block(block: &mut masm::Block, num_locals: u16) { let fmp_offset = *idx - (num_locals as i16); // Create new info with patched location, preserving all fields - let mut new_info = miden_core::DebugVarInfo::new( + let mut new_info = DebugVarInfo::new( info.name(), DebugVarLocation::Local(fmp_offset), ); diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 729d0ce4d..768ccc2a6 
100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1277,7 +1277,7 @@ impl HirLowering for debuginfo::DebugValue { } fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { - use miden_core::{DebugVarInfo, DebugVarLocation, Felt}; + use miden_core::{Felt, operations::{DebugVarInfo, DebugVarLocation}}; use midenc_hir::DIExpressionOp; // Get the variable info diff --git a/frontend/wasm/src/module/build_ir.rs b/frontend/wasm/src/module/build_ir.rs index 2f545f134..b6aa8d39d 100644 --- a/frontend/wasm/src/module/build_ir.rs +++ b/frontend/wasm/src/module/build_ir.rs @@ -117,13 +117,17 @@ pub fn build_ir_module( ..Default::default() }) .into_diagnostic()?; - parsed_module.function_debug = collect_function_debug_info( - parsed_module, - module_types, - &parsed_module.module, - &addr2line, - context.diagnostics(), - ); + parsed_module.function_debug = if context.session().options.emit_debug_decorators() { + collect_function_debug_info( + parsed_module, + module_types, + &parsed_module.module, + &addr2line, + context.diagnostics(), + ) + } else { + Default::default() + }; let mut func_translator = FuncTranslator::new(context.clone()); // Although this renders this parsed module invalid(without function diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index e4f037ffa..74c7c71e2 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -12,11 +12,12 @@ use midenc_dialect_ub::UndefinedBehaviorOpBuilder; use midenc_dialect_wasm::WasmOpBuilder; use midenc_hir::{ BlockRef, Builder, Context, EntityRef, FxHashMap, FxHashSet, Ident, Listener, ListenerType, - OpBuilder, OperationRef, ProgramPoint, RegionRef, SmallVec, SourceSpan, Type, ValueRef, + Op, OpBuilder, OperationRef, ProgramPoint, RegionRef, SmallVec, SourceSpan, Type, ValueRef, dialects::builtin::{ BuiltinOpBuilder, FunctionBuilder, 
FunctionRef, attributes::{LocalVariable, Signature}, }, + interner::Symbol, traits::{BranchOpInterface, Terminator}, }; @@ -594,10 +595,17 @@ impl FunctionBuilderExt<'_, B> { return; }; let info = info.borrow(); + let context = self.inner.builder().context_rc(); + let cu_attr = context + .create_attribute::(info.compile_unit.clone()) + .as_attribute_ref(); + let sp_attr = context + .create_attribute::(info.subprogram.clone()) + .as_attribute_ref(); let mut func = self.inner.func.borrow_mut(); let op = func.as_operation_mut(); - op.set_intrinsic_attribute(Self::DI_COMPILE_UNIT_ATTR, Some(info.compile_unit.clone())); - op.set_intrinsic_attribute(Self::DI_SUBPROGRAM_ATTR, Some(info.subprogram.clone())); + op.set_attribute(Self::DI_COMPILE_UNIT_ATTR, cu_attr); + op.set_attribute(Self::DI_SUBPROGRAM_ATTR, sp_attr); } fn emit_parameter_dbg_if_needed(&mut self, span: SourceSpan) { diff --git a/midenc-compile/Cargo.toml b/midenc-compile/Cargo.toml index 53552a5a2..3173bc0ab 100644 --- a/midenc-compile/Cargo.toml +++ b/midenc-compile/Cargo.toml @@ -32,6 +32,7 @@ log.workspace = true inventory.workspace = true midenc-codegen-masm.workspace = true miden-assembly.workspace = true +miden-core.workspace = true miden-debug-types.workspace = true miden-mast-package.workspace = true midenc-frontend-wasm.workspace = true diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs index 0eebd71dc..481fbc92e 100644 --- a/midenc-compile/src/debug_info.rs +++ b/midenc-compile/src/debug_info.rs @@ -1,29 +1,37 @@ //! Debug info section builder for MASP packages. //! //! This module provides utilities for collecting debug information from the HIR -//! and building a `DebugInfoSection` that can be serialized into the `.debug_info` -//! custom section of a MASP package. +//! and building debug sections that can be serialized into the MASP package. 
-use alloc::{collections::BTreeMap, format, string::ToString}; +use alloc::{collections::BTreeMap, format, string::ToString, sync::Arc}; use miden_debug_types::{ColumnNumber, LineNumber}; use miden_mast_package::debug_info::{ - DebugFileInfo, DebugFunctionInfo, DebugInfoSection, DebugPrimitiveType, DebugTypeInfo, - DebugVariableInfo, + DebugFileInfo, DebugFunctionInfo, DebugFunctionsSection, DebugPrimitiveType, + DebugSourcesSection, DebugTypeIdx, DebugTypeInfo, DebugTypesSection, DebugVariableInfo, }; use midenc_dialect_debuginfo as debuginfo; -use midenc_hir::{DILocalVariable, DISubprogram, OpExt, Type, dialects::builtin}; +use midenc_hir::{DILocalVariable, DISubprogramAttr, OpExt, Type, dialects::builtin}; -/// Builder for constructing a `DebugInfoSection` from HIR components. +/// The output of the debug info collection pass: three separate sections. +pub struct DebugInfoSections { + pub types: DebugTypesSection, + pub sources: DebugSourcesSection, + pub functions: DebugFunctionsSection, +} + +/// Builder for constructing debug info sections from HIR components. pub struct DebugInfoBuilder { - section: DebugInfoSection, + types: DebugTypesSection, + sources: DebugSourcesSection, + functions: DebugFunctionsSection, /// Maps source file paths to their indices in the file table file_indices: BTreeMap, - /// Maps type hashes to their indices in the type table - type_indices: BTreeMap, + /// Maps type keys to their indices in the type table + type_indices: BTreeMap, } -/// A key for deduplicating types +/// A key for deduplicating types (uses u32 since DebugTypeIdx lacks Ord) #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] enum TypeKey { Primitive(u8), // Use discriminant instead of the enum directly @@ -42,17 +50,14 @@ impl DebugInfoBuilder { /// Creates a new debug info builder. 
pub fn new() -> Self { Self { - section: DebugInfoSection::new(), + types: DebugTypesSection::new(), + sources: DebugSourcesSection::new(), + functions: DebugFunctionsSection::new(), file_indices: BTreeMap::new(), type_indices: BTreeMap::new(), } } - /// Adds a string to the string table and returns its index. - pub fn add_string(&mut self, s: impl Into) -> u32 { - self.section.add_string(s) - } - /// Adds a file to the file table and returns its index. /// /// The `directory` parameter, if provided, is joined with the path to create @@ -74,16 +79,16 @@ impl DebugInfoBuilder { return idx; } - let path_idx = self.section.add_string(&full_path); + let path_idx = self.sources.add_string(Arc::from(full_path.as_str())); let file = DebugFileInfo::new(path_idx); - let idx = self.section.add_file(file); + let idx = self.sources.add_file(file); self.file_indices.insert(full_path, idx); idx } /// Adds a type to the type table and returns its index. - pub fn add_type(&mut self, ty: &Type) -> u32 { + pub fn add_type(&mut self, ty: &Type) -> DebugTypeIdx { let debug_type = hir_type_to_debug_type(ty, self); let key = type_to_key(&debug_type); @@ -91,19 +96,19 @@ impl DebugInfoBuilder { return idx; } - let idx = self.section.add_type(debug_type); + let idx = self.types.add_type(debug_type); self.type_indices.insert(key, idx); idx } /// Adds a primitive type and returns its index. 
- pub fn add_primitive_type(&mut self, prim: DebugPrimitiveType) -> u32 { + pub fn add_primitive_type(&mut self, prim: DebugPrimitiveType) -> DebugTypeIdx { let key = TypeKey::Primitive(prim as u8); if let Some(&idx) = self.type_indices.get(&key) { return idx; } - let idx = self.section.add_type(DebugTypeInfo::Primitive(prim)); + let idx = self.types.add_type(DebugTypeInfo::Primitive(prim)); self.type_indices.insert(key, idx); idx } @@ -148,11 +153,14 @@ impl DebugInfoBuilder { } fn collect_from_function(&mut self, function: &builtin::Function) { - // Get function debug info from attributes // Try to get DISubprogram from the function's attributes - let subprogram: Option = function - .get_attribute(midenc_hir::interner::Symbol::intern("di.subprogram")) - .and_then(|attr| attr.downcast_ref::().cloned()); + let subprogram_attr = function + .get_attribute(midenc_hir::interner::Symbol::intern("di.subprogram")); + + let subprogram = subprogram_attr.and_then(|attr| { + let borrowed = attr.borrow(); + borrowed.downcast_ref::().map(|sp| sp.as_value().clone()) + }); let Some(subprogram) = subprogram else { // No debug info for this function, just collect from body @@ -164,8 +172,10 @@ impl DebugInfoBuilder { let file_idx = self.add_file(subprogram.file.as_str(), None); // Add function name - let name_idx = self.add_string(subprogram.name.as_str()); - let linkage_name_idx = subprogram.linkage_name.map(|s| self.add_string(s.as_str())); + let name_idx = self.functions.add_string(Arc::from(subprogram.name.as_str())); + let linkage_name_idx = subprogram + .linkage_name + .map(|s| self.functions.add_string(Arc::from(s.as_str()))); // Create function info let line = LineNumber::new(subprogram.line).unwrap_or_default(); @@ -179,7 +189,7 @@ impl DebugInfoBuilder { // Collect local variables from function body self.collect_variables_from_function_body(function, Some(&mut func_info)); - self.section.add_function(func_info); + self.functions.add_function(func_info); } fn 
collect_variables_from_function_body( @@ -204,7 +214,7 @@ impl DebugInfoBuilder { for op in block.body() { // Check if this is a DbgValue operation if let Some(dbg_value) = op.downcast_ref::() - && let Some(var_info) = self.extract_variable_info(dbg_value.variable()) + && let Some(var_info) = self.extract_variable_info(dbg_value.variable().as_value()) { func_info.add_variable(var_info); } @@ -219,7 +229,7 @@ impl DebugInfoBuilder { } fn extract_variable_info(&mut self, var: &DILocalVariable) -> Option { - let name_idx = self.add_string(var.name.as_str()); + let name_idx = self.functions.add_string(Arc::from(var.name.as_str())); // Add type if available let type_idx = if let Some(ref ty) = var.ty { @@ -240,14 +250,18 @@ impl DebugInfoBuilder { Some(var_info) } - /// Builds and returns the final `DebugInfoSection`. - pub fn build(self) -> DebugInfoSection { - self.section + /// Builds and returns the final debug info sections. + pub fn build(self) -> DebugInfoSections { + DebugInfoSections { + types: self.types, + sources: self.sources, + functions: self.functions, + } } /// Returns whether any debug info has been collected. 
pub fn is_empty(&self) -> bool { - self.section.is_empty() + self.functions.is_empty() && self.types.is_empty() && self.sources.is_empty() } } @@ -284,7 +298,9 @@ fn hir_type_to_debug_type(ty: &Type, builder: &mut DebugInfoBuilder) -> DebugTyp } } // For types we don't have direct mappings for, use Unknown - Type::Struct(_) | Type::List(_) | Type::Function(_) => DebugTypeInfo::Unknown, + Type::Struct(_) | Type::List(_) | Type::Function(_) | Type::Enum(_) => { + DebugTypeInfo::Unknown + } } } @@ -292,22 +308,24 @@ fn hir_type_to_debug_type(ty: &Type, builder: &mut DebugInfoBuilder) -> DebugTyp fn type_to_key(ty: &DebugTypeInfo) -> TypeKey { match ty { DebugTypeInfo::Primitive(p) => TypeKey::Primitive(*p as u8), - DebugTypeInfo::Pointer { pointee_type_idx } => TypeKey::Pointer(*pointee_type_idx), + DebugTypeInfo::Pointer { pointee_type_idx } => { + TypeKey::Pointer(pointee_type_idx.as_u32()) + } DebugTypeInfo::Array { element_type_idx, count, - } => TypeKey::Array(*element_type_idx, *count), + } => TypeKey::Array(element_type_idx.as_u32(), *count), DebugTypeInfo::Unknown => TypeKey::Unknown, // For complex types like structs and functions, we don't deduplicate _ => TypeKey::Unknown, } } -/// Builds a `DebugInfoSection` from an HIR component if debug info is enabled. -pub fn build_debug_info_section( +/// Builds debug info sections from an HIR component if debug info is enabled. 
+pub fn build_debug_info_sections( component: &builtin::Component, emit_debug_decorators: bool, -) -> Option { +) -> Option { if !emit_debug_decorators { return None; } diff --git a/midenc-compile/src/stages/assemble.rs b/midenc-compile/src/stages/assemble.rs index c05a9cbb2..66daedebc 100644 --- a/midenc-compile/src/stages/assemble.rs +++ b/midenc-compile/src/stages/assemble.rs @@ -93,9 +93,16 @@ fn build_package( sections.push(Section::new(SectionId::ACCOUNT_COMPONENT_METADATA, bytes)); } - if let Some(bytes) = debug_info_bytes { - log::debug!("adding .debug_info section to package ({} bytes)", bytes.len()); - sections.push(Section::new(SectionId::DEBUG_INFO, bytes)); + if let Some((types_bytes, sources_bytes, functions_bytes)) = debug_info_bytes { + log::debug!( + "adding debug sections to package (types={} sources={} functions={} bytes)", + types_bytes.len(), + sources_bytes.len(), + functions_bytes.len(), + ); + sections.push(Section::new(SectionId::DEBUG_TYPES, types_bytes)); + sections.push(Section::new(SectionId::DEBUG_SOURCES, sources_bytes)); + sections.push(Section::new(SectionId::DEBUG_FUNCTIONS, functions_bytes)); } Package { diff --git a/midenc-compile/src/stages/codegen.rs b/midenc-compile/src/stages/codegen.rs index 659d89617..4f920ad22 100644 --- a/midenc-compile/src/stages/codegen.rs +++ b/midenc-compile/src/stages/codegen.rs @@ -20,8 +20,8 @@ pub struct CodegenOutput { pub link_packages: BTreeMap>, /// The serialized AccountComponentMetadata (name, description, storage layout, etc.) 
pub account_component_metadata_bytes: Option>, - /// The serialized DebugInfoSection for the .debug_info custom section - pub debug_info_bytes: Option>, + /// The serialized debug sections (types, sources, functions) + pub debug_info_bytes: Option<(Vec, Vec, Vec)>, } /// Perform code generation on the possibly-linked output of previous stages @@ -75,18 +75,27 @@ impl Stage for CodegenStage { session.emit(OutputMode::Text, masm_component.as_ref()).into_diagnostic()?; } - // Build debug info section if debug decorators are enabled + // Build debug info sections if debug decorators are enabled let debug_info_bytes = if session.options.emit_debug_decorators() { - use miden_assembly::utils::Serializable; - - log::debug!("collecting debug info for .debug_info section"); - let debug_section = - crate::debug_info::build_debug_info_section(&component.borrow(), true); - debug_section.map(|section| { - let mut bytes = alloc::vec::Vec::new(); - section.write_into(&mut bytes); - log::debug!("built debug_info section: {} bytes", bytes.len()); - bytes + use miden_core::serde::Serializable; + + log::debug!("collecting debug info for debug sections"); + let debug_sections = + crate::debug_info::build_debug_info_sections(&component.borrow(), true); + debug_sections.map(|sections| { + let mut types_bytes = alloc::vec::Vec::new(); + sections.types.write_into(&mut types_bytes); + let mut sources_bytes = alloc::vec::Vec::new(); + sections.sources.write_into(&mut sources_bytes); + let mut functions_bytes = alloc::vec::Vec::new(); + sections.functions.write_into(&mut functions_bytes); + log::debug!( + "built debug sections: types={} sources={} functions={} bytes", + types_bytes.len(), + sources_bytes.len(), + functions_bytes.len(), + ); + (types_bytes, sources_bytes, functions_bytes) }) } else { None diff --git a/tests/integration/expected/debug_conditional_assignment.hir b/tests/integration/expected/debug_conditional_assignment.hir new file mode 100644 index 000000000..8564b3852 --- 
/dev/null +++ b/tests/integration/expected/debug_conditional_assignment.hir @@ -0,0 +1,36 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_6a38c85fac04c065bce04eac12569a730e53b0bf392ff986bec40561d4ead6b0 { + builtin.function public extern("C") @entrypoint(%0: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + %2 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %3 = arith.constant 1 : i32; + %4 = hir.bitcast %3 <{ ty = #builtin.type }>; + %5 = arith.shl %2, %4; + %6 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %7 = arith.constant 1 : i32; + %8 = arith.add %6, %7 <{ overflow = #builtin.overflow }>; + %9 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %10 = arith.constant 10 : i32; + %11 = hir.bitcast %9 <{ ty = #builtin.type }>; + %12 = hir.bitcast %10 <{ ty = #builtin.type }>; + %13 = arith.gt %11, %12; + %14 = arith.zext %13 <{ ty = #builtin.type }>; + %15 = hir.bitcast %14 <{ ty = #builtin.type }>; + %16 = arith.constant 0 : i32; + %17 = arith.neq %15, %16; + %18 = cf.select %17, %5, %8; + cf.br ^block7:(%18); + ^block7(%1: i32): + builtin.ret %1 : (i32); + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @gv1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @gv2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/expected/debug_multiple_locals.hir b/tests/integration/expected/debug_multiple_locals.hir new file mode 100644 index 000000000..a79c4d40e --- /dev/null +++ b/tests/integration/expected/debug_multiple_locals.hir @@ -0,0 +1,27 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_4a512383b70b366e158ab1775d5a9229476df2d2f66794836c3f1deff13c59ae { + builtin.function public extern("C") 
@entrypoint(%0: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + %2 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %3 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %4 = arith.constant 1 : i32; + %5 = hir.bitcast %4 <{ ty = #builtin.type }>; + %6 = arith.shl %3, %5; + %7 = arith.add %2, %6 <{ overflow = #builtin.overflow }>; + %8 = arith.constant 1 : i32; + %9 = arith.add %7, %8 <{ overflow = #builtin.overflow }>; + cf.br ^block7:(%9); + ^block7(%1: i32): + builtin.ret %1 : (i32); + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @gv1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @gv2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/expected/debug_nested_loops.hir b/tests/integration/expected/debug_nested_loops.hir new file mode 100644 index 000000000..688505a36 --- /dev/null +++ b/tests/integration/expected/debug_nested_loops.hir @@ -0,0 +1,36 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_0c3ae86ec9024ba37eabb5e0f732abe49b77e739f7696fe0d9cb0b5d1f72d1c1 { + builtin.function public extern("C") @entrypoint(%0: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + %2 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %3 = arith.constant -1 : i32; + %4 = arith.add %2, %3 <{ overflow = #builtin.overflow }>; + %5 = hir.bitcast %4 <{ ty = #builtin.type }>; + %6 = arith.zext %5 <{ ty = #builtin.type }>; + %7 = hir.bitcast %6 <{ ty = #builtin.type }>; + %8 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %9 = hir.bitcast %8 <{ ty = #builtin.type }>; + %10 = arith.zext %9 <{ ty = #builtin.type }>; + %11 = hir.bitcast %10 <{ ty = #builtin.type }>; + %12 = arith.mul %7, %11 
<{ overflow = #builtin.overflow }>; + %13 = arith.constant 1 : i64; + %14 = hir.bitcast %12 <{ ty = #builtin.type }>; + %15 = hir.cast %13 <{ ty = #builtin.type }>; + %16 = arith.shr %14, %15; + %17 = hir.bitcast %16 <{ ty = #builtin.type }>; + %18 = arith.trunc %17 <{ ty = #builtin.type }>; + cf.br ^block7:(%18); + ^block7(%1: i32): + builtin.ret %1 : (i32); + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @gv1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @gv2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at end of file diff --git a/tests/integration/expected/debug_simple_params.hir b/tests/integration/expected/debug_simple_params.hir new file mode 100644 index 000000000..d7bfedd45 --- /dev/null +++ b/tests/integration/expected/debug_simple_params.hir @@ -0,0 +1,23 @@ +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_f87bb1f3934b1e6844c1c9b6ceccb97d68349d13274693606c56673a28b6e537 { + builtin.function public extern("C") @entrypoint(%0: i32, %1: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + hir.store_local %1 <{ local = #builtin.local_variable<1, i32> }> : (i32); + %3 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %4 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %5 = arith.add %3, %4 <{ overflow = #builtin.overflow }>; + cf.br ^block7:(%5); + ^block7(%2: i32): + builtin.ret %2 : (i32); + }; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @gv1 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + builtin.global_variable public @gv2 : i32 { + builtin.ret_imm #builtin.number<1048576>; + }; + }; +}; \ No newline at end of file diff --git 
a/tests/integration/expected/debug_variable_locations.hir b/tests/integration/expected/debug_variable_locations.hir index 2711397c4..d8629ee01 100644 --- a/tests/integration/expected/debug_variable_locations.hir +++ b/tests/integration/expected/debug_variable_locations.hir @@ -1,47 +1,49 @@ -builtin.component root_ns:root@1.0.0 { - builtin.module public @test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b { - public builtin.function @entrypoint(v0: i32) -> i32 { - ^block6(v0: i32): - v2 = arith.constant 0 : i32; - v3 = arith.constant 0 : i32; - debuginfo.debug_value v3 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; - v4 = arith.constant 0 : i32; - debuginfo.debug_value v4 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; - cf.br ^block8(v3, v0, v4); - ^block7(v1: i32): +builtin.component private @root_ns:root@1.0.0 { + builtin.module private @test_rust_abc71f0181dc4e2804cd5c8e0cf50175a2515fac3096a395915f9b0604eb287b { + builtin.function public extern("C") @entrypoint(%0: i32) -> i32 { + hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); + %2 = arith.constant 0 : i32; + hir.store_local %2 <{ local = #builtin.local_variable<1, i32> }> : (i32); + %3 = arith.constant 0 : i32; + hir.store_local %3 <{ local = #builtin.local_variable<2, i32> }> : (i32); + cf.br ^block8; + ^block7(%1: i32): - ^block8(v6: i32, v7: i32, v15: i32): - v8 = hir.bitcast v6 : u32; - v9 = hir.bitcast v7 : u32; - v10 = arith.lte v8, v9 : i1; - v11 = arith.zext v10 : u32; - v12 = hir.bitcast v11 : i32; - v13 = arith.constant 0 : i32; - v14 = arith.neq v12, v13 : i1; - cf.cond_br v14 ^block10, ^block11; - ^block9(v5: i32): + ^block8: + %5 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %6 = hir.load_local <{ local = #builtin.local_variable<0, i32> }>; + %7 = hir.bitcast %5 <{ ty = #builtin.type }>; + %8 = 
hir.bitcast %6 <{ ty = #builtin.type }>; + %9 = arith.lte %7, %8; + %10 = arith.zext %9 <{ ty = #builtin.type }>; + %11 = hir.bitcast %10 <{ ty = #builtin.type }>; + %12 = arith.constant 0 : i32; + %13 = arith.neq %11, %12; + cf.cond_br %13 ^block10, ^block11 : (i1); + ^block9(%4: i32): ^block10: - v16 = arith.add v15, v6 : i32 #[overflow = wrapping]; - debuginfo.debug_value v16 #[expression = di.expression()] #[variable = di.local_variable(name = local2, file = unknown, line = 0, ty = i32)]; - v17 = arith.constant 1 : i32; - v18 = arith.add v6, v17 : i32 #[overflow = wrapping]; - debuginfo.debug_value v18 #[expression = di.expression()] #[variable = di.local_variable(name = local1, file = unknown, line = 0, ty = i32)]; - cf.br ^block8(v18, v7, v16); + %15 = hir.load_local <{ local = #builtin.local_variable<2, i32> }>; + %16 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %17 = arith.add %15, %16 <{ overflow = #builtin.overflow }>; + hir.store_local %17 <{ local = #builtin.local_variable<2, i32> }> : (i32); + %18 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; + %19 = arith.constant 1 : i32; + %20 = arith.add %18, %19 <{ overflow = #builtin.overflow }>; + hir.store_local %20 <{ local = #builtin.local_variable<1, i32> }> : (i32); + cf.br ^block8; ^block11: - builtin.ret v15; + %14 = hir.load_local <{ local = #builtin.local_variable<2, i32> }>; + builtin.ret %14 : (i32); }; - - builtin.global_variable private @#__stack_pointer : i32 { - builtin.ret_imm 1048576; + builtin.global_variable private @__stack_pointer : i32 { + builtin.ret_imm #builtin.number<1048576>; }; - - builtin.global_variable public @#gv1 : i32 { - builtin.ret_imm 1048576; + builtin.global_variable public @gv1 : i32 { + builtin.ret_imm #builtin.number<1048576>; }; - - builtin.global_variable public @#gv2 : i32 { - builtin.ret_imm 1048576; + builtin.global_variable public @gv2 : i32 { + builtin.ret_imm #builtin.number<1048576>; }; }; }; \ No newline at end of file 
diff --git a/tests/integration/src/rust_masm_tests/debug.rs b/tests/integration/src/rust_masm_tests/debug.rs index 4676a3d07..0999e420e 100644 --- a/tests/integration/src/rust_masm_tests/debug.rs +++ b/tests/integration/src/rust_masm_tests/debug.rs @@ -25,3 +25,81 @@ fn variable_locations_schedule() { let mut test = builder.build(); test.expect_ir_unoptimized(expect_file!["../../expected/debug_variable_locations.hir"]); } + +#[test] +fn debug_simple_params() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (a: u32, b: u32) -> u32 { + a + b + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["../../expected/debug_simple_params.hir"]); +} + +#[test] +fn debug_conditional_assignment() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (x: u32) -> u32 { + let result = if x > 10 { x * 2 } else { x + 1 }; + result + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["../../expected/debug_conditional_assignment.hir"]); +} + +#[test] +fn debug_multiple_locals() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (n: u32) -> u32 { + let a: u32 = n + 1; + let b: u32 = n * 2; + let c: u32 = a + b; + c + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["../../expected/debug_multiple_locals.hir"]); +} + +#[test] +fn debug_nested_loops() { + setup::enable_compiler_instrumentation(); + + let source = r#" + (n: u32) -> u32 { + let mut total = 0u32; + let mut i = 0u32; + while i < n { + let 
mut j = 0u32; + while j < i { + total += 1; + j += 1; + } + i += 1; + } + total + } + "#; + + let mut builder = CompilerTestBuilder::rust_fn_body(source, []); + builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + let mut test = builder.build(); + test.expect_ir_unoptimized(expect_file!["../../expected/debug_nested_loops.hir"]); +} diff --git a/tools/debugdump/src/main.rs b/tools/debugdump/src/main.rs index 6fd896156..08f1e9faa 100644 --- a/tools/debugdump/src/main.rs +++ b/tools/debugdump/src/main.rs @@ -13,14 +13,15 @@ use std::{ use clap::{Parser, ValueEnum}; use miden_core::{ - Decorator, - utils::{Deserializable, SliceReader}, + mast::MastForest, + operations::DebugVarInfo, + serde::{Deserializable, SliceReader}, }; use miden_mast_package::{ - MastForest, Package, SectionId, + Package, SectionId, debug_info::{ - DebugFileInfo, DebugFunctionInfo, DebugInfoSection, DebugPrimitiveType, DebugTypeInfo, - DebugVariableInfo, + DebugFileInfo, DebugFunctionInfo, DebugFunctionsSection, DebugPrimitiveType, + DebugSourcesSection, DebugTypeIdx, DebugTypeInfo, DebugTypesSection, DebugVariableInfo, }, }; @@ -34,6 +35,40 @@ enum Error { NoDebugInfo, } +/// Holds the three debug info sections with helper accessors. +struct DebugSections { + types: DebugTypesSection, + sources: DebugSourcesSection, + functions: DebugFunctionsSection, +} + +impl DebugSections { + /// Look up a string in the types section's string table. + fn get_type_string(&self, idx: u32) -> Option { + self.types.get_string(idx).map(|s| s.to_string()) + } + + /// Look up a string in the sources section's string table. + fn get_source_string(&self, idx: u32) -> Option { + self.sources.get_string(idx).map(|s| s.to_string()) + } + + /// Look up a string in the functions section's string table. + fn get_func_string(&self, idx: u32) -> Option { + self.functions.get_string(idx).map(|s| s.to_string()) + } + + /// Look up a type by index. 
+ fn get_type(&self, idx: DebugTypeIdx) -> Option<&DebugTypeInfo> { + self.types.get_type(idx) + } + + /// Look up a file by index. + fn get_file(&self, idx: u32) -> Option<&DebugFileInfo> { + self.sources.get_file(idx) + } +} + /// A tool to dump debug information from MASP packages #[derive(Parser, Debug)] #[command( @@ -97,22 +132,49 @@ fn run() -> Result<(), Error> { reader.read_to_end(&mut bytes)?; // Parse the package - let package = Package::read_from(&mut SliceReader::new(&bytes)) + let package: Package = Package::read_from(&mut SliceReader::new(&bytes)) .map_err(|e| Error::Parse(e.to_string()))?; // Get the MAST forest for location decorators let mast_forest = package.mast.mast_forest(); - // Find the debug_info section - let debug_section = package + // Find the three debug sections + let types_section = package + .sections + .iter() + .find(|s| s.id == SectionId::DEBUG_TYPES); + let sources_section = package .sections .iter() - .find(|s| s.id == SectionId::DEBUG_INFO) - .ok_or(Error::NoDebugInfo)?; + .find(|s| s.id == SectionId::DEBUG_SOURCES); + let functions_section = package + .sections + .iter() + .find(|s| s.id == SectionId::DEBUG_FUNCTIONS); - // Parse the debug info - let debug_info = DebugInfoSection::read_from(&mut SliceReader::new(&debug_section.data)) - .map_err(|e| Error::Parse(e.to_string()))?; + // We need at least one section to proceed + if types_section.is_none() && sources_section.is_none() && functions_section.is_none() { + return Err(Error::NoDebugInfo); + } + + // Parse each section (use empty defaults if missing) + let types: DebugTypesSection = match types_section { + Some(s) => DebugTypesSection::read_from(&mut SliceReader::new(&s.data)) + .map_err(|e| Error::Parse(e.to_string()))?, + None => DebugTypesSection::new(), + }; + let sources: DebugSourcesSection = match sources_section { + Some(s) => DebugSourcesSection::read_from(&mut SliceReader::new(&s.data)) + .map_err(|e| Error::Parse(e.to_string()))?, + None => 
DebugSourcesSection::new(), + }; + let functions: DebugFunctionsSection = match functions_section { + Some(s) => DebugFunctionsSection::read_from(&mut SliceReader::new(&s.data)) + .map_err(|e| Error::Parse(e.to_string()))?, + None => DebugFunctionsSection::new(), + }; + + let debug_sections = DebugSections { types, sources, functions }; // Print header println!("{}", "=".repeat(80)); @@ -126,96 +188,131 @@ fn run() -> Result<(), Error> { .map(|v| v.to_string()) .unwrap_or_else(|| "unknown".into()) ); - println!("Debug info version: {}", debug_info.version); + println!( + "Debug info versions: types={}, sources={}, functions={}", + debug_sections.types.version, + debug_sections.sources.version, + debug_sections.functions.version, + ); println!("{}", "=".repeat(80)); println!(); if cli.summary { - print_summary(&debug_info, mast_forest); + print_summary(&debug_sections, mast_forest); return Ok(()); } match cli.section { - Some(DumpSection::Strings) => print_strings(&debug_info), - Some(DumpSection::Types) => print_types(&debug_info, cli.raw), - Some(DumpSection::Files) => print_files(&debug_info, cli.raw), - Some(DumpSection::Functions) => print_functions(&debug_info, cli.raw, cli.verbose), - Some(DumpSection::Variables) => print_variables(&debug_info, cli.raw), - Some(DumpSection::Locations) => print_locations(mast_forest, &debug_info, cli.verbose), + Some(DumpSection::Strings) => print_strings(&debug_sections), + Some(DumpSection::Types) => print_types(&debug_sections, cli.raw), + Some(DumpSection::Files) => print_files(&debug_sections, cli.raw), + Some(DumpSection::Functions) => print_functions(&debug_sections, cli.raw, cli.verbose), + Some(DumpSection::Variables) => print_variables(&debug_sections, cli.raw), + Some(DumpSection::Locations) => print_locations(mast_forest, &debug_sections, cli.verbose), None => { // Print everything - print_summary(&debug_info, mast_forest); + print_summary(&debug_sections, mast_forest); println!(); - print_strings(&debug_info); + 
print_strings(&debug_sections); println!(); - print_types(&debug_info, cli.raw); + print_types(&debug_sections, cli.raw); println!(); - print_files(&debug_info, cli.raw); + print_files(&debug_sections, cli.raw); println!(); - print_functions(&debug_info, cli.raw, cli.verbose); + print_functions(&debug_sections, cli.raw, cli.verbose); println!(); - print_locations(mast_forest, &debug_info, cli.verbose); + print_locations(mast_forest, &debug_sections, cli.verbose); } } Ok(()) } -fn print_summary(debug_info: &DebugInfoSection, mast_forest: &MastForest) { +fn print_summary(debug_sections: &DebugSections, mast_forest: &MastForest) { println!(".debug_info summary:"); - println!(" Strings: {} entries", debug_info.strings.len()); - println!(" Types: {} entries", debug_info.types.len()); - println!(" Files: {} entries", debug_info.files.len()); - println!(" Functions: {} entries", debug_info.functions.len()); + println!( + " Strings: {} (types) + {} (sources) + {} (functions)", + debug_sections.types.strings.len(), + debug_sections.sources.strings.len(), + debug_sections.functions.strings.len(), + ); + println!(" Types: {} entries", debug_sections.types.types.len()); + println!(" Files: {} entries", debug_sections.sources.files.len()); + println!( + " Functions: {} entries", + debug_sections.functions.functions.len() + ); - let total_vars: usize = debug_info.functions.iter().map(|f| f.variables.len()).sum(); - let total_inlined: usize = debug_info.functions.iter().map(|f| f.inlined_calls.len()).sum(); + let total_vars: usize = debug_sections + .functions + .functions + .iter() + .map(|f| f.variables.len()) + .sum(); + let total_inlined: usize = debug_sections + .functions + .functions + .iter() + .map(|f| f.inlined_calls.len()) + .sum(); println!(" Variables: {} total (across all functions)", total_vars); println!(" Inlined: {} call sites", total_inlined); - // Count DebugVar decorators in MAST - let debug_var_count = mast_forest - .decorators() - .iter() - .filter(|d| 
matches!(d, Decorator::DebugVar(_))) - .count(); - println!(" DebugVar decorators: {} in MAST", debug_var_count); + // Count debug vars in MAST + let debug_var_count = mast_forest.debug_info().debug_vars().len(); + println!(" DebugVar entries: {} in MAST", debug_var_count); } -fn print_strings(debug_info: &DebugInfoSection) { +fn print_strings(debug_sections: &DebugSections) { println!(".debug_str contents:"); println!("{:-<80}", ""); - for (idx, s) in debug_info.strings.iter().enumerate() { + + println!(" [types string table]"); + for (idx, s) in debug_sections.types.strings.iter().enumerate() { + println!(" [{:4}] \"{}\"", idx, s); + } + println!(); + println!(" [sources string table]"); + for (idx, s) in debug_sections.sources.strings.iter().enumerate() { + println!(" [{:4}] \"{}\"", idx, s); + } + println!(); + println!(" [functions string table]"); + for (idx, s) in debug_sections.functions.strings.iter().enumerate() { println!(" [{:4}] \"{}\"", idx, s); } } -fn print_types(debug_info: &DebugInfoSection, raw: bool) { +fn print_types(debug_sections: &DebugSections, raw: bool) { println!(".debug_types contents:"); println!("{:-<80}", ""); - for (idx, ty) in debug_info.types.iter().enumerate() { + for (idx, ty) in debug_sections.types.types.iter().enumerate() { print!(" [{:4}] ", idx); - print_type(ty, debug_info, raw, 0); + print_type(ty, debug_sections, raw, 0); println!(); } } -fn print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, indent: usize) { +fn print_type(ty: &DebugTypeInfo, debug_sections: &DebugSections, raw: bool, indent: usize) { let pad = " ".repeat(indent); match ty { DebugTypeInfo::Primitive(prim) => { print!("{}PRIMITIVE: {}", pad, primitive_name(*prim)); - print!(" (size: {} bytes, {} felts)", prim.size_in_bytes(), prim.size_in_felts()); + print!( + " (size: {} bytes, {} felts)", + prim.size_in_bytes(), + prim.size_in_felts() + ); } DebugTypeInfo::Pointer { pointee_type_idx } => { if raw { - print!("{}POINTER -> type[{}]", 
pad, pointee_type_idx); + print!("{}POINTER -> type[{}]", pad, pointee_type_idx.as_u32()); } else { print!("{}POINTER -> ", pad); - if let Some(pointee) = debug_info.get_type(*pointee_type_idx) { - print_type_brief(pointee, debug_info); + if let Some(pointee) = debug_sections.get_type(*pointee_type_idx) { + print_type_brief(pointee, debug_sections); } else { - print!("", pointee_type_idx); + print!("", pointee_type_idx.as_u32()); } } } @@ -224,11 +321,16 @@ fn print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, inde count, } => { if raw { - print!("{}ARRAY [{}; {:?}]", pad, element_type_idx, count); + print!( + "{}ARRAY [{}; {:?}]", + pad, + element_type_idx.as_u32(), + count + ); } else { print!("{}ARRAY [", pad); - if let Some(elem) = debug_info.get_type(*element_type_idx) { - print_type_brief(elem, debug_info); + if let Some(elem) = debug_sections.get_type(*element_type_idx) { + print_type_brief(elem, debug_sections); } else { print!(""); } @@ -246,7 +348,9 @@ fn print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, inde let name = if raw { format!("str[{}]", name_idx) } else { - debug_info.get_string(*name_idx).unwrap_or("").to_string() + debug_sections + .get_type_string(*name_idx) + .unwrap_or_else(|| "".into()) }; print!("{}STRUCT {} (size: {} bytes)", pad, name, size); if !fields.is_empty() { @@ -255,11 +359,13 @@ fn print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, inde let field_name = if raw { format!("str[{}]", field.name_idx) } else { - debug_info.get_string(field.name_idx).unwrap_or("").to_string() + debug_sections + .get_type_string(field.name_idx) + .unwrap_or_else(|| "".into()) }; print!("{} +{:4}: {} : ", pad, field.offset, field_name); - if let Some(fty) = debug_info.get_type(field.type_idx) { - print_type_brief(fty, debug_info); + if let Some(fty) = debug_sections.get_type(field.type_idx) { + print_type_brief(fty, debug_sections); } else { print!(""); } @@ -277,9 +383,9 @@ fn 
print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, inde print!(", "); } if raw { - print!("type[{}]", param_idx); - } else if let Some(pty) = debug_info.get_type(*param_idx) { - print_type_brief(pty, debug_info); + print!("type[{}]", param_idx.as_u32()); + } else if let Some(pty) = debug_sections.get_type(*param_idx) { + print_type_brief(pty, debug_sections); } else { print!(""); } @@ -288,9 +394,9 @@ fn print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, inde match return_type_idx { Some(idx) => { if raw { - print!("type[{}]", idx); - } else if let Some(rty) = debug_info.get_type(*idx) { - print_type_brief(rty, debug_info); + print!("type[{}]", idx.as_u32()); + } else if let Some(rty) = debug_sections.get_type(*idx) { + print_type_brief(rty, debug_sections); } else { print!(""); } @@ -304,13 +410,13 @@ fn print_type(ty: &DebugTypeInfo, debug_info: &DebugInfoSection, raw: bool, inde } } -fn print_type_brief(ty: &DebugTypeInfo, debug_info: &DebugInfoSection) { +fn print_type_brief(ty: &DebugTypeInfo, debug_sections: &DebugSections) { match ty { DebugTypeInfo::Primitive(prim) => print!("{}", primitive_name(*prim)), DebugTypeInfo::Pointer { pointee_type_idx } => { print!("*"); - if let Some(p) = debug_info.get_type(*pointee_type_idx) { - print_type_brief(p, debug_info); + if let Some(p) = debug_sections.get_type(*pointee_type_idx) { + print_type_brief(p, debug_sections); } } DebugTypeInfo::Array { @@ -318,8 +424,8 @@ fn print_type_brief(ty: &DebugTypeInfo, debug_info: &DebugInfoSection) { count, } => { print!("["); - if let Some(e) = debug_info.get_type(*element_type_idx) { - print_type_brief(e, debug_info); + if let Some(e) = debug_sections.get_type(*element_type_idx) { + print_type_brief(e, debug_sections); } match count { Some(n) => print!("; {}]", n), @@ -327,7 +433,12 @@ fn print_type_brief(ty: &DebugTypeInfo, debug_info: &DebugInfoSection) { } } DebugTypeInfo::Struct { name_idx, .. 
} => { - print!("struct {}", debug_info.get_string(*name_idx).unwrap_or("?")); + print!( + "struct {}", + debug_sections + .get_type_string(*name_idx) + .unwrap_or_else(|| "?".into()) + ); } DebugTypeInfo::Function { .. } => print!("fn(...)"), DebugTypeInfo::Unknown => print!("?"), @@ -355,19 +466,21 @@ fn primitive_name(prim: DebugPrimitiveType) -> &'static str { } } -fn print_files(debug_info: &DebugInfoSection, raw: bool) { +fn print_files(debug_sections: &DebugSections, raw: bool) { println!(".debug_files contents:"); println!("{:-<80}", ""); - for (idx, file) in debug_info.files.iter().enumerate() { - print_file(idx, file, debug_info, raw); + for (idx, file) in debug_sections.sources.files.iter().enumerate() { + print_file(idx, file, debug_sections, raw); } } -fn print_file(idx: usize, file: &DebugFileInfo, debug_info: &DebugInfoSection, raw: bool) { +fn print_file(idx: usize, file: &DebugFileInfo, debug_sections: &DebugSections, raw: bool) { let path = if raw { format!("str[{}]", file.path_idx) } else { - debug_info.get_string(file.path_idx).unwrap_or("").to_string() + debug_sections + .get_source_string(file.path_idx) + .unwrap_or_else(|| "".into()) }; print!(" [{:4}] {}", idx, path); @@ -383,11 +496,11 @@ fn print_file(idx: usize, file: &DebugFileInfo, debug_info: &DebugInfoSection, r println!(); } -fn print_functions(debug_info: &DebugInfoSection, raw: bool, verbose: bool) { +fn print_functions(debug_sections: &DebugSections, raw: bool, verbose: bool) { println!(".debug_functions contents:"); println!("{:-<80}", ""); - for (idx, func) in debug_info.functions.iter().enumerate() { - print_function(idx, func, debug_info, raw, verbose); + for (idx, func) in debug_sections.functions.functions.iter().enumerate() { + print_function(idx, func, debug_sections, raw, verbose); println!(); } } @@ -395,14 +508,16 @@ fn print_functions(debug_info: &DebugInfoSection, raw: bool, verbose: bool) { fn print_function( idx: usize, func: &DebugFunctionInfo, - debug_info: 
&DebugInfoSection, + debug_sections: &DebugSections, raw: bool, verbose: bool, ) { let name = if raw { format!("str[{}]", func.name_idx) } else { - debug_info.get_string(func.name_idx).unwrap_or("").to_string() + debug_sections + .get_func_string(func.name_idx) + .unwrap_or_else(|| "".into()) }; println!(" [{:4}] FUNCTION: {}", idx, name); @@ -412,7 +527,9 @@ fn print_function( let linkage = if raw { format!("str[{}]", linkage_idx) } else { - debug_info.get_string(linkage_idx).unwrap_or("").to_string() + debug_sections + .get_func_string(linkage_idx) + .unwrap_or_else(|| "".into()) }; println!(" Linkage name: {}", linkage); } @@ -421,21 +538,23 @@ fn print_function( let file_path = if raw { format!("file[{}]", func.file_idx) } else { - debug_info + debug_sections .get_file(func.file_idx) - .and_then(|f| debug_info.get_string(f.path_idx)) - .unwrap_or("") - .to_string() + .and_then(|f| debug_sections.get_source_string(f.path_idx)) + .unwrap_or_else(|| "".into()) }; - println!(" Location: {}:{}:{}", file_path, func.line, func.column); + println!( + " Location: {}:{}:{}", + file_path, func.line, func.column + ); // Type if let Some(type_idx) = func.type_idx { print!(" Type: "); if raw { - println!("type[{}]", type_idx); - } else if let Some(ty) = debug_info.get_type(type_idx) { - print_type_brief(ty, debug_info); + println!("type[{}]", type_idx.as_u32()); + } else if let Some(ty) = debug_sections.get_type(type_idx) { + print_type_brief(ty, debug_sections); println!(); } else { println!(""); @@ -445,7 +564,7 @@ fn print_function( // MAST root if let Some(root) = &func.mast_root { print!(" MAST root: 0x"); - for byte in root { + for byte in &root.as_bytes() { print!("{:02x}", byte); } println!(); @@ -455,32 +574,34 @@ fn print_function( if !func.variables.is_empty() { println!(" Variables ({}):", func.variables.len()); for var in &func.variables { - print_variable(var, debug_info, raw, verbose); + print_variable(var, debug_sections, raw, verbose); } } // Inlined calls 
if !func.inlined_calls.is_empty() && verbose { - println!(" Inlined calls ({}):", func.inlined_calls.len()); + println!( + " Inlined calls ({}):", + func.inlined_calls.len() + ); for call in &func.inlined_calls { let callee = if raw { format!("func[{}]", call.callee_idx) } else { - debug_info + debug_sections + .functions .functions .get(call.callee_idx as usize) - .and_then(|f| debug_info.get_string(f.name_idx)) - .unwrap_or("") - .to_string() + .and_then(|f| debug_sections.get_func_string(f.name_idx)) + .unwrap_or_else(|| "".into()) }; let call_file = if raw { format!("file[{}]", call.file_idx) } else { - debug_info + debug_sections .get_file(call.file_idx) - .and_then(|f| debug_info.get_string(f.path_idx)) - .unwrap_or("") - .to_string() + .and_then(|f| debug_sections.get_source_string(f.path_idx)) + .unwrap_or_else(|| "".into()) }; println!( " - {} inlined at {}:{}:{}", @@ -492,14 +613,16 @@ fn print_function( fn print_variable( var: &DebugVariableInfo, - debug_info: &DebugInfoSection, + debug_sections: &DebugSections, raw: bool, _verbose: bool, ) { let name = if raw { format!("str[{}]", var.name_idx) } else { - debug_info.get_string(var.name_idx).unwrap_or("").to_string() + debug_sections + .get_func_string(var.name_idx) + .unwrap_or_else(|| "".into()) }; let kind = if var.is_parameter() { @@ -511,9 +634,9 @@ fn print_variable( print!(" - {} ({}): ", name, kind); if raw { - print!("type[{}]", var.type_idx); - } else if let Some(ty) = debug_info.get_type(var.type_idx) { - print_type_brief(ty, debug_info); + print!("type[{}]", var.type_idx.as_u32()); + } else if let Some(ty) = debug_sections.get_type(var.type_idx) { + print_type_brief(ty, debug_sections); } else { print!(""); } @@ -527,59 +650,50 @@ fn print_variable( println!(); } -fn print_variables(debug_info: &DebugInfoSection, raw: bool) { +fn print_variables(debug_sections: &DebugSections, raw: bool) { println!(".debug_variables contents (all functions):"); println!("{:-<80}", ""); - for func in 
&debug_info.functions { + for func in &debug_sections.functions.functions { if func.variables.is_empty() { continue; } - let func_name = debug_info.get_string(func.name_idx).unwrap_or(""); + let func_name = debug_sections + .get_func_string(func.name_idx) + .unwrap_or_else(|| "".into()); println!(" Function: {}", func_name); for var in &func.variables { - print_variable(var, debug_info, raw, false); + print_variable(var, debug_sections, raw, false); } println!(); } } -/// Prints the .debug_loc section - variable location decorators from MAST +/// Prints the .debug_loc section - variable location entries from MAST /// /// This is analogous to DWARF's .debug_loc section which contains location /// lists describing where a variable's value can be found at runtime. -fn print_locations(mast_forest: &MastForest, debug_info: &DebugInfoSection, verbose: bool) { - println!(".debug_loc contents (DebugVar decorators from MAST):"); +fn print_locations(mast_forest: &MastForest, debug_sections: &DebugSections, verbose: bool) { + println!(".debug_loc contents (DebugVar entries from MAST):"); println!("{:-<80}", ""); - // Collect all DebugVar decorators - let debug_vars: Vec<_> = mast_forest - .decorators() - .iter() - .enumerate() - .filter_map(|(idx, dec)| { - if let Decorator::DebugVar(info) = dec { - Some((idx, info)) - } else { - None - } - }) - .collect(); + // Collect all debug vars from the MastForest + let debug_vars = mast_forest.debug_info().debug_vars(); if debug_vars.is_empty() { - println!(" (no DebugVar decorators found)"); + println!(" (no DebugVar entries found)"); return; } // Group by variable name for a cleaner view - let mut by_name: BTreeMap<&str, Vec<(usize, &miden_core::DebugVarInfo)>> = BTreeMap::new(); - for (idx, info) in &debug_vars { - by_name.entry(info.name()).or_default().push((*idx, *info)); + let mut by_name: BTreeMap<&str, Vec<(usize, &DebugVarInfo)>> = BTreeMap::new(); + for (idx, info) in debug_vars.iter().enumerate() { + 
by_name.entry(info.name()).or_default().push((idx, info)); } - println!(" Total DebugVar decorators: {}", debug_vars.len()); + println!(" Total DebugVar entries: {}", debug_vars.len()); println!(" Unique variable names: {}", by_name.len()); println!(); @@ -587,8 +701,8 @@ fn print_locations(mast_forest: &MastForest, debug_info: &DebugInfoSection, verb println!(" Variable: \"{}\"", name); println!(" {} location entries:", entries.len()); - for (decorator_idx, info) in entries { - print!(" [dec#{}] ", decorator_idx); + for (var_idx, info) in entries { + print!(" [var#{}] ", var_idx); // Print value location print!("{}", info.value_location()); @@ -600,9 +714,10 @@ fn print_locations(mast_forest: &MastForest, debug_info: &DebugInfoSection, verb // Print type info if present and we can resolve it if let Some(type_id) = info.type_id() { - if let Some(ty) = debug_info.get_type(type_id) { + let type_idx = DebugTypeIdx::from(type_id); + if let Some(ty) = debug_sections.get_type(type_idx) { print!(" : "); - print_type_brief(ty, debug_info); + print_type_brief(ty, debug_sections); } else { print!(" : type[{}]", type_id); } @@ -618,11 +733,11 @@ fn print_locations(mast_forest: &MastForest, debug_info: &DebugInfoSection, verb println!(); } - // In verbose mode, also show raw decorator list + // In verbose mode, also show raw list if verbose { - println!(" Raw decorator list (in order):"); + println!(" Raw debug var list (in order):"); println!(" {:-<76}", ""); - for (idx, info) in &debug_vars { + for (idx, info) in debug_vars.iter().enumerate() { println!(" [{:4}] {}", idx, info); } } From a57c49931be4562197aa98af7ea87db1bfd0d955 Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 31 Mar 2026 13:08:55 +0200 Subject: [PATCH 08/32] =?UTF-8?q?fix:=20rebase=20recovery=20=E2=80=94=20en?= =?UTF-8?q?tity=20alignment?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix data_offset_align and raw_entity_metadata_layout_for_value_layout to correctly 
handle types with alignment > 8 --- codegen/masm/src/lower/component.rs | 37 +++++++++---------- codegen/masm/src/lower/lowering.rs | 14 ++----- frontend/wasm/src/code_translator/mod.rs | 2 + frontend/wasm/src/module/func_translator.rs | 1 + .../wasm/src/module/function_builder_ext.rs | 23 +++++------- hir-macros/src/operation.rs | 1 - .../expected/debug_variable_locations.hir | 4 ++ 7 files changed, 37 insertions(+), 45 deletions(-) diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index c7d419946..126d1eb17 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -647,22 +647,18 @@ impl MasmFunctionBuilder { num_locals, } = self; - // Compute total WASM locals count for FMP offset calculation. - // WASM locals = params (in felts) + local variables (in felts). - // This is needed because DWARF's WasmLocal(idx) uses WASM indexing where - // params come first, while num_locals only counts HIR spilled values. - let num_params_in_felts: u16 = function - .signature() - .params - .iter() - .map(|p| p.ty.size_in_felts() as u16) - .sum(); - let num_wasm_locals = num_params_in_felts + num_locals; + // Align num_locals to WORD_SIZE, matching the assembler's FMP frame sizing. + // num_locals already counts all HIR locals (including those allocated for params). + // The assembler rounds up to next_multiple_of(WORD_SIZE) when advancing FMP + // (see fmp.rs fmp_start_frame_sequence and mem_ops.rs locaddr), so we must use + // the same alignment for debug var offset computation. + let aligned_num_locals = num_locals.next_multiple_of(miden_core::WORD_SIZE as u16); // Patch DebugVar Local locations to compute FMP offset. // During lowering, Local(idx) stores the raw WASM local index. 
- // Now convert to FMP offset: idx - num_wasm_locals - patch_debug_var_locals_in_block(&mut body, num_wasm_locals); + // Now convert to FMP offset: idx - aligned_num_locals + // This matches locaddr.N which computes -(aligned_num_locals - N). + patch_debug_var_locals_in_block(&mut body, aligned_num_locals); // Strip DebugVar-only procedure bodies. // The Miden assembler rejects procedures whose bodies contain only decorators @@ -707,8 +703,9 @@ fn block_has_real_instructions(block: &masm::Block) -> bool { /// Recursively patch DebugVar Local locations in a block. /// /// Converts `Local(idx)` where idx is the raw WASM local index to `Local(offset)` -/// where offset = idx - num_locals (the FMP offset, typically negative). -fn patch_debug_var_locals_in_block(block: &mut masm::Block, num_locals: u16) { +/// where offset = idx - aligned_num_locals (the FMP-relative offset, typically negative). +/// This matches the assembler's `locaddr.N` formula: `FMP - aligned_num_locals + N`. +fn patch_debug_var_locals_in_block(block: &mut masm::Block, aligned_num_locals: u16) { for op in block.iter_mut() { match op { masm::Op::Inst(span_inst) => { @@ -716,7 +713,7 @@ fn patch_debug_var_locals_in_block(block: &mut masm::Block, num_locals: u16) { if let masm::Instruction::DebugVar(info) = &mut **span_inst { if let DebugVarLocation::Local(idx) = info.value_location() { // Convert raw WASM local index to FMP offset - let fmp_offset = *idx - (num_locals as i16); + let fmp_offset = *idx - (aligned_num_locals as i16); // Create new info with patched location, preserving all fields let mut new_info = DebugVarInfo::new( @@ -737,14 +734,14 @@ fn patch_debug_var_locals_in_block(block: &mut masm::Block, num_locals: u16) { } } masm::Op::If { then_blk, else_blk, .. 
} => { - patch_debug_var_locals_in_block(then_blk, num_locals); - patch_debug_var_locals_in_block(else_blk, num_locals); + patch_debug_var_locals_in_block(then_blk, aligned_num_locals); + patch_debug_var_locals_in_block(else_blk, aligned_num_locals); } masm::Op::While { body: while_body, .. } => { - patch_debug_var_locals_in_block(while_body, num_locals); + patch_debug_var_locals_in_block(while_body, aligned_num_locals); } masm::Op::Repeat { body: repeat_body, .. } => { - patch_debug_var_locals_in_block(repeat_body, num_locals); + patch_debug_var_locals_in_block(repeat_body, aligned_num_locals); } } } diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 768ccc2a6..1c35b8205 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1306,16 +1306,10 @@ impl HirLowering for debuginfo::DebugValue { match first_op { DIExpressionOp::WasmStack(offset) => DebugVarLocation::Stack(*offset as u8), DIExpressionOp::WasmLocal(idx) => { - // First check if the value is on the Miden operand stack. - // WASM locals might stay on the stack in Miden if not spilled. - if let Some(pos) = emitter.stack.find(&value) { - DebugVarLocation::Stack(pos as u8) - } else { - // Value is not on stack, assume it's in local memory. - // Store raw WASM local index temporarily. The FMP offset will be - // computed later in MasmFunctionBuilder::build() when num_locals is known. - DebugVarLocation::Local(*idx as i16) - } + // WASM locals are always stored in memory via FMP in Miden. + // Store raw WASM local index; the FMP offset will be computed + // later in MasmFunctionBuilder::build() when num_locals is known. 
+ DebugVarLocation::Local(*idx as i16) } DIExpressionOp::WasmGlobal(_) | DIExpressionOp::Deref => { // For global or dereference, check the stack position of the value diff --git a/frontend/wasm/src/code_translator/mod.rs b/frontend/wasm/src/code_translator/mod.rs index 2c9f12a35..acbb4d0cd 100644 --- a/frontend/wasm/src/code_translator/mod.rs +++ b/frontend/wasm/src/code_translator/mod.rs @@ -96,6 +96,7 @@ pub fn translate_operator( val }; builder.store_local(local, val, span)?; + builder.emit_dbg_value_for_var(var, val, span); } Operator::LocalTee { local_index } => { let var = Variable::from_u32(*local_index); @@ -116,6 +117,7 @@ pub fn translate_operator( val }; builder.store_local(local, val, span)?; + builder.emit_dbg_value_for_var(var, val, span); } /********************************** Globals ****************************************/ Operator::GlobalGet { global_index } => { diff --git a/frontend/wasm/src/module/func_translator.rs b/frontend/wasm/src/module/func_translator.rs index aa54f3bfb..de9cc6b8a 100644 --- a/frontend/wasm/src/module/func_translator.rs +++ b/frontend/wasm/src/module/func_translator.rs @@ -144,6 +144,7 @@ fn declare_parameters( let param_value = entry_block.borrow().arguments()[i]; builder.def_var(var, param_value); + builder.register_parameter(var, param_value); builder.store_local(local, param_value, SourceSpan::UNKNOWN).unwrap(); } next_local diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index 74c7c71e2..bb88e6c8d 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -166,7 +166,7 @@ impl FunctionBuilderExt<'_, B> { self.refresh_function_debug_attrs(); } - fn emit_dbg_value_for_var(&mut self, var: Variable, value: ValueRef, span: SourceSpan) { + pub fn emit_dbg_value_for_var(&mut self, var: Variable, value: ValueRef, span: SourceSpan) { let Some(info) = self.debug_info.as_ref() else { return; }; @@ 
-191,8 +191,15 @@ impl FunctionBuilderExt<'_, B> { attr.column = column; } + // If DWARF didn't provide a location expression, synthesize one from the + // wasm local index — we know this variable is stored as a wasm local. + let expr = expr_opt.or_else(|| { + let ops = vec![midenc_hir::DIExpressionOp::WasmLocal(idx as u32)]; + Some(midenc_hir::DIExpression::with_ops(ops)) + }); + if let Err(err) = - DebugInfoOpBuilder::builder_mut(self).debug_value_with_expr(value, attr, expr_opt, span) + DebugInfoOpBuilder::builder_mut(self).debug_value_with_expr(value, attr, expr, span) { warn!("failed to emit dbg.value for local {idx}: {err:?}"); } @@ -615,18 +622,6 @@ impl FunctionBuilderExt<'_, B> { self.param_dbg_emitted = true; let params: Vec<_> = self.param_values.to_vec(); for (var, value) in params { - let skip_due_to_schedule = if let Some(info_rc) = self.debug_info.as_ref() { - let info = info_rc.borrow(); - info.locals - .get(var.index()) - .and_then(|entry| entry.as_ref()) - .is_some_and(|entry| !entry.locations.is_empty()) - } else { - false - }; - if skip_due_to_schedule { - continue; - } self.emit_dbg_value_for_var(var, value, span); } } diff --git a/hir-macros/src/operation.rs b/hir-macros/src/operation.rs index ae9ba14f5..2e0a35d7b 100644 --- a/hir-macros/src/operation.rs +++ b/hir-macros/src/operation.rs @@ -471,7 +471,6 @@ impl quote::ToTokens for WithAttrs<'_> { for param in self.0.op_builder_impl.create_params.iter() { if let OpCreateParamType::Attr(OpAttribute { name, ty, .. }) = ¶m.param_ty { let span = name.span(); - let field_name = syn::Lit::Str(syn::LitStr::new(&format!("{name}"), span)); tokens.extend(quote_spanned! 
{ span => op_builder.with_property::<#ty, _>(#field_name, #name)?; }); diff --git a/tests/integration/expected/debug_variable_locations.hir b/tests/integration/expected/debug_variable_locations.hir index d8629ee01..13b34af29 100644 --- a/tests/integration/expected/debug_variable_locations.hir +++ b/tests/integration/expected/debug_variable_locations.hir @@ -4,8 +4,10 @@ builtin.component private @root_ns:root@1.0.0 { hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); %2 = arith.constant 0 : i32; hir.store_local %2 <{ local = #builtin.local_variable<1, i32> }> : (i32); + "debuginfo.debug_value"(%2) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : i32 -> (); %3 = arith.constant 0 : i32; hir.store_local %3 <{ local = #builtin.local_variable<2, i32> }> : (i32); + "debuginfo.debug_value"(%3) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : i32 -> (); cf.br ^block8; ^block7(%1: i32): @@ -27,10 +29,12 @@ builtin.component private @root_ns:root@1.0.0 { %16 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; %17 = arith.add %15, %16 <{ overflow = #builtin.overflow }>; hir.store_local %17 <{ local = #builtin.local_variable<2, i32> }> : (i32); + "debuginfo.debug_value"(%17) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : i32 -> (); %18 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; %19 = arith.constant 1 : i32; %20 = arith.add %18, %19 <{ overflow = #builtin.overflow }>; hir.store_local %20 <{ local = #builtin.local_variable<1, i32> }> : (i32); + "debuginfo.debug_value"(%20) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : i32 -> (); cf.br ^block8; ^block11: %14 = hir.load_local <{ local = #builtin.local_variable<2, i32> }>; From c403ea822f236480412d1356948f458705c7a332 Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 31 Mar 2026 17:13:13 +0200 Subject: [PATCH 09/32] fixup: several bugfixes 
for var loc - add FrameBase lowering - DWARF parsing (parse variable/params names) - and fix duplicate DebugVar emissions --- codegen/masm/src/lower/component.rs | 52 ++-- codegen/masm/src/lower/lowering.rs | 7 + frontend/wasm/src/module/debug_info.rs | 282 ++++++++++++++---- frontend/wasm/src/module/func_translator.rs | 26 +- .../wasm/src/module/function_builder_ext.rs | 48 ++- hir/src/attributes/debug.rs | 6 + 6 files changed, 332 insertions(+), 89 deletions(-) diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index 126d1eb17..bc0b13024 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -2,7 +2,7 @@ use alloc::{collections::BTreeSet, sync::Arc, vec::Vec}; use miden_assembly::{PathBuf as LibraryPath, ast::InvocationTarget}; use miden_assembly_syntax::{ast::Attribute, parser::WordValue}; -use miden_core::operations::{DebugVarInfo, DebugVarLocation}; +use miden_core::operations::DebugVarLocation; use midenc_hir::{ FunctionIdent, Op, OpExt, SourceSpan, Span, Symbol, TraceTarget, ValueRef, diagnostics::IntoDiagnostic, dialects::builtin, pass::AnalysisManager, @@ -654,11 +654,15 @@ impl MasmFunctionBuilder { // the same alignment for debug var offset computation. let aligned_num_locals = num_locals.next_multiple_of(miden_core::WORD_SIZE as u16); + // Resolve FrameBase global_index → Miden memory address. + // Use the stack pointer offset from the linker's global layout. + let stack_pointer_addr = link_info.globals_layout().stack_pointer_offset(); + // Patch DebugVar Local locations to compute FMP offset. // During lowering, Local(idx) stores the raw WASM local index. // Now convert to FMP offset: idx - aligned_num_locals // This matches locaddr.N which computes -(aligned_num_locals - N). - patch_debug_var_locals_in_block(&mut body, aligned_num_locals); + patch_debug_var_locals_in_block(&mut body, aligned_num_locals, stack_pointer_addr); // Strip DebugVar-only procedure bodies. 
// The Miden assembler rejects procedures whose bodies contain only decorators @@ -700,12 +704,19 @@ fn block_has_real_instructions(block: &masm::Block) -> bool { }) } -/// Recursively patch DebugVar Local locations in a block. +/// Recursively patch DebugVar locations in a block. /// /// Converts `Local(idx)` where idx is the raw WASM local index to `Local(offset)` /// where offset = idx - aligned_num_locals (the FMP-relative offset, typically negative). /// This matches the assembler's `locaddr.N` formula: `FMP - aligned_num_locals + N`. -fn patch_debug_var_locals_in_block(block: &mut masm::Block, aligned_num_locals: u16) { +/// +/// Also resolves `FrameBase { global_index, byte_offset }` by replacing the WASM +/// global index with the resolved Miden memory address of the stack pointer. +fn patch_debug_var_locals_in_block( + block: &mut masm::Block, + aligned_num_locals: u16, + stack_pointer_addr: Option, +) { for op in block.iter_mut() { match op { masm::Op::Inst(span_inst) => { @@ -714,34 +725,29 @@ fn patch_debug_var_locals_in_block(block: &mut masm::Block, aligned_num_locals: if let DebugVarLocation::Local(idx) = info.value_location() { // Convert raw WASM local index to FMP offset let fmp_offset = *idx - (aligned_num_locals as i16); - - // Create new info with patched location, preserving all fields - let mut new_info = DebugVarInfo::new( - info.name(), - DebugVarLocation::Local(fmp_offset), - ); - if let Some(type_id) = info.type_id() { - new_info.set_type_id(type_id); - } - if let Some(arg_index) = info.arg_index() { - new_info.set_arg_index(arg_index.get()); - } - if let Some(loc) = info.location() { - new_info.set_location(loc.clone()); + info.set_value_location(DebugVarLocation::Local(fmp_offset)); + } else if let DebugVarLocation::FrameBase { byte_offset, .. } = info.value_location() { + // Resolve FrameBase: replace WASM global index with + // the Miden memory address of the stack pointer global. 
+ if let Some(resolved_addr) = stack_pointer_addr { + let byte_offset = *byte_offset; + info.set_value_location(DebugVarLocation::FrameBase { + global_index: resolved_addr, + byte_offset, + }); } - *info = new_info; } } } masm::Op::If { then_blk, else_blk, .. } => { - patch_debug_var_locals_in_block(then_blk, aligned_num_locals); - patch_debug_var_locals_in_block(else_blk, aligned_num_locals); + patch_debug_var_locals_in_block(then_blk, aligned_num_locals, stack_pointer_addr); + patch_debug_var_locals_in_block(else_blk, aligned_num_locals, stack_pointer_addr); } masm::Op::While { body: while_body, .. } => { - patch_debug_var_locals_in_block(while_body, aligned_num_locals); + patch_debug_var_locals_in_block(while_body, aligned_num_locals, stack_pointer_addr); } masm::Op::Repeat { body: repeat_body, .. } => { - patch_debug_var_locals_in_block(repeat_body, aligned_num_locals); + patch_debug_var_locals_in_block(repeat_body, aligned_num_locals, stack_pointer_addr); } } } diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 1c35b8205..2d5c01eb7 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1296,6 +1296,7 @@ impl HirLowering for debuginfo::DebugValue { | DIExpressionOp::WasmLocal(_) | DIExpressionOp::ConstU64(_) | DIExpressionOp::ConstS64(_) + | DIExpressionOp::FrameBase { .. 
} ) }); if !has_location_expr && emitter.stack.find(&value).is_none() { @@ -1321,6 +1322,12 @@ impl HirLowering for debuginfo::DebugValue { } DIExpressionOp::ConstU64(val) => DebugVarLocation::Const(Felt::new(*val)), DIExpressionOp::ConstS64(val) => DebugVarLocation::Const(Felt::new(*val as u64)), + DIExpressionOp::FrameBase { global_index, byte_offset } => { + DebugVarLocation::FrameBase { + global_index: *global_index, + byte_offset: *byte_offset, + } + } _ => { // For other operations, try to find the value on the stack if let Some(pos) = emitter.stack.find(&value) { diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index 50272093f..d9ce731a6 100644 --- a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -34,6 +34,8 @@ pub enum VariableStorage { Global(u32), Stack(u32), ConstU64(u64), + /// Frame base (global index) + byte offset — from DW_OP_fbreg + FrameBase { global_index: u32, byte_offset: i64 }, Unsupported, } @@ -51,6 +53,12 @@ impl VariableStorage { VariableStorage::Global(idx) => DIExpressionOp::WasmGlobal(*idx), VariableStorage::Stack(idx) => DIExpressionOp::WasmStack(*idx), VariableStorage::ConstU64(val) => DIExpressionOp::ConstU64(*val), + VariableStorage::FrameBase { global_index, byte_offset } => { + DIExpressionOp::FrameBase { + global_index: *global_index, + byte_offset: *byte_offset, + } + } VariableStorage::Unsupported => { DIExpressionOp::Unsupported(Symbol::intern("unsupported")) } @@ -105,7 +113,7 @@ pub fn collect_function_debug_info( ) -> FxHashMap>> { let mut map = FxHashMap::default(); - let dwarf_locals = collect_dwarf_local_data(parsed_module, module, diagnostics); + let collected = collect_dwarf_local_data(parsed_module, module, diagnostics); debug!( "Collecting function debug info for {} functions", @@ -123,7 +131,8 @@ pub fn collect_function_debug_info( body, addr2line, diagnostics, - dwarf_locals.get(&func_index), + collected.by_local.get(&func_index), + 
collected.frame_base.get(&func_index), ) { debug!( "Collected debug info for function {}: {} locals", @@ -150,6 +159,7 @@ fn build_function_debug_info( addr2line: &Context>, diagnostics: &DiagnosticsHandler, dwarf_locals: Option<&FxHashMap>, + frame_base_vars: Option<&Vec>, ) -> Option { let func_name = module.func_name(func_index); @@ -172,6 +182,7 @@ fn build_function_debug_info( &subprogram, diagnostics, dwarf_locals, + frame_base_vars, ); let location_schedule = build_location_schedule(&locals); @@ -220,6 +231,7 @@ fn determine_location(addr2line: &Context>, offset: u64) -> (u32 } } +#[allow(clippy::too_many_arguments)] fn build_local_debug_info( module: &Module, func_index: FuncIndex, @@ -228,6 +240,7 @@ fn build_local_debug_info( subprogram: &DISubprogram, diagnostics: &DiagnosticsHandler, dwarf_locals: Option<&FxHashMap>, + frame_base_vars: Option<&Vec>, ) -> Vec> { let param_count = wasm_signature.params().len(); let mut local_entries = Vec::new(); @@ -347,6 +360,35 @@ fn build_local_debug_info( } } + // Append FrameBase-only variables beyond normal WASM locals. + // These are variables like local `sum` in debug builds that live in + // linear memory via __stack_pointer and have no WASM local index. 
+ if let Some(fb_vars) = frame_base_vars { + for fb_var in fb_vars { + let name = fb_var.name.unwrap_or_else(|| Symbol::intern("?")); + let mut attr = DILocalVariable::new( + name, + subprogram.file, + subprogram.line, + subprogram.column, + ); + if let Some(line) = fb_var.decl_line.filter(|l| *l != 0) { + attr.line = line; + } + attr.column = fb_var.decl_column; + let expression = if !fb_var.locations.is_empty() { + Some(DIExpression::with_ops(vec![fb_var.locations[0].storage.to_expression_op()])) + } else { + None + }; + locals.push(Some(LocalDebugInfo { + attr, + locations: fb_var.locations.clone(), + expression, + })); + } + } + locals } @@ -357,7 +399,9 @@ fn build_location_schedule(locals: &[Option]) -> Vec]) -> Vec>, + /// FrameBase-only variables that have no WASM local index (e.g. `sum` in debug builds). + frame_base: FxHashMap>, +} + fn collect_dwarf_local_data( parsed_module: &ParsedModule, module: &Module, diagnostics: &DiagnosticsHandler, -) -> FxHashMap> { +) -> CollectedDwarfLocals { let _ = diagnostics; let dwarf = &parsed_module.debuginfo.dwarf; @@ -394,6 +446,7 @@ fn collect_dwarf_local_data( } let mut results: FxHashMap> = FxHashMap::default(); + let mut fb_results: FxHashMap> = FxHashMap::default(); let mut units = dwarf.units(); loop { let header = match units.next() { @@ -426,9 +479,9 @@ fn collect_dwarf_local_data( let _ = delta; // we don't need depth deltas explicitly. 
if entry.tag() == gimli::DW_TAG_subprogram { - let resolved = - resolve_subprogram_target(dwarf, &unit, &func_by_name, &low_pc_map, entry); - let Some((func_index, low_pc, high_pc)) = resolved else { + let Some(info) = + resolve_subprogram_target(dwarf, &unit, &func_by_name, &low_pc_map, entry) + else { continue; }; @@ -436,18 +489,33 @@ fn collect_dwarf_local_data( dwarf, &unit, entry.offset(), - func_index, - low_pc, - high_pc, + info.func_index, + info.low_pc, + info.high_pc, + info.frame_base_global, &mut results, + &mut fb_results, ) { - debug!("failed to gather variables for function {:?}: {err:?}", func_index); + debug!("failed to gather variables for function {:?}: {err:?}", info.func_index); } } } } - results + CollectedDwarfLocals { + by_local: results, + frame_base: fb_results, + } +} + +/// Result of resolving a DWARF subprogram to a WASM function. +struct SubprogramInfo { + func_index: FuncIndex, + low_pc: u64, + high_pc: Option, + /// The WASM global index used as the frame base (from DW_AT_frame_base). + /// Typically global 0 (__stack_pointer). 
+ frame_base_global: Option, } fn resolve_subprogram_target>( @@ -456,10 +524,11 @@ fn resolve_subprogram_target>( func_by_name: &FxHashMap, low_pc_map: &FxHashMap, entry: &gimli::DebuggingInformationEntry, -) -> Option<(FuncIndex, u64, Option)> { +) -> Option { let mut maybe_name: Option = None; let mut low_pc = None; let mut high_pc = None; + let mut frame_base_global = None; let mut attrs = entry.attrs(); while let Ok(Some(attr)) = attrs.next() { @@ -493,22 +562,43 @@ fn resolve_subprogram_target>( } _ => {} }, + gimli::DW_AT_frame_base => { + // Decode the frame base expression to find which WASM global + // provides the base address (typically __stack_pointer = global 0) + if let AttributeValue::Exprloc(expr) = attr.value() { + let mut ops = expr.operations(unit.encoding()); + while let Ok(Some(op)) = ops.next() { + if let Operation::WasmLocal { index } = op { + // Frame base is a WASM local (unusual but possible) + frame_base_global = Some(index); + } else if let Operation::WasmGlobal { index } = op { + frame_base_global = Some(index); + } + } + } + } _ => {} } } - if let Some(name) = maybe_name - && let Some(&func_index) = func_by_name.get(&name) - { - return Some((func_index, low_pc.unwrap_or_default(), high_pc)); + let make_info = |func_index, lp, hp| SubprogramInfo { + func_index, + low_pc: lp, + high_pc: hp, + frame_base_global, + }; + + if let Some(ref name) = maybe_name { + if let Some(&func_index) = func_by_name.get(name) { + return Some(make_info(func_index, low_pc.unwrap_or_default(), high_pc)); + } } if let Some(base) = low_pc && let Some(&func_index) = low_pc_map.get(&base) { - return Some((func_index, base, high_pc)); + return Some(make_info(func_index, base, high_pc)); } - None } @@ -519,13 +609,19 @@ fn collect_subprogram_variables>( func_index: FuncIndex, low_pc: u64, high_pc: Option, + frame_base_global: Option, results: &mut FxHashMap>, + fb_results: &mut FxHashMap>, ) -> gimli::Result<()> { let mut tree = unit.entries_tree(Some(offset))?; 
let root = tree.root()?; let mut children = root.children(); + let mut param_counter: u32 = 0; while let Some(child) = children.next()? { - walk_variable_nodes(dwarf, unit, child, func_index, low_pc, high_pc, results)?; + walk_variable_nodes( + dwarf, unit, child, func_index, low_pc, high_pc, frame_base_global, results, + fb_results, &mut param_counter, + )?; } Ok(()) } @@ -537,13 +633,27 @@ fn walk_variable_nodes>( func_index: FuncIndex, low_pc: u64, high_pc: Option, + frame_base_global: Option, results: &mut FxHashMap>, + fb_results: &mut FxHashMap>, + param_counter: &mut u32, ) -> gimli::Result<()> { let entry = node.entry(); - match entry.tag() { + let tag = entry.tag(); + match tag { gimli::DW_TAG_formal_parameter | gimli::DW_TAG_variable => { + // For formal parameters, the WASM local index equals the parameter + // order (params are always the first N WASM locals). + let fallback_index = if tag == gimli::DW_TAG_formal_parameter { + let idx = *param_counter; + *param_counter += 1; + Some(idx) + } else { + None + }; + let mut fb_vars = Vec::new(); if let Some((local_index, mut data)) = - decode_variable_entry(dwarf, unit, entry, low_pc, high_pc)? + decode_variable_entry(dwarf, unit, entry, low_pc, high_pc, frame_base_global, fallback_index, &mut fb_vars)? { let local_map = results.entry(func_index).or_default(); let entry = local_map.entry(local_index).or_insert_with(DwarfLocalData::default); @@ -554,13 +664,19 @@ fn walk_variable_nodes>( entry.locations.append(&mut data.locations); } } + if !fb_vars.is_empty() { + fb_results.entry(func_index).or_default().extend(fb_vars); + } } _ => {} } let mut children = node.children(); while let Some(child) = children.next()? 
{ - walk_variable_nodes(dwarf, unit, child, func_index, low_pc, high_pc, results)?; + walk_variable_nodes( + dwarf, unit, child, func_index, low_pc, high_pc, frame_base_global, results, + fb_results, param_counter, + )?; } Ok(()) } @@ -571,6 +687,9 @@ fn decode_variable_entry>( entry: &gimli::DebuggingInformationEntry<'_, '_, R>, low_pc: u64, high_pc: Option, + frame_base_global: Option, + fallback_index: Option, + frame_base_vars: &mut Vec, ) -> gimli::Result> { let mut name_symbol = None; let mut location_attr = None; @@ -609,22 +728,44 @@ fn decode_variable_entry>( let mut locations = Vec::new(); match location_value { - AttributeValue::Exprloc(expr) => { - if let Some(storage) = decode_storage_from_expression(&expr, unit)? - && let Some(local_index) = storage.as_local() - { - locations.push(LocationDescriptor { - start: low_pc, - end: high_pc, - storage, - }); - let data = DwarfLocalData { - name: name_symbol, - locations, - decl_line, - decl_column, - }; - return Ok(Some((local_index, data))); + AttributeValue::Exprloc(ref expr) => { + let storage = decode_storage_from_expression(expr, unit, frame_base_global)?; + if let Some(storage) = storage { + // Determine the WASM local index for this variable. + // For WasmLocal storage, use the index directly. + // For FrameBase (DW_OP_fbreg), use the parameter order as + // fallback since formal params map to WASM locals 0..N. + let local_index = storage.as_local().or(fallback_index); + if let Some(local_index) = local_index { + locations.push(LocationDescriptor { + start: low_pc, + end: high_pc, + storage, + }); + let data = DwarfLocalData { + name: name_symbol, + locations, + decl_line, + decl_column, + }; + return Ok(Some((local_index, data))); + } else if matches!(&storage, VariableStorage::FrameBase { .. }) { + // FrameBase-only variable (no WASM local index, e.g. local `sum` + // in debug builds). Collect separately instead of dropping. 
+ locations.push(LocationDescriptor { + start: low_pc, + end: high_pc, + storage, + }); + let data = DwarfLocalData { + name: name_symbol, + locations, + decl_line, + decl_column, + }; + frame_base_vars.push(data); + return Ok(None); + } } return Ok(None); } @@ -636,33 +777,46 @@ fn decode_variable_entry>( &dwarf.debug_addr, unit.addr_base, )?; + let mut has_frame_base = false; while let Some(entry) = iter.next()? { let storage_expr = entry.data; - if let Some(storage) = decode_storage_from_expression(&storage_expr, unit)? - && storage.as_local().is_some() - { - locations.push(LocationDescriptor { - start: entry.range.begin, - end: Some(entry.range.end), - storage, - }); - continue; + if let Some(storage) = decode_storage_from_expression(&storage_expr, unit, frame_base_global)? { + if storage.as_local().is_some() || matches!(&storage, VariableStorage::FrameBase { .. }) { + if matches!(&storage, VariableStorage::FrameBase { .. }) { + has_frame_base = true; + } + locations.push(LocationDescriptor { + start: entry.range.begin, + end: Some(entry.range.end), + storage, + }); + } } } if locations.is_empty() { return Ok(None); } - let Some(local_index) = locations.iter().find_map(|desc| desc.storage.as_local()) - else { + // Try to find a WASM local index from any location descriptor + if let Some(local_index) = locations.iter().find_map(|desc| desc.storage.as_local()) { + let data = DwarfLocalData { + name: name_symbol, + locations, + decl_line, + decl_column, + }; + return Ok(Some((local_index, data))); + } else if has_frame_base { + // FrameBase-only location list variable + let data = DwarfLocalData { + name: name_symbol, + locations, + decl_line, + decl_column, + }; + frame_base_vars.push(data); return Ok(None); - }; - let data = DwarfLocalData { - name: name_symbol, - locations, - decl_line, - decl_column, - }; - return Ok(Some((local_index, data))); + } + return Ok(None); } _ => {} } @@ -673,6 +827,7 @@ fn decode_variable_entry>( fn decode_storage_from_expression>( 
expr: &gimli::Expression, unit: &gimli::Unit, + frame_base_global: Option, ) -> gimli::Result> { let mut operations = expr.clone().operations(unit.encoding()); let mut storage = None; @@ -685,6 +840,17 @@ fn decode_storage_from_expression>( storage = Some(VariableStorage::ConstU64(value)) } Operation::StackValue => {} + Operation::FrameOffset { offset } => { + // DW_OP_fbreg(offset): variable is at frame_base + offset in + // WASM linear memory. The frame base is a WASM global + // (typically __stack_pointer = global 0). + if let Some(global_index) = frame_base_global { + storage = Some(VariableStorage::FrameBase { + global_index, + byte_offset: offset, + }); + } + } _ => {} } } diff --git a/frontend/wasm/src/module/func_translator.rs b/frontend/wasm/src/module/func_translator.rs index de9cc6b8a..9a23fe69e 100644 --- a/frontend/wasm/src/module/func_translator.rs +++ b/frontend/wasm/src/module/func_translator.rs @@ -10,7 +10,7 @@ use std::{cell::RefCell, rc::Rc}; use cranelift_entity::EntityRef; use midenc_hir::{ - BlockRef, Builder, Context, Op, + BlockRef, Builder, Context, Op, Type, diagnostics::{ColumnNumber, LineNumber}, dialects::builtin::{BuiltinOpBuilder, FunctionRef}, }; @@ -77,6 +77,9 @@ impl FuncTranslator { .with_listener(SSABuilderListener::new(self.func_ctx.clone())); let mut builder = FunctionBuilderExt::new(func, &mut op_builder); + // Keep a clone for FrameBase variable declaration below + let debug_info_ref = debug_info.clone(); + if let Some(info) = debug_info.clone() { builder.set_debug_metadata(info); } @@ -99,7 +102,7 @@ impl FuncTranslator { let mut reader = body.get_locals_reader().into_diagnostic()?; - parse_local_decls( + let total_wasm_vars = parse_local_decls( &mut reader, &mut builder, num_params, @@ -107,6 +110,20 @@ impl FuncTranslator { &session.diagnostics, )?; + // Declare extra SSA variables for FrameBase-only debug entries (e.g. local `sum` + // in debug builds that lives in linear memory, not a WASM local). 
+ // Use declare_var_only to avoid allocating HIR locals that would inflate + // num_locals and corrupt FMP offset calculations. + if let Some(info) = debug_info_ref.as_ref() { + let locals_len = info.borrow().locals.len(); + if locals_len > total_wasm_vars { + for idx in total_wasm_vars..locals_len { + let var = Variable::new(idx); + builder.declare_var_only(var, Type::I32); + } + } + } + let mut reader = body.get_operators_reader().into_diagnostic()?; parse_function_body( &mut reader, @@ -153,13 +170,14 @@ fn declare_parameters( /// Parse the local variable declarations that precede the function body. /// /// Declare local variables, starting from `num_params`. +/// Returns the total number of declared variables (params + locals). fn parse_local_decls( reader: &mut wasmparser::LocalsReader<'_>, builder: &mut FunctionBuilderExt<'_, B>, num_params: usize, validator: &mut FuncValidator, diagnostics: &DiagnosticsHandler, -) -> WasmResult<()> { +) -> WasmResult { let mut next_local = num_params; let local_count = reader.get_count(); @@ -170,7 +188,7 @@ fn parse_local_decls( declare_locals(builder, count, ty, &mut next_local, diagnostics)?; } - Ok(()) + Ok(next_local) } /// Declare `count` local variables of the same type, starting from `next_local`. diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index bb88e6c8d..8c0674a8b 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -262,9 +262,33 @@ impl FunctionBuilderExt<'_, B> { } fn emit_scheduled_dbg_value(&mut self, entry: LocationScheduleEntry, span: SourceSpan) { - let var = Variable::new(entry.var_index); - let Ok(value) = self.try_use_var(var) else { + use crate::module::debug_info::VariableStorage; + + // Skip variables already emitted as parameters to avoid duplicates. 
+ if self.param_dbg_emitted + && self.param_values.iter().any(|(v, _)| v.index() == entry.var_index) + { return; + } + + let var = Variable::new(entry.var_index); + let value = match self.try_use_var(var) { + Ok(v) => v, + Err(_) => { + // For FrameBase-only variables (no WASM local), use a dummy SSA value. + // The FrameBase expression will override the value's stack position. + if matches!(&entry.storage, VariableStorage::FrameBase { .. }) { + if let Some((_, v)) = self.param_values.first() { + let dummy = *v; + self.def_var(var, dummy); + dummy + } else { + return; + } + } else { + return; + } + } }; // Create expression from the scheduled location @@ -467,6 +491,19 @@ impl FunctionBuilderExt<'_, B> { local } + /// Declare an SSA variable without allocating an HIR local. + /// + /// Used for FrameBase-only debug variables that live in linear memory + /// and don't need a real function-local storage slot. This avoids + /// inflating `num_locals` which would corrupt FMP offset calculations. + pub fn declare_var_only(&mut self, var: Variable, ty: Type) { + let mut ctx = self.func_ctx.borrow_mut(); + if ctx.types[var] != Type::Unknown { + return; // Already declared + } + ctx.types[var] = ty; + } + /// Declares the type of a variable, so that it can be used later (by calling /// [`FunctionBuilderExt::use_var`]). This function will return an error if the variable /// has been previously declared. @@ -621,9 +658,12 @@ impl FunctionBuilderExt<'_, B> { } self.param_dbg_emitted = true; let params: Vec<_> = self.param_values.to_vec(); - for (var, value) in params { - self.emit_dbg_value_for_var(var, value, span); + for (var, value) in &params { + self.emit_dbg_value_for_var(*var, *value, span); } + // FrameBase-only variables (e.g. local `sum`) are emitted solely via + // the location schedule in apply_location_schedule/emit_scheduled_dbg_value, + // avoiding duplicate DebugVar emissions.
} fn span_to_location( diff --git a/hir/src/attributes/debug.rs b/hir/src/attributes/debug.rs index 16efa5272..b004d6ec2 100644 --- a/hir/src/attributes/debug.rs +++ b/hir/src/attributes/debug.rs @@ -243,6 +243,9 @@ pub enum DIExpressionOp { Piece(u64), /// DW_OP_bit_piece - Describes a piece of a variable in bits BitPiece { size: u64, offset: u64 }, + /// DW_OP_fbreg - Frame base register + offset. + /// The variable is in WASM linear memory at `value_of(global[global_index]) + byte_offset`. + FrameBase { global_index: u32, byte_offset: i64 }, /// Placeholder for unsupported operations Unsupported(Symbol), } @@ -302,6 +305,9 @@ impl PrettyPrint for DIExpression { DIExpressionOp::BitPiece { size, offset } => { text(format!("DW_OP_bit_piece {} {}", size, offset)) } + DIExpressionOp::FrameBase { global_index, byte_offset } => { + text(format!("DW_OP_fbreg global[{}]{:+}", global_index, byte_offset)) + } DIExpressionOp::Unsupported(name) => text(name.as_str()), }; } From f89ecadb529adc95d3fae9e0d6f4cee43fa3b2de Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 1 Apr 2026 12:11:20 +0200 Subject: [PATCH 10/32] fixup: remove debug info docs --- docs/DebugInfoFormat.md | 360 ----------------------------- docs/DebugInfoMetadata.md | 472 -------------------------------------- 2 files changed, 832 deletions(-) delete mode 100644 docs/DebugInfoFormat.md delete mode 100644 docs/DebugInfoMetadata.md diff --git a/docs/DebugInfoFormat.md b/docs/DebugInfoFormat.md deleted file mode 100644 index ae646f71a..000000000 --- a/docs/DebugInfoFormat.md +++ /dev/null @@ -1,360 +0,0 @@ -# Debug Info Format Specification - -This document describes the `.debug_info` custom section format used in MASP (Miden Assembly Package) files. This section contains source-level debug information that enables debuggers to map between Miden VM execution state and the original source code. 
- -## Overview - -The debug info section is stored as a custom section in the MASP package with the section ID `debug_info`. It is designed to be: - -- **Compact**: Uses index-based references and string deduplication -- **Self-contained**: All information needed for debugging is in this section -- **Extensible**: Version field allows for future format evolution - -## Section Structure - -The `.debug_info` section contains the following logical subsections: - -``` -┌─────────────────────────────────────────┐ -│ Debug Info Header │ -│ - version (u8) │ -├─────────────────────────────────────────┤ -│ .debug_str │ -│ - String table (deduplicated) │ -├─────────────────────────────────────────┤ -│ .debug_types │ -│ - Type definitions │ -├─────────────────────────────────────────┤ -│ .debug_files │ -│ - Source file information │ -├─────────────────────────────────────────┤ -│ .debug_functions │ -│ - Function metadata │ -│ - Variables (nested) │ -│ - Inlined calls (nested) │ -└─────────────────────────────────────────┘ -``` - -## Format Version - -Current version: **1** - -The version byte is the first field in the section and indicates the format version. Readers should reject sections with unsupported versions. - ---- - -## .debug_str - String Table - -The string table contains all strings used in the debug info, deduplicated to save space. Other sections reference strings by their index into this table. - -### Contents - -- File paths -- Function names -- Variable names -- Type names -- Linkage/mangled names - -### Example Output - -``` -.debug_str contents: - [ 0] "/Users/user/project/src/lib.rs" - [ 1] "my_function" - [ 2] "x" - [ 3] "result" -``` - ---- - -## .debug_types - Type Information - -The type table contains definitions for all types referenced by variables and functions. Types can reference other types by index, allowing for complex type hierarchies. 
- -### Type Kinds - -| Tag | Kind | Description | -|-----|------|-------------| -| 0 | Primitive | Built-in scalar types | -| 1 | Pointer | Pointer to another type | -| 2 | Array | Fixed or dynamic array | -| 3 | Struct | Composite type with fields | -| 4 | Function | Function signature | -| 5 | Unknown | Opaque/unknown type | - -### Primitive Types - -| Value | Type | Size (bytes) | Size (felts) | -|-------|------|--------------|--------------| -| 0 | void | 0 | 0 | -| 1 | bool | 1 | 1 | -| 2 | i8 | 1 | 1 | -| 3 | u8 | 1 | 1 | -| 4 | i16 | 2 | 1 | -| 5 | u16 | 2 | 1 | -| 6 | i32 | 4 | 1 | -| 7 | u32 | 4 | 1 | -| 8 | i64 | 8 | 2 | -| 9 | u64 | 8 | 2 | -| 10 | i128 | 16 | 4 | -| 11 | u128 | 16 | 4 | -| 12 | f32 | 4 | 2 | -| 13 | f64 | 8 | 2 | -| 14 | felt | 8 | 1 | -| 15 | word | 32 | 4 | - -### Example Output - -``` -.debug_types contents: - [ 0] PRIMITIVE: i32 (size: 4 bytes, 1 felts) - [ 1] PRIMITIVE: felt (size: 8 bytes, 1 felts) - [ 2] POINTER -> i32 - [ 3] ARRAY [felt; 4] - [ 4] STRUCT Point (size: 16 bytes) - + 0: x : felt - + 8: y : felt -``` - ---- - -## .debug_files - Source File Information - -The file table contains information about source files referenced by functions and variables. - -### Fields - -| Field | Type | Description | -|-------|------|-------------| -| path_idx | u32 | Index into string table for file path | -| directory_idx | Option\ | Optional index for directory path | -| checksum | Option\<[u8; 32]\> | Optional SHA-256 checksum for verification | - -### Example Output - -``` -.debug_files contents: - [ 0] /Users/user/project/src/lib.rs - [ 1] /rustc/abc123.../library/core/src/panicking.rs - [ 2] unknown -``` - ---- - -## .debug_functions - Function Information - -The function table contains debug metadata for each function in the compiled program. 
- -### Fields - -| Field | Type | Description | -|-------|------|-------------| -| name_idx | u32 | Index into string table for function name | -| linkage_name_idx | Option\ | Optional mangled/linkage name | -| file_idx | u32 | Index into file table | -| line | u32 | Line number where function is defined | -| column | u32 | Column number | -| type_idx | Option\ | Optional function type (index into type table) | -| mast_root | Option\<[u8; 32]\> | MAST root digest linking to compiled code | -| variables | Vec | Local variables and parameters | -| inlined_calls | Vec | Inlined function call sites | - -### Variables - -Each function contains a list of variables (parameters and locals): - -| Field | Type | Description | -|-------|------|-------------| -| name_idx | u32 | Index into string table | -| type_idx | u32 | Index into type table | -| arg_index | u32 | 1-based parameter index (0 = local variable) | -| line | u32 | Declaration line | -| column | u32 | Declaration column | -| scope_depth | u32 | Lexical scope depth (0 = function scope) | - -### Inlined Calls - -For tracking inlined function calls: - -| Field | Type | Description | -|-------|------|-------------| -| callee_idx | u32 | Index into function table for inlined function | -| file_idx | u32 | Call site file | -| line | u32 | Call site line | -| column | u32 | Call site column | - -### Example Output - -``` -.debug_functions contents: - [ 0] FUNCTION: my_function - Location: /Users/user/project/src/lib.rs:10:1 - MAST root: 0xabcd1234... 
- Variables (3): - - x (param #1): i32 @ 10:14 - - y (param #2): i32 @ 10:22 - - result (local): i32 @ 11:9 [scope depth: 1] - Inlined calls (1): - - helper_fn inlined at lib.rs:12:5 -``` - ---- - -## Usage - -### Generating Debug Info - -Compile with debug info enabled: - -```bash -midenc input.wasm --exe --debug full -o output.masp -``` - -For projects using `trim-paths`, use the `-Z trim-path-prefix` option to preserve absolute paths: - -```bash -midenc input.wasm --exe --debug full \ - -Z trim-path-prefix="/path/to/project" \ - -o output.masp -``` - -### Inspecting Debug Info - -Use the `miden-debugdump` tool to inspect debug info in a MASP file: - -```bash -# Full dump (includes all sections) -miden-debugdump output.masp - -# Summary only -miden-debugdump output.masp --summary - -# Specific section from .debug_info -miden-debugdump output.masp --section functions -miden-debugdump output.masp --section variables -miden-debugdump output.masp --section types -miden-debugdump output.masp --section files -miden-debugdump output.masp --section strings - -# Show DebugVar decorators from MAST (.debug_loc) -miden-debugdump output.masp --section locations - -# Verbose mode (shows additional details like raw decorator list) -miden-debugdump output.masp --section locations --verbose - -# Raw indices (for debugging the debug info itself) -miden-debugdump output.masp --raw -``` - ---- - -## Design Rationale - -### Index-Based References - -All cross-references use indices rather than embedding data directly. 
This: -- Enables string deduplication (file paths, names appear once) -- Reduces section size -- Allows efficient random access - -### Separation of Concerns - -The section is divided into logical subsections: -- **Strings**: Shared across all other sections -- **Types**: Can be referenced by multiple variables/functions -- **Files**: Shared by multiple functions -- **Functions**: Contains variables and inlined calls inline - -### Compatibility with DWARF - -The format is inspired by DWARF but simplified for Miden's needs: -- No complex DIE tree structure -- No location expressions (handled by `DebugVar` decorators in MAST) -- No line number tables (locations embedded in functions/variables) - ---- - -## Debug Variable Locations - -Debug information in MASP is split between two locations: the `.debug_info` custom section (documented above) and `Decorator::DebugVar` entries embedded in the MAST instruction stream. - -### Architecture Overview - -``` -┌──────────────────────────────────────────────────────────────────┐ -│ MASP Package │ -├──────────────────────────────────────────────────────────────────┤ -│ MAST Forest │ -│ ├── MastNode[] │ -│ │ └── Decorator::DebugVar(DebugVarInfo) ← Runtime locations │ -│ │ • name: "x" │ -│ │ • value_location: Stack(0) / Local(2) / Memory(...) │ -│ │ • source location │ -│ └── String table (for names) │ -├──────────────────────────────────────────────────────────────────┤ -│ .debug_info Section (separate custom section) │ -│ ├── .debug_str (deduplicated strings) │ -│ ├── .debug_types (type definitions) │ -│ ├── .debug_files (source file paths) │ -│ └── .debug_functions (static metadata, variables, inlined) │ -└──────────────────────────────────────────────────────────────────┘ -``` - -### Why Two Locations? 
- -| Aspect | `Decorator::DebugVar` in MAST | `.debug_info` Section | -|--------|-------------------------------|----------------------| -| **Where stored** | Embedded in instruction stream | Custom section at end of MASP | -| **Purpose** | Runtime value location at specific execution points | Static metadata (types, files, function info) | -| **When used** | During execution, debugger reads variable values | To display type names, source files, etc. | -| **DWARF analog** | Location lists (`.debug_loc`) | `.debug_info` / `.debug_abbrev` | - -The `.debug_info` section tells you **what** variables exist (name, type, scope). The `DebugVar` decorators tell you **where** a variable's value is at a specific point during execution. - -### DebugVarInfo Structure - -Each `Decorator::DebugVar` contains a `DebugVarInfo` with the following fields: - -| Field | Type | Description | -|-------|------|-------------| -| name | String | Variable name | -| value_location | DebugVarLocation | Where to find the value | -| type_id | Option\ | Index into `.debug_types` | -| arg_index | Option\ | 1-based parameter index (if parameter) | -| location | Option\ | Source location of declaration | - -### DebugVarLocation Variants - -The `value_location` field describes where the variable's value can be found at runtime: - -| Variant | Encoding | Description | -|---------|----------|-------------| -| `Stack(u8)` | Tag 0 + u8 | Value is at stack position N (0 = top) | -| `Memory(u32)` | Tag 1 + u32 | Value is at memory word address | -| `Const(u64)` | Tag 2 + u64 | Value is a constant field element | -| `Local(u16)` | Tag 3 + u16 | Value is in local variable slot N | -| `Expression(Vec)` | Tag 4 + len + bytes | Complex location (DWARF-style expression) | - -### Example - -For a function like: -```rust -fn add(x: i32, y: i32) -> i32 { - let sum = x + y; - sum -} -``` - -The MAST will contain decorators like: -``` -# At function entry -Decorator::DebugVar { name: "x", value_location: Local(0), 
arg_index: Some(1), ... } -Decorator::DebugVar { name: "y", value_location: Local(1), arg_index: Some(2), ... } - -# After computing sum -Decorator::DebugVar { name: "sum", value_location: Stack(0), arg_index: None, ... } -``` - -A debugger pausing at a specific instruction can read these decorators to know where each variable's value is stored at that moment. - ---- diff --git a/docs/DebugInfoMetadata.md b/docs/DebugInfoMetadata.md deleted file mode 100644 index 27cce9496..000000000 --- a/docs/DebugInfoMetadata.md +++ /dev/null @@ -1,472 +0,0 @@ -# Debug Info Metadata Pipeline - -This note describes how the Miden compiler threads source-level variable -metadata through HIR when compiling Wasm input. The goal is to make every HIR -function carry `DI*` attributes and `debuginfo.*` operations that mirror the -DWARF records present in the Wasm binary, so downstream passes (or tooling -consuming serialized HIR) can reason about user variables. - -## The DebugInfo Dialect - -Debug variable tracking is implemented as a first-class IR dialect -(`midenc-dialect-debuginfo`, namespace `"debuginfo"`), inspired by -[Mojo's DebugInfo dialect](https://llvm.org/devmtg/2024-04/slides/TechnicalTalks/MojoDebugging.pdf). -Unlike metadata-based approaches (e.g., Flang/FIR), debug operations here are -real IR operations with SSA operands, which means: - -- If a transform deletes a value without updating its debug uses, that is a - hard error — not a silent drop. -- Standard `replace_all_uses_with` automatically propagates value replacements - to debug uses. -- The IR verifier catches dangling debug references. 
- -### Operations - -| Operation | Operands | Purpose | -|-----------|----------|---------| -| `debuginfo.debug_value` | SSA value + `DILocalVariableAttr` + `DIExpressionAttr` | Records the current value of a source variable | -| `debuginfo.debug_declare` | SSA address + `DILocalVariableAttr` | Records the storage location (address) of a variable | -| `debuginfo.debug_kill` | `DILocalVariableAttr` only | Marks a variable as dead at this program point | - -### Design Pillars - -1. **SSA use-def chains** — debug values participate in standard use-def tracking, - making it impossible for transforms to silently lose debug info. -2. **Expression trees** — `DIExpressionAttr` describes how to recover source values - from transformed IR values (encoding the inverse transformation). -3. **Explicit lifetimes** — `debuginfo.debug_kill` provides precise variable death - points instead of relying on scope-based heuristics. - -### Builder API - -The `DebugInfoOpBuilder` trait provides a convenient API for emitting debug ops: - -```rust -// Track a variable's value: -builder.debug_value(ssa_value, variable_attr, span)?; - -// Track with a custom expression (e.g., value needs a dereference): -builder.debug_value_with_expr(ssa_value, variable_attr, Some(expr), span)?; - -// Track a variable's storage address: -builder.debug_declare(address_value, variable_attr, span)?; - -// Mark a variable as dead: -builder.debug_kill(variable_attr, span)?; -``` - -The trait has a blanket implementation for all `Builder` types, so any IR builder -can emit debug operations directly. - -## High-Level Flow - -1. **DWARF ingestion** – while `ModuleEnvironment` parses the module, we retain - the full set of DWARF sections (`.debug_info`, `.debug_line`, etc.) and the - wasm name section. -2. **Metadata extraction** – before we translate functions, we walk the DWARF - using `addr2line` to determine source files and fall back to the wasm module - path when no debug info is present. 
We also load parameter/local names from - the name section. The result is a `FunctionDebugInfo` record containing a - `DICompileUnitAttr`, `DISubprogramAttr`, and a per-index list of - `DILocalVariableAttr`s. -3. **Translation-time tracking** – every `FuncTranslator` receives the - `FunctionDebugInfo` for the function it is translating. `FunctionBuilderExt` - attaches the compile-unit/subprogram attrs to the function op, records entry - parameters, and emits `debuginfo.debug_value` operations whenever locals change. -4. **Span-aware updates** – as each wasm operator is translated we store the - real `SourceSpan`. The first non-unknown span is used to retroactively patch - the compile unit, subprogram, and parameter variable records with real file, - line, and column information so the resulting HIR references surfaces from - the actual user file. - -The emitted HIR therefore contains both the SSA instructions and the debug -operations that map values back to the user program. - -## HIR Metadata Constructs - -The core attribute types live in `hir/src/attributes/debug.rs`: - -- `DICompileUnitAttr` – captures language, primary file, optional directory, - producer string, and optimized flag. Stored once per function/module. -- `DISubprogramAttr` – names the function, file, line/column, optional linkage - name, and flags indicating definition/local status. Does not embed the compile - unit to avoid redundancy - stored once per function. -- `DILocalVariableAttr` – describes parameters or locals, including the source - location, optional argument index, and optional `Type`. Does not embed the - scope to avoid redundancy - the scope is implied by the containing function. -- `DIExpressionAttr` – represents DWARF location expressions that describe how - to compute or locate a variable's value. 
-- `DIExpressionOp` – individual operations within a DIExpression, including: - - `WasmLocal(u32)` - Variable is in a WebAssembly local - - `WasmGlobal(u32)` - Variable is in a WebAssembly global - - `WasmStack(u32)` - Variable is on the WebAssembly operand stack - - `ConstU64(u64)` - Unsigned constant value - - Additional DWARF operations for complex expressions - -These attrs are exported from `midenc_hir` so clients can construct them -programmatically. The debug operations (`debuginfo.debug_value`, -`debuginfo.debug_declare`, `debuginfo.debug_kill` from -`dialects/debuginfo/src/ops.rs`) consume SSA values plus the metadata -attributes. The `debug_value` operation includes a `DIExpressionAttr` field -that describes the location or computation of the variable's value. - -## Collecting Metadata from Wasm - -`frontend/wasm/src/module/debug_info.rs` is the central collector. The key -steps are: - -1. Iterate over the bodies scheduled for translation (`ParsedModule::function_body_inputs`). -2. For each body, determine the source file and first line using `addr2line` and - store fallbacks (module path or `unknown`) when debug info is missing. -3. Construct `DICompileUnitAttr`/`DISubprogramAttr` and a `Vec>` - that covers both signature parameters and wasm locals. Parameter/local names - sourced from the name section are used when available; otherwise we emit - synthesized names (`arg{n}`, `local{n}`). -4. Store the result in a map `FxHashMap>>` - attached to `ParsedModule`. We use `RefCell` so later stages can patch the - attrs once the translator sees more accurate spans. - -## Using Metadata During Translation - -The translation machinery picks up those records as follows: - -- `build_ir.rs` moves the precomputed map onto the `FuncTranslator` invocation. -- `FuncTranslator::translate_body` installs the debug info on its - `FunctionBuilderExt` before any instructions are emitted. 
-- `FunctionBuilderExt::set_debug_metadata` attaches compile-unit/subprogram - attrs to the function op and resets its internal bookkeeping. -- Entry parameters are stored via `register_parameter` so we can emit - `debug_value` operations after we encounter the first real span (parameters - have no dedicated wasm operator with source ranges). -- Every wasm operator calls `builder.record_debug_span(span)` prior to emission; - the first non-unknown span updates the compile unit/subprogram attrs and - triggers parameter `debug_value` emission so arguments are tied to the correct - location. -- `def_var_with_dbg` is the canonical entry point for `local.set` and - `local.tee`. It updates the SSA value and immediately emits a - `debuginfo.debug_value` with the precise span of the store. -- Decoded `DW_AT_location` ranges are normalized into a per-function schedule. - As the translator visits each wasm offset we opportunistically emit extra - `debug_value` operations so source variables track transitions between Wasm - locals without relying on `debuginfo.debug_declare`. -- When present, `DW_AT_decl_line`/`DW_AT_decl_column` on variables override the - default span so we keep the original lexical definition sites instead of - inheriting the statement we first observed during translation. - -Locals declared in the wasm prologue receive an initial value but no debug -operation until they are defined in user code. Subsequent writes insert -additional `debug_value` ops so consumers can track value changes over time. - -## Example - -In the serialized HIR for the test pipeline you now see: - -```hir -debuginfo.debug_value v0 #[expression = di.expression(DW_OP_WASM_local 0)] - #[variable = di.local_variable( - name = arg0, - file = /path/to/lib.rs, - line = 25, - column = 5, - arg = 1, - ty = i32 - )] # /path/to/lib.rs:25:5; -``` - -The `expression` attribute indicates that the variable is stored in WASM local 0. 
-When a variable moves between locations, additional `debug_value` operations are -emitted with updated expressions: - -```hir -debuginfo.debug_value v22 #[expression = di.expression(DW_OP_WASM_local 3)] - #[variable = di.local_variable(name = sum, ...)] -``` - -Both the attribute and the trailing comment reference the same source location -so downstream tooling can disambiguate the variable regardless of how it parses -HIR. - -## Transform Hooks - -The `debuginfo::transform` module (`dialects/debuginfo/src/transform.rs`) -provides utilities that make it straightforward for transform authors to -maintain debug info across IR transformations. - -### Simple Replacements (Automatic) - -When a transform replaces one value with another (e.g., CSE, copy propagation), -the standard `replace_all_uses_with` automatically updates all debug uses: - -```text -// Before: debuginfo.debug_value %1 #[variable = x] -// rewriter.replace_all_uses_with(%1, %0) -// After: debuginfo.debug_value %0 #[variable = x] -- automatic! -``` - -### Complex Transforms (SalvageAction) - -For transforms that change a value's representation (not just replace it), -the `salvage_debug_info()` function takes a `SalvageAction` describing the -inverse transformation. 
Available actions: - -| Action | Use Case | Expression Update | -|--------|----------|-------------------| -| `Deref { new_value }` | Value promoted to stack allocation | Prepends `DW_OP_deref` | -| `OffsetBy { new_value, offset }` | Frame pointer adjustment | Appends `const(offset), minus` | -| `WithExpression { new_value, ops }` | Arbitrary complex transform | Appends custom expression ops | -| `Constant { value }` | Constant propagation | Emits `debuginfo.debug_kill` (future: constant expression) | -| `Undef` | Value completely removed | Emits `debuginfo.debug_kill` | - -Example usage in a transform: - -```rust -use midenc_dialect_debuginfo::transform::{salvage_debug_info, SalvageAction}; - -// Value was promoted to memory: -let ptr = builder.alloca(ty, span)?; -builder.store(old_val, ptr, span)?; -salvage_debug_info(&old_val, &SalvageAction::Deref { new_value: ptr }, &mut builder); -``` - -### Helper Functions - -- `is_debug_info_op(op)` — checks if an operation is a debug info op (useful - for DCE to skip debug uses when determining liveness) -- `debug_value_users(value)` — collects all `debuginfo.debug_value` ops that - reference a given value -- `collect_debug_ops(op)` — recursively collects all debug ops within an - operation's regions - -## Guide for Pass Authors - -Debug info operations are **observational** — they observe SSA values but don't -consume them or affect program semantics. This has implications for how passes -should handle them: - -### The Golden Rule - -**Debug uses should never prevent optimizations.** If a value is dead except for -debug uses, treat it as dead. If an operation can be sunk/moved except for debug -uses, sink/move it anyway. - -### What You Get for Free - -1. **Value replacements propagate automatically.** When you call - `replace_all_uses_with(old, new)`, debug uses are updated too. No action needed. - -2. 
**The verifier catches mistakes.** If you delete an operation without handling - its debug uses, the verifier will report dangling references. This is - intentional — silent debug info loss is worse than a loud failure. - -### What You Must Handle - -#### 1. Dead Code Elimination - -When determining if a value is dead, exclude debug uses: - -```rust -use crate::sink::{has_only_debug_uses, erase_debug_users}; - -// Wrong: debug uses keep the value "alive" -if !value.is_used() { ... } - -// Right: only non-debug uses matter -if has_only_debug_uses(&*value.borrow()) { - erase_debug_users(value); // Clean up debug ops first - defining_op.erase(); // Then erase the definition -} -``` - -#### 2. Sinking / Code Motion - -When checking if an operation can be moved, exclude debug uses from the -dominance check: - -```rust -fn can_sink(&self, op: &Operation, target_region: &Region) -> bool { - op.results().iter().all(|result| { - result.borrow().iter_uses().all(|user| { - // Skip debug uses — they're observational - if is_debug_info_op(&user.owner.borrow()) { - return true; - } - self.dominates(target_region, &user.owner) - }) - }) -} -``` - -Before moving an operation, erase debug uses that would violate dominance: - -```rust -// Erase debug uses outside target region before moving -for result in op.borrow().results().iter() { - erase_debug_users(result.borrow().as_value_ref()); -} -move_op_into_region(op, target_region); -``` - -#### 3. 
Value Transformations - -When a transform changes how a value is computed (not just replaces it), use -`salvage_debug_info()` to update the debug expressions: - -```rust -use midenc_dialect_debuginfo::transform::{salvage_debug_info, SalvageAction}; - -// Value was promoted to a stack slot: -let ptr = builder.alloca(ty, span)?; -builder.store(old_val, ptr, span)?; -salvage_debug_info(&old_val, &SalvageAction::Deref { new_value: ptr }, &mut builder); - -// Value was completely optimized away: -salvage_debug_info(&old_val, &SalvageAction::Undef, &mut builder); -``` - -#### 4. Deleting Operations - -Always erase debug users before erasing the defining operation: - -```rust -for result in op.borrow().results().iter() { - erase_debug_users(result.borrow().as_value_ref()); -} -op.borrow_mut().erase(); -``` - -### Quick Reference - -| Scenario | Action | -|----------|--------| -| Replacing value A with B | Just use `replace_all_uses_with` — automatic | -| Checking if value is dead | Use `has_only_debug_uses()`, not `is_used()` | -| Moving/sinking an op | Exclude debug uses from dominance checks | -| Before moving an op | Call `erase_debug_users()` on results | -| Before deleting an op | Call `erase_debug_users()` on results | -| Value computation changed | Use `salvage_debug_info()` with appropriate action | -| Value optimized to constant | Use `SalvageAction::Constant` or `::Undef` | - -### Defense in Depth - -The MASM codegen has additional hardening: - -- `DebugValue::emit()` skips emission if the value is not on the stack and has - no location expression (gracefully handles orphaned debug ops) -- `emit_inst()` silently skips debuginfo-dialect ops that have no `HirLowering` - implementation (e.g., `debuginfo.kill`, `debuginfo.declare`) -- `MasmFunctionBuilder::build()` strips debug-only procedure bodies that would - be rejected by the assembler - -These are safety nets, not substitutes for proper debug info handling in passes. 
- -## Kinda Fallback Behavior/Best Effort cases - -- If DWARF lookup fails entirely, we still emit attrs but populate - `file = unknown`, `line = 0`, and omit columns. As soon as a real span is - observed, those fields are patched. -- If the wasm name section lacks parameter/local names, we keep the generated - `arg{n}`/`local{n}` placeholders in the HIR. This mirrors LLVM’s behavior when - debug names are unavailable. - -## What we can do next and what are the limitations - -- **Location expressions** – We now decode `DW_AT_location` records for locals - and parameters, interpret simple Wasm location opcodes (including locals, - globals, and operand-stack slots), and attach them to `debuginfo.debug_value` - operations as `DIExpressionAttr`. The system emits additional `debug_value` - operations whenever a variable's storage changes, with each operation - containing the appropriate expression. This allows modeling multi-location - lifetimes where variables move between different storage locations. Support - for more complex composite expressions (pieces, arithmetic operations, etc.) - is implemented but not fully utilized from DWARF parsing yet. -- **Lifetimes** – we reset the compile-unit/subprogram metadata to the first - span we encounter, but we do not track scopes or lexical block DIEs. Extending - the collector to read `DW_TAG_lexical_block` and other scope markers would - allow more precise lifetime modelling. -- **Cross-language inputs** – the language string comes from DWARF or defaults - to `"wasm"`. If the Wasm file was produced by Rust/C compilers we could read - `DW_AT_language` to provide richer values. -- **Incremental spans** – parameter debug entries currently use the first - non-unknown span in the function. For multi-file functions we might wish to - attach per-parameter spans using `DW_AT_decl_file`/`DW_AT_decl_line` if the - DWARF provides them. 
-- **MASM codegen** – The MASM backend emits `Decorator::DebugVar` entries - containing `DebugVarInfo` with variable names, runtime locations - (`DebugVarLocation::Stack`, `Local`, etc.), source positions, and type - information. These decorators are embedded in the MAST instruction stream, - enabling debuggers to track variable values at specific execution points. - - **Local variable FMP offset handling** uses a two-phase approach: - - 1. **During lowering** (`DebugValue::emit()` in `lowering.rs`): When a value - is not on the operand stack (i.e., it was spilled to memory), the emitter - records `DebugVarLocation::Local(wasm_idx)` using the raw WASM local index - from the `DIExpressionOp::WasmLocal` attribute. This index is stable and - known from DWARF. - - 2. **After body is built** (`patch_debug_var_locals_in_block()` in - `component.rs`): Once the entire procedure body is emitted and `num_locals` - is finalized, a fixup pass converts `Local(wasm_idx)` to `Local(fmp_offset)` - where `fmp_offset = wasm_idx - num_wasm_locals`. The FMP offset is negative, - pointing below the frame pointer where spilled locals reside. - - This separation keeps lowering simple (no need to thread `num_locals` through - the emitter) while ensuring correct FMP-relative offsets in the final output. - - **Debug-only procedure bodies**: If a procedure body contains only `DebugVar` - decorators and no real instructions, the MASM codegen strips the decorators - entirely. The Miden assembler rejects such bodies because decorators don't - affect MAST digests—two empty procedures with different decorators would be - indistinguishable. - -These refinements can be implemented without changing the public HIR surface; we -would only update the metadata collector and the builder helpers. 
- -## Testing - -The debug info implementation is validated by lit tests in `tests/lit/debug/`: - -- **simple_debug.shtest** – verifies basic debug info for function parameters -- **function_metadata.shtest** – tests debug metadata on multi-parameter functions -- **variable_locations.shtest** – validates debug info tracking for variables in a loop -- more... - -Each test compiles a small Rust snippet with DWARF enabled (`-C debuginfo=2`), -runs it through `midenc compile --emit hir`, and uses `FileCheck` to verify that -`debuginfo.debug_value` operations are emitted with the correct `di.local_variable` -attributes containing variable names, file paths, line numbers, and types. - -To run the debug info tests: - -```bash -/opt/homebrew/bin/lit -va tests/lit/debug/ -``` - -Or to run a specific test: - -```bash -/opt/homebrew/bin/lit -va tests/lit/debug/simple_debug.shtest -``` - -## Bottomline - -- Debug variable tracking uses a dedicated `debuginfo` dialect with SSA-based - operations (`debuginfo.debug_value`, `debuginfo.debug_declare`, - `debuginfo.debug_kill`), making debug info a first-class IR citizen that - transforms cannot silently drop. -- HIR exposes DWARF-like metadata via reusable `DI*` attributes including - `DIExpressionAttr` for location expressions. -- The wasm frontend precomputes function metadata, keeps it mutable during - translation, and emits `debuginfo.debug_value` operations with location - expressions for every parameter/variable assignment. -- Transform authors maintain debug info via `salvage_debug_info()` — they only - describe the inverse of their transformation, and the framework updates all - affected debug operations automatically. -- Location expressions (DW_OP_WASM_local, etc.) are preserved from DWARF and - attached to `debug_value` operations, enabling accurate tracking of variables - as they move between different storage locations. 
-- The serialized HIR describes user variables with accurate file/line/column - information and storage locations, providing a foundation for future tooling - (debugging, diagnostics correlation, or IR-level analysis). -- The design avoids redundancy by not embedding scope hierarchies in each variable, - instead relying on structural containment to establish relationships. From 98b6aee859e98d584fe7368b88fcd2208ec62f2f Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 1 Apr 2026 12:41:36 +0200 Subject: [PATCH 11/32] fixup: debug var loc issues in frontend and codegen --- codegen/masm/src/lower/lowering.rs | 42 ++++++++----------- frontend/wasm/src/module/debug_info.rs | 9 ++-- .../wasm/src/module/function_builder_ext.rs | 18 +++++--- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 2d5c01eb7..bc16732d9 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1303,48 +1303,40 @@ impl HirLowering for debuginfo::DebugValue { // Value has been dropped and we have no other location info, skip return Ok(()); } + // Resolve the runtime location. Returns None when the location cannot + // be determined (value dropped and no expression info), in which case + // we skip emitting the decorator entirely rather than emitting a + // placeholder — the debugger would have nothing useful to show. let value_location = if let Some(first_op) = expr.operations.first() { match first_op { - DIExpressionOp::WasmStack(offset) => DebugVarLocation::Stack(*offset as u8), + DIExpressionOp::WasmStack(offset) => Some(DebugVarLocation::Stack(*offset as u8)), DIExpressionOp::WasmLocal(idx) => { // WASM locals are always stored in memory via FMP in Miden. // Store raw WASM local index; the FMP offset will be computed // later in MasmFunctionBuilder::build() when num_locals is known. 
- DebugVarLocation::Local(*idx as i16) + i16::try_from(*idx).ok().map(DebugVarLocation::Local) } DIExpressionOp::WasmGlobal(_) | DIExpressionOp::Deref => { - // For global or dereference, check the stack position of the value - if let Some(pos) = emitter.stack.find(&value) { - DebugVarLocation::Stack(pos as u8) - } else { - DebugVarLocation::Expression(vec![]) - } + emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)) } - DIExpressionOp::ConstU64(val) => DebugVarLocation::Const(Felt::new(*val)), - DIExpressionOp::ConstS64(val) => DebugVarLocation::Const(Felt::new(*val as u64)), + DIExpressionOp::ConstU64(val) => Some(DebugVarLocation::Const(Felt::new(*val))), + DIExpressionOp::ConstS64(val) => Some(DebugVarLocation::Const(Felt::new(*val as u64))), DIExpressionOp::FrameBase { global_index, byte_offset } => { - DebugVarLocation::FrameBase { + Some(DebugVarLocation::FrameBase { global_index: *global_index, byte_offset: *byte_offset, - } + }) } _ => { - // For other operations, try to find the value on the stack - if let Some(pos) = emitter.stack.find(&value) { - DebugVarLocation::Stack(pos as u8) - } else { - DebugVarLocation::Expression(vec![]) - } + emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)) } } } else { - // No expression, try to find the value on the stack - if let Some(pos) = emitter.stack.find(&value) { - DebugVarLocation::Stack(pos as u8) - } else { - // Value not found, use expression - DebugVarLocation::Expression(vec![]) - } + emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)) + }; + + let Some(value_location) = value_location else { + return Ok(()); }; let mut debug_var = DebugVarInfo::new(var.name.to_string(), value_location); diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index d9ce731a6..c69c137d4 100644 --- a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -564,13 +564,14 @@ fn 
resolve_subprogram_target>( }, gimli::DW_AT_frame_base => { // Decode the frame base expression to find which WASM global - // provides the base address (typically __stack_pointer = global 0) + // provides the base address (typically __stack_pointer = global 0). + // Only WASM globals are supported — downstream FrameBase resolution + // assumes the index refers to a global in the linker's layout. if let AttributeValue::Exprloc(expr) = attr.value() { let mut ops = expr.operations(unit.encoding()); while let Ok(Some(op)) = ops.next() { - if let Operation::WasmLocal { index } = op { - // Frame base is a WASM local (unusual but possible) - frame_base_global = Some(index); + if let Operation::WasmLocal { .. } = op { + debug!("DW_AT_frame_base uses WASM local; only globals are supported — ignoring"); } else if let Operation::WasmGlobal { index } = op { frame_base_global = Some(index); } diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index 8c0674a8b..1fce8f210 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -275,14 +275,22 @@ impl FunctionBuilderExt<'_, B> { let value = match self.try_use_var(var) { Ok(v) => v, Err(_) => { - // For FrameBase-only variables (no WASM local), use a dummy SSA value. - // The FrameBase expression will override the value's stack position. if matches!(&entry.storage, VariableStorage::FrameBase { .. }) { + // FrameBase-only variables have no WASM local, so no SSA value + // exists for them. The debuginfo.value op requires an SSA operand, + // so we attach an existing parameter value as an anchor. The MASM + // lowering ignores this operand when the DIExpression contains + // FrameBase — the location is fully described by the expression. 
if let Some((_, v)) = self.param_values.first() { - let dummy = *v; - self.def_var(var, dummy); - dummy + let anchor = *v; + self.def_var(var, anchor); + anchor } else { + warn!( + "cannot track FrameBase variable (index {}): \ + no SSA value available (function has no parameters)", + entry.var_index + ); return; } } else { From f3426e6e4919a814c6c906abca1925ac9a30f9b2 Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 1 Apr 2026 13:34:42 +0200 Subject: [PATCH 12/32] fix: update lit tests for current HIR output and debugdump formats --- tests/lit/debug/function_metadata.shtest | 6 +++--- tests/lit/debug/location_expressions.shtest | 6 +++--- tests/lit/debug/simple_debug.shtest | 6 +++--- tests/lit/debug/variable_locations.shtest | 6 +++--- tests/lit/debugdump/locations-source-loc.wat | 14 ++++++------- tests/lit/debugdump/locations.wat | 4 ++-- tests/lit/debugdump/simple.wat | 2 +- tests/lit/debugdump/summary.wat | 2 +- .../source-location/test-project/Cargo.toml | 17 ++++++++++++++++ .../source-location/test-project/src/lib.rs | 20 +++++++++++++++++++ 10 files changed, 60 insertions(+), 23 deletions(-) create mode 100644 tests/lit/source-location/test-project/Cargo.toml create mode 100644 tests/lit/source-location/test-project/src/lib.rs diff --git a/tests/lit/debug/function_metadata.shtest b/tests/lit/debug/function_metadata.shtest index 23434d6fc..5909e6eea 100644 --- a/tests/lit/debug/function_metadata.shtest +++ b/tests/lit/debug/function_metadata.shtest @@ -1,6 +1,6 @@ # Test that HIR includes source locations for function parameters -# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/function_metadata.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/function_metadata.wasm\" && bin/midenc \"\$TMPDIR/function_metadata.wasm\" --entrypoint=multiply -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 
'tests/lit/debug/function_metadata.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/function_metadata.wasm\" && bin/midenc \"\$TMPDIR/function_metadata.wasm\" --entrypoint=function_metadata::multiply -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s # Check that function has source location annotations -# CHECK-LABEL: builtin.function @multiply -# CHECK: #loc("{{.*}}function_metadata.rs":{{[0-9]+}} +# CHECK-LABEL: builtin.function{{.*}}@multiply +# CHECK: loc({{.*}}function_metadata.rs:{{[0-9]+}} diff --git a/tests/lit/debug/location_expressions.shtest b/tests/lit/debug/location_expressions.shtest index 867d6e1b4..0d7ca8088 100644 --- a/tests/lit/debug/location_expressions.shtest +++ b/tests/lit/debug/location_expressions.shtest @@ -1,9 +1,9 @@ # Test that debug info with source locations is properly represented in HIR # This test verifies that operations include source location annotations -# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/location_expressions.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/location_expressions.wasm\" && bin/midenc \"\$TMPDIR/location_expressions.wasm\" --entrypoint=test_expressions -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/location_expressions.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/location_expressions.wasm\" && bin/midenc \"\$TMPDIR/location_expressions.wasm\" --entrypoint=location_expressions::test_expressions -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s # Test that the function exists with 4 parameters -# CHECK-LABEL: builtin.function @test_expressions({{.*}}: i32, {{.*}}: i32, {{.*}}: i32, {{.*}}: i32) -> i32 +# CHECK-LABEL: builtin.function{{.*}}@test_expressions({{.*}}: i32, {{.*}}: 
i32, {{.*}}: i32, {{.*}}: i32) -> i32 # Test that operations have source location annotations -# CHECK: #loc("{{.*}}location_expressions.rs":{{[0-9]+}} +# CHECK: loc({{.*}}location_expressions.rs:{{[0-9]+}} diff --git a/tests/lit/debug/simple_debug.shtest b/tests/lit/debug/simple_debug.shtest index 4df8ec78f..073ec66e3 100644 --- a/tests/lit/debug/simple_debug.shtest +++ b/tests/lit/debug/simple_debug.shtest @@ -1,6 +1,6 @@ # Test that basic debug info source locations are emitted for a simple function -# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/simple_debug.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/simple_debug.wasm\" && bin/midenc \"\$TMPDIR/simple_debug.wasm\" --entrypoint=add -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/simple_debug.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/simple_debug.wasm\" && bin/midenc \"\$TMPDIR/simple_debug.wasm\" --entrypoint=simple_debug::add -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s # Check that function has source location annotations -# CHECK-LABEL: builtin.function @add -# CHECK: #loc("{{.*}}simple_debug.rs":{{[0-9]+}} +# CHECK-LABEL: builtin.function{{.*}}@add +# CHECK: loc({{.*}}simple_debug.rs:{{[0-9]+}} diff --git a/tests/lit/debug/variable_locations.shtest b/tests/lit/debug/variable_locations.shtest index 5e6861546..2ddf07a5a 100644 --- a/tests/lit/debug/variable_locations.shtest +++ b/tests/lit/debug/variable_locations.shtest @@ -1,6 +1,6 @@ # Test that debug info tracks source locations in a loop -# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/variable_locations.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/variable_locations.wasm\" && bin/midenc 
\"\$TMPDIR/variable_locations.wasm\" --entrypoint=entrypoint -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s +# RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && rustc 'tests/lit/debug/variable_locations.rs' --crate-type=cdylib --target wasm32-unknown-unknown -C opt-level=0 -C panic=abort -C debuginfo=2 -o \"\$TMPDIR/variable_locations.wasm\" && bin/midenc \"\$TMPDIR/variable_locations.wasm\" --entrypoint=variable_locations::entrypoint -Z print-hir-source-locations --emit hir=- 2>&1" | filecheck %s # Check that function has source location annotations -# CHECK-LABEL: builtin.function @entrypoint -# CHECK: #loc("{{.*}}variable_locations.rs":{{[0-9]+}} +# CHECK-LABEL: builtin.function{{.*}}@entrypoint +# CHECK: loc({{.*}}variable_locations.rs:{{[0-9]+}} diff --git a/tests/lit/debugdump/locations-source-loc.wat b/tests/lit/debugdump/locations-source-loc.wat index 9b9441af3..5d95d7056 100644 --- a/tests/lit/debugdump/locations-source-loc.wat +++ b/tests/lit/debugdump/locations-source-loc.wat @@ -1,25 +1,25 @@ -;; Test that .debug_loc section shows DebugVar decorators with source locations +;; Test that .debug_loc section shows DebugVar entries with source locations ;; from a real Rust project compiled with debug info. 
;; ;; RUN: cargo build --release --target wasm32-unknown-unknown --manifest-path tests/lit/source-location/test-project/Cargo.toml 2>&1 ;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc 'tests/lit/source-location/test-project/target/wasm32-unknown-unknown/release/source_location_test.wasm' --lib --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s ;; Check header -;; CHECK: .debug_loc contents (DebugVar decorators from MAST): -;; CHECK: Total DebugVar decorators: 3 +;; CHECK: .debug_loc contents (DebugVar entries from MAST): +;; CHECK: Total DebugVar entries: 4 ;; CHECK: Unique variable names: 3 ;; Check variable "arg0" - parameter from test_assertion function ;; CHECK: Variable: "arg0" ;; CHECK: 1 location entries: -;; CHECK: local[0] (param #2) +;; CHECK: FMP-4 (param #2) ;; Check variable "local3" - from panic handler ;; CHECK: Variable: "local3" ;; CHECK: 1 location entries: -;; CHECK: stack[0] +;; CHECK: FMP-1 ;; Check variable "x" - parameter from entrypoint function ;; CHECK: Variable: "x" -;; CHECK: 1 location entries: -;; CHECK: local[0] (param #2) +;; CHECK: 2 location entries: +;; CHECK: FMP-4 (param #2) diff --git a/tests/lit/debugdump/locations.wat b/tests/lit/debugdump/locations.wat index aead42315..3b05ff93c 100644 --- a/tests/lit/debugdump/locations.wat +++ b/tests/lit/debugdump/locations.wat @@ -2,9 +2,9 @@ ;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s ;; Check header for .debug_loc section -;; CHECK: .debug_loc contents (DebugVar decorators from MAST): +;; CHECK: .debug_loc contents (DebugVar entries from MAST): ;; For raw WAT files without debug info, we expect no decorators -;; CHECK: (no DebugVar decorators found) +;; CHECK: (no DebugVar entries found) (module (func $add (export "add") 
(param i32 i32) (result i32) diff --git a/tests/lit/debugdump/simple.wat b/tests/lit/debugdump/simple.wat index 3d9312e19..62426740c 100644 --- a/tests/lit/debugdump/simple.wat +++ b/tests/lit/debugdump/simple.wat @@ -3,7 +3,7 @@ ;; Check header ;; CHECK: DEBUG INFO DUMP: -;; CHECK: Debug info version: 1 +;; CHECK: Debug info versions: ;; Check summary section is present ;; CHECK: .debug_info summary: diff --git a/tests/lit/debugdump/summary.wat b/tests/lit/debugdump/summary.wat index a127c8d07..ae73b73dc 100644 --- a/tests/lit/debugdump/summary.wat +++ b/tests/lit/debugdump/summary.wat @@ -3,7 +3,7 @@ ;; Check summary is present ;; CHECK: .debug_info summary: -;; CHECK: Strings:{{.*}}entries +;; CHECK: Strings: ;; CHECK: Types:{{.*}}entries ;; CHECK: Files:{{.*}}entries ;; CHECK: Functions:{{.*}}entries diff --git a/tests/lit/source-location/test-project/Cargo.toml b/tests/lit/source-location/test-project/Cargo.toml new file mode 100644 index 000000000..9abe2f178 --- /dev/null +++ b/tests/lit/source-location/test-project/Cargo.toml @@ -0,0 +1,17 @@ +cargo-features = ["trim-paths"] + +[package] +name = "source_location_test" +version = "0.1.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[profile.release] +debug = true +trim-paths = ["diagnostics", "object"] + +[profile.dev] +debug = true +trim-paths = ["diagnostics", "object"] diff --git a/tests/lit/source-location/test-project/src/lib.rs b/tests/lit/source-location/test-project/src/lib.rs new file mode 100644 index 000000000..35082cd10 --- /dev/null +++ b/tests/lit/source-location/test-project/src/lib.rs @@ -0,0 +1,20 @@ +#![no_std] +#![no_main] + +#[panic_handler] +fn my_panic(_info: &core::panic::PanicInfo) -> ! 
{ + core::arch::wasm32::unreachable() +} + +#[unsafe(no_mangle)] +pub extern "C" fn test_assertion(x: u32) -> u32 { + assert!(x > 100, "x should be greater than 100"); + + x +} + +#[unsafe(no_mangle)] +#[inline(never)] +pub fn entrypoint(x: u32) -> u32 { + test_assertion(x) +} From 3351d0fb8c57d023a35ff3c111dc72116473029c Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 1 Apr 2026 14:33:15 +0200 Subject: [PATCH 13/32] fix: adapt debugdump to upstream Package.version API change --- tools/debugdump/src/main.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/debugdump/src/main.rs b/tools/debugdump/src/main.rs index 08f1e9faa..fec90e710 100644 --- a/tools/debugdump/src/main.rs +++ b/tools/debugdump/src/main.rs @@ -182,11 +182,7 @@ fn run() -> Result<(), Error> { println!( "Package: {} (version: {})", package.name, - package - .version - .as_ref() - .map(|v| v.to_string()) - .unwrap_or_else(|| "unknown".into()) + package.version ); println!( "Debug info versions: types={}, sources={}, functions={}", From 188a5749f416e253500324ffd5e9e10f79bae9a5 Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 1 Apr 2026 14:46:17 +0200 Subject: [PATCH 14/32] chore: fix formatting --- codegen/masm/src/lower/component.rs | 22 ++++- codegen/masm/src/lower/lowering.rs | 24 +++-- dialects/debuginfo/src/ops.rs | 2 +- dialects/scf/src/transforms/cfg_to_scf.rs | 32 ++----- frontend/wasm/src/module/debug_info.rs | 93 ++++++++++++------- .../wasm/src/module/function_builder_ext.rs | 12 +-- hir-transform/src/sink.rs | 10 +- hir/src/attributes/debug.rs | 7 +- midenc-compile/src/debug_info.rs | 8 +- tools/debugdump/src/main.rs | 79 ++++------------ 10 files changed, 139 insertions(+), 150 deletions(-) diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index bc0b13024..b5b704727 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -726,7 +726,9 @@ fn patch_debug_var_locals_in_block( // Convert raw 
WASM local index to FMP offset let fmp_offset = *idx - (aligned_num_locals as i16); info.set_value_location(DebugVarLocation::Local(fmp_offset)); - } else if let DebugVarLocation::FrameBase { byte_offset, .. } = info.value_location() { + } else if let DebugVarLocation::FrameBase { byte_offset, .. } = + info.value_location() + { // Resolve FrameBase: replace WASM global index with // the Miden memory address of the stack pointer global. if let Some(resolved_addr) = stack_pointer_addr { @@ -739,15 +741,25 @@ fn patch_debug_var_locals_in_block( } } } - masm::Op::If { then_blk, else_blk, .. } => { + masm::Op::If { + then_blk, else_blk, .. + } => { patch_debug_var_locals_in_block(then_blk, aligned_num_locals, stack_pointer_addr); patch_debug_var_locals_in_block(else_blk, aligned_num_locals, stack_pointer_addr); } - masm::Op::While { body: while_body, .. } => { + masm::Op::While { + body: while_body, .. + } => { patch_debug_var_locals_in_block(while_body, aligned_num_locals, stack_pointer_addr); } - masm::Op::Repeat { body: repeat_body, .. } => { - patch_debug_var_locals_in_block(repeat_body, aligned_num_locals, stack_pointer_addr); + masm::Op::Repeat { + body: repeat_body, .. 
+ } => { + patch_debug_var_locals_in_block( + repeat_body, + aligned_num_locals, + stack_pointer_addr, + ); } } } diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index bc16732d9..cdbba0836 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1277,7 +1277,10 @@ impl HirLowering for debuginfo::DebugValue { } fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { - use miden_core::{Felt, operations::{DebugVarInfo, DebugVarLocation}}; + use miden_core::{ + Felt, + operations::{DebugVarInfo, DebugVarLocation}, + }; use midenc_hir::DIExpressionOp; // Get the variable info @@ -1320,16 +1323,17 @@ impl HirLowering for debuginfo::DebugValue { emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)) } DIExpressionOp::ConstU64(val) => Some(DebugVarLocation::Const(Felt::new(*val))), - DIExpressionOp::ConstS64(val) => Some(DebugVarLocation::Const(Felt::new(*val as u64))), - DIExpressionOp::FrameBase { global_index, byte_offset } => { - Some(DebugVarLocation::FrameBase { - global_index: *global_index, - byte_offset: *byte_offset, - }) - } - _ => { - emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)) + DIExpressionOp::ConstS64(val) => { + Some(DebugVarLocation::Const(Felt::new(*val as u64))) } + DIExpressionOp::FrameBase { + global_index, + byte_offset, + } => Some(DebugVarLocation::FrameBase { + global_index: *global_index, + byte_offset: *byte_offset, + }), + _ => emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)), } } else { emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)) diff --git a/dialects/debuginfo/src/ops.rs b/dialects/debuginfo/src/ops.rs index 1c948e24f..f60a4dcd9 100644 --- a/dialects/debuginfo/src/ops.rs +++ b/dialects/debuginfo/src/ops.rs @@ -2,9 +2,9 @@ use midenc_hir::{ DIExpressionAttr, DILocalVariableAttr, UnsafeIntrusiveEntityRef, derive::operation, traits::AnyType, }; + // Note: 
DILocalVariableAttr and DIExpressionAttr are now the generated wrapper // types from #[derive(DialectAttribute)] on DILocalVariable and DIExpression. - use crate::DebugInfoDialect; pub type DebugValueRef = UnsafeIntrusiveEntityRef; diff --git a/dialects/scf/src/transforms/cfg_to_scf.rs b/dialects/scf/src/transforms/cfg_to_scf.rs index e6b1c15f9..0bd6834c4 100644 --- a/dialects/scf/src/transforms/cfg_to_scf.rs +++ b/dialects/scf/src/transforms/cfg_to_scf.rs @@ -870,49 +870,33 @@ mod tests { let block = builder.current_block(); let input = block.borrow().arguments()[0].upcast(); - let input_var = DILocalVariable::new( - Symbol::intern("input"), - Symbol::intern("test.rs"), - 1, - Some(1), - ); - let result_var = DILocalVariable::new( - Symbol::intern("result"), - Symbol::intern("test.rs"), - 2, - Some(1), - ); + let input_var = + DILocalVariable::new(Symbol::intern("input"), Symbol::intern("test.rs"), 1, Some(1)); + let result_var = + DILocalVariable::new(Symbol::intern("result"), Symbol::intern("test.rs"), 2, Some(1)); let zero = builder.u32(0, span); let is_zero = builder.eq(input, zero, span)?; // Track the input variable - builder - .builder_mut() - .debug_value(input, input_var.clone(), span)?; + builder.builder_mut().debug_value(input, input_var.clone(), span)?; builder.cond_br(is_zero, if_is_zero, [], if_is_nonzero, [], span)?; builder.switch_to_block(if_is_zero); let a = builder.incr(input, span)?; // Track result in then-branch - builder - .builder_mut() - .debug_value(a, result_var.clone(), span)?; + builder.builder_mut().debug_value(a, result_var.clone(), span)?; builder.br(exit_block, [a], span)?; builder.switch_to_block(if_is_nonzero); let b = builder.mul(input, input, span)?; // Track result in else-branch - builder - .builder_mut() - .debug_value(b, result_var.clone(), span)?; + builder.builder_mut().debug_value(b, result_var.clone(), span)?; builder.br(exit_block, [b], span)?; builder.switch_to_block(exit_block); // KEY: this debug_value uses the 
block argument `return_val`, which will be // replaced by the scf.if result via replace_all_uses_with - builder - .builder_mut() - .debug_value(return_val, result_var.clone(), span)?; + builder.builder_mut().debug_value(return_val, result_var.clone(), span)?; builder.ret(Some(return_val), span)?; let operation = function.as_operation_ref(); diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index c69c137d4..af5982220 100644 --- a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -7,8 +7,8 @@ use cranelift_entity::EntityRef; use gimli::{self, AttributeValue, read::Operation}; use log::debug; use midenc_hir::{ - DICompileUnit, DIExpression, DIExpressionOp, DILocalVariable, DISubprogram, - FxHashMap, SourceSpan, interner::Symbol, + DICompileUnit, DIExpression, DIExpressionOp, DILocalVariable, DISubprogram, FxHashMap, + SourceSpan, interner::Symbol, }; use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic}; @@ -35,7 +35,10 @@ pub enum VariableStorage { Stack(u32), ConstU64(u64), /// Frame base (global index) + byte offset — from DW_OP_fbreg - FrameBase { global_index: u32, byte_offset: i64 }, + FrameBase { + global_index: u32, + byte_offset: i64, + }, Unsupported, } @@ -53,12 +56,13 @@ impl VariableStorage { VariableStorage::Global(idx) => DIExpressionOp::WasmGlobal(*idx), VariableStorage::Stack(idx) => DIExpressionOp::WasmStack(*idx), VariableStorage::ConstU64(val) => DIExpressionOp::ConstU64(*val), - VariableStorage::FrameBase { global_index, byte_offset } => { - DIExpressionOp::FrameBase { - global_index: *global_index, - byte_offset: *byte_offset, - } - } + VariableStorage::FrameBase { + global_index, + byte_offset, + } => DIExpressionOp::FrameBase { + global_index: *global_index, + byte_offset: *byte_offset, + }, VariableStorage::Unsupported => { DIExpressionOp::Unsupported(Symbol::intern("unsupported")) } @@ -268,12 +272,8 @@ fn build_local_debug_info( { name_symbol = 
symbol; } - let mut attr = DILocalVariable::new( - name_symbol, - subprogram.file, - subprogram.line, - subprogram.column, - ); + let mut attr = + DILocalVariable::new(name_symbol, subprogram.file, subprogram.line, subprogram.column); attr.arg_index = Some((param_idx + 1) as u32); if let Ok(ty) = ir_type(*wasm_ty, diagnostics) { attr.ty = Some(ty); @@ -366,12 +366,8 @@ fn build_local_debug_info( if let Some(fb_vars) = frame_base_vars { for fb_var in fb_vars { let name = fb_var.name.unwrap_or_else(|| Symbol::intern("?")); - let mut attr = DILocalVariable::new( - name, - subprogram.file, - subprogram.line, - subprogram.column, - ); + let mut attr = + DILocalVariable::new(name, subprogram.file, subprogram.line, subprogram.column); if let Some(line) = fb_var.decl_line.filter(|l| *l != 0) { attr.line = line; } @@ -496,7 +492,10 @@ fn collect_dwarf_local_data( &mut results, &mut fb_results, ) { - debug!("failed to gather variables for function {:?}: {err:?}", info.func_index); + debug!( + "failed to gather variables for function {:?}: {err:?}", + info.func_index + ); } } } @@ -571,7 +570,10 @@ fn resolve_subprogram_target>( let mut ops = expr.operations(unit.encoding()); while let Ok(Some(op)) = ops.next() { if let Operation::WasmLocal { .. } = op { - debug!("DW_AT_frame_base uses WASM local; only globals are supported — ignoring"); + debug!( + "DW_AT_frame_base uses WASM local; only globals are supported — \ + ignoring" + ); } else if let Operation::WasmGlobal { index } = op { frame_base_global = Some(index); } @@ -620,8 +622,16 @@ fn collect_subprogram_variables>( let mut param_counter: u32 = 0; while let Some(child) = children.next()? 
{ walk_variable_nodes( - dwarf, unit, child, func_index, low_pc, high_pc, frame_base_global, results, - fb_results, &mut param_counter, + dwarf, + unit, + child, + func_index, + low_pc, + high_pc, + frame_base_global, + results, + fb_results, + &mut param_counter, )?; } Ok(()) @@ -653,9 +663,16 @@ fn walk_variable_nodes>( None }; let mut fb_vars = Vec::new(); - if let Some((local_index, mut data)) = - decode_variable_entry(dwarf, unit, entry, low_pc, high_pc, frame_base_global, fallback_index, &mut fb_vars)? - { + if let Some((local_index, mut data)) = decode_variable_entry( + dwarf, + unit, + entry, + low_pc, + high_pc, + frame_base_global, + fallback_index, + &mut fb_vars, + )? { let local_map = results.entry(func_index).or_default(); let entry = local_map.entry(local_index).or_insert_with(DwarfLocalData::default); entry.name = entry.name.or(data.name); @@ -675,8 +692,16 @@ fn walk_variable_nodes>( let mut children = node.children(); while let Some(child) = children.next()? { walk_variable_nodes( - dwarf, unit, child, func_index, low_pc, high_pc, frame_base_global, results, - fb_results, param_counter, + dwarf, + unit, + child, + func_index, + low_pc, + high_pc, + frame_base_global, + results, + fb_results, + param_counter, )?; } Ok(()) @@ -781,8 +806,12 @@ fn decode_variable_entry>( let mut has_frame_base = false; while let Some(entry) = iter.next()? { let storage_expr = entry.data; - if let Some(storage) = decode_storage_from_expression(&storage_expr, unit, frame_base_global)? { - if storage.as_local().is_some() || matches!(&storage, VariableStorage::FrameBase { .. }) { + if let Some(storage) = + decode_storage_from_expression(&storage_expr, unit, frame_base_global)? + { + if storage.as_local().is_some() + || matches!(&storage, VariableStorage::FrameBase { .. }) + { if matches!(&storage, VariableStorage::FrameBase { .. 
}) { has_frame_base = true; } diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index 1fce8f210..948bad457 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -11,8 +11,8 @@ use midenc_dialect_hir::HirOpBuilder; use midenc_dialect_ub::UndefinedBehaviorOpBuilder; use midenc_dialect_wasm::WasmOpBuilder; use midenc_hir::{ - BlockRef, Builder, Context, EntityRef, FxHashMap, FxHashSet, Ident, Listener, ListenerType, - Op, OpBuilder, OperationRef, ProgramPoint, RegionRef, SmallVec, SourceSpan, Type, ValueRef, + BlockRef, Builder, Context, EntityRef, FxHashMap, FxHashSet, Ident, Listener, ListenerType, Op, + OpBuilder, OperationRef, ProgramPoint, RegionRef, SmallVec, SourceSpan, Type, ValueRef, dialects::builtin::{ BuiltinOpBuilder, FunctionBuilder, FunctionRef, attributes::{LocalVariable, Signature}, @@ -287,8 +287,8 @@ impl FunctionBuilderExt<'_, B> { anchor } else { warn!( - "cannot track FrameBase variable (index {}): \ - no SSA value available (function has no parameters)", + "cannot track FrameBase variable (index {}): no SSA value available \ + (function has no parameters)", entry.var_index ); return; @@ -325,8 +325,8 @@ impl FunctionBuilderExt<'_, B> { attr.column = column; } - if let Err(err) = - DebugInfoOpBuilder::builder_mut(self).debug_value_with_expr(value, attr, expression, span) + if let Err(err) = DebugInfoOpBuilder::builder_mut(self) + .debug_value_with_expr(value, attr, expression, span) { warn!("failed to emit scheduled dbg.value for local {idx}: {err:?}"); } diff --git a/hir-transform/src/sink.rs b/hir-transform/src/sink.rs index 0474c2556..d8b6782bf 100644 --- a/hir-transform/src/sink.rs +++ b/hir-transform/src/sink.rs @@ -25,9 +25,9 @@ fn is_debug_info_op(op: &Operation) -> bool { /// Debug uses are excluded because they are observational and should never /// prevent value-producing operations from being moved or 
eliminated. fn is_sole_non_debug_user(value: &dyn Value, operation: OperationRef) -> bool { - value.iter_uses().all(|user| { - user.owner == operation || is_debug_info_op(&user.owner.borrow()) - }) + value + .iter_uses() + .all(|user| user.owner == operation || is_debug_info_op(&user.owner.borrow())) } /// Returns `true` if the only remaining uses of the given value are debug info uses @@ -325,8 +325,8 @@ impl Pass for SinkOperandDefs { // now if it has no side effects. let is_memory_effect_free = op.is_memory_effect_free() || op.implements::(); - let only_debug_uses = !op.is_used() - || op.results().iter().all(|r| has_only_debug_uses(&*r.borrow())); + let only_debug_uses = + !op.is_used() || op.results().iter().all(|r| has_only_debug_uses(&*r.borrow())); if only_debug_uses && is_memory_effect_free && !op.implements::() diff --git a/hir/src/attributes/debug.rs b/hir/src/attributes/debug.rs index b004d6ec2..7e3421eb9 100644 --- a/hir/src/attributes/debug.rs +++ b/hir/src/attributes/debug.rs @@ -305,9 +305,10 @@ impl PrettyPrint for DIExpression { DIExpressionOp::BitPiece { size, offset } => { text(format!("DW_OP_bit_piece {} {}", size, offset)) } - DIExpressionOp::FrameBase { global_index, byte_offset } => { - text(format!("DW_OP_fbreg global[{}]{:+}", global_index, byte_offset)) - } + DIExpressionOp::FrameBase { + global_index, + byte_offset, + } => text(format!("DW_OP_fbreg global[{}]{:+}", global_index, byte_offset)), DIExpressionOp::Unsupported(name) => text(name.as_str()), }; } diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs index 481fbc92e..688f2a2a2 100644 --- a/midenc-compile/src/debug_info.rs +++ b/midenc-compile/src/debug_info.rs @@ -154,8 +154,8 @@ impl DebugInfoBuilder { fn collect_from_function(&mut self, function: &builtin::Function) { // Try to get DISubprogram from the function's attributes - let subprogram_attr = function - .get_attribute(midenc_hir::interner::Symbol::intern("di.subprogram")); + let subprogram_attr = 
+ function.get_attribute(midenc_hir::interner::Symbol::intern("di.subprogram")); let subprogram = subprogram_attr.and_then(|attr| { let borrowed = attr.borrow(); @@ -308,9 +308,7 @@ fn hir_type_to_debug_type(ty: &Type, builder: &mut DebugInfoBuilder) -> DebugTyp fn type_to_key(ty: &DebugTypeInfo) -> TypeKey { match ty { DebugTypeInfo::Primitive(p) => TypeKey::Primitive(*p as u8), - DebugTypeInfo::Pointer { pointee_type_idx } => { - TypeKey::Pointer(pointee_type_idx.as_u32()) - } + DebugTypeInfo::Pointer { pointee_type_idx } => TypeKey::Pointer(pointee_type_idx.as_u32()), DebugTypeInfo::Array { element_type_idx, count, diff --git a/tools/debugdump/src/main.rs b/tools/debugdump/src/main.rs index fec90e710..2ffc3574e 100644 --- a/tools/debugdump/src/main.rs +++ b/tools/debugdump/src/main.rs @@ -139,18 +139,9 @@ fn run() -> Result<(), Error> { let mast_forest = package.mast.mast_forest(); // Find the three debug sections - let types_section = package - .sections - .iter() - .find(|s| s.id == SectionId::DEBUG_TYPES); - let sources_section = package - .sections - .iter() - .find(|s| s.id == SectionId::DEBUG_SOURCES); - let functions_section = package - .sections - .iter() - .find(|s| s.id == SectionId::DEBUG_FUNCTIONS); + let types_section = package.sections.iter().find(|s| s.id == SectionId::DEBUG_TYPES); + let sources_section = package.sections.iter().find(|s| s.id == SectionId::DEBUG_SOURCES); + let functions_section = package.sections.iter().find(|s| s.id == SectionId::DEBUG_FUNCTIONS); // We need at least one section to proceed if types_section.is_none() && sources_section.is_none() && functions_section.is_none() { @@ -174,16 +165,16 @@ fn run() -> Result<(), Error> { None => DebugFunctionsSection::new(), }; - let debug_sections = DebugSections { types, sources, functions }; + let debug_sections = DebugSections { + types, + sources, + functions, + }; // Print header println!("{}", "=".repeat(80)); println!("DEBUG INFO DUMP: {}", cli.input.display()); - println!( - 
"Package: {} (version: {})", - package.name, - package.version - ); + println!("Package: {} (version: {})", package.name, package.version); println!( "Debug info versions: types={}, sources={}, functions={}", debug_sections.types.version, @@ -234,23 +225,12 @@ fn print_summary(debug_sections: &DebugSections, mast_forest: &MastForest) { ); println!(" Types: {} entries", debug_sections.types.types.len()); println!(" Files: {} entries", debug_sections.sources.files.len()); - println!( - " Functions: {} entries", - debug_sections.functions.functions.len() - ); + println!(" Functions: {} entries", debug_sections.functions.functions.len()); - let total_vars: usize = debug_sections - .functions - .functions - .iter() - .map(|f| f.variables.len()) - .sum(); - let total_inlined: usize = debug_sections - .functions - .functions - .iter() - .map(|f| f.inlined_calls.len()) - .sum(); + let total_vars: usize = + debug_sections.functions.functions.iter().map(|f| f.variables.len()).sum(); + let total_inlined: usize = + debug_sections.functions.functions.iter().map(|f| f.inlined_calls.len()).sum(); println!(" Variables: {} total (across all functions)", total_vars); println!(" Inlined: {} call sites", total_inlined); @@ -294,11 +274,7 @@ fn print_type(ty: &DebugTypeInfo, debug_sections: &DebugSections, raw: bool, ind match ty { DebugTypeInfo::Primitive(prim) => { print!("{}PRIMITIVE: {}", pad, primitive_name(*prim)); - print!( - " (size: {} bytes, {} felts)", - prim.size_in_bytes(), - prim.size_in_felts() - ); + print!(" (size: {} bytes, {} felts)", prim.size_in_bytes(), prim.size_in_felts()); } DebugTypeInfo::Pointer { pointee_type_idx } => { if raw { @@ -317,12 +293,7 @@ fn print_type(ty: &DebugTypeInfo, debug_sections: &DebugSections, raw: bool, ind count, } => { if raw { - print!( - "{}ARRAY [{}; {:?}]", - pad, - element_type_idx.as_u32(), - count - ); + print!("{}ARRAY [{}; {:?}]", pad, element_type_idx.as_u32(), count); } else { print!("{}ARRAY [", pad); if let Some(elem) = 
debug_sections.get_type(*element_type_idx) { @@ -344,9 +315,7 @@ fn print_type(ty: &DebugTypeInfo, debug_sections: &DebugSections, raw: bool, ind let name = if raw { format!("str[{}]", name_idx) } else { - debug_sections - .get_type_string(*name_idx) - .unwrap_or_else(|| "".into()) + debug_sections.get_type_string(*name_idx).unwrap_or_else(|| "".into()) }; print!("{}STRUCT {} (size: {} bytes)", pad, name, size); if !fields.is_empty() { @@ -431,9 +400,7 @@ fn print_type_brief(ty: &DebugTypeInfo, debug_sections: &DebugSections) { DebugTypeInfo::Struct { name_idx, .. } => { print!( "struct {}", - debug_sections - .get_type_string(*name_idx) - .unwrap_or_else(|| "?".into()) + debug_sections.get_type_string(*name_idx).unwrap_or_else(|| "?".into()) ); } DebugTypeInfo::Function { .. } => print!("fn(...)"), @@ -539,10 +506,7 @@ fn print_function( .and_then(|f| debug_sections.get_source_string(f.path_idx)) .unwrap_or_else(|| "".into()) }; - println!( - " Location: {}:{}:{}", - file_path, func.line, func.column - ); + println!(" Location: {}:{}:{}", file_path, func.line, func.column); // Type if let Some(type_idx) = func.type_idx { @@ -576,10 +540,7 @@ fn print_function( // Inlined calls if !func.inlined_calls.is_empty() && verbose { - println!( - " Inlined calls ({}):", - func.inlined_calls.len() - ); + println!(" Inlined calls ({}):", func.inlined_calls.len()); for call in &func.inlined_calls { let callee = if raw { format!("func[{}]", call.callee_idx) From 5a372f01df778e998f4b1dc6df3bb1cb7377a6bf Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 14 Apr 2026 11:09:57 +0200 Subject: [PATCH 15/32] fix: post-rebase cleanup for origin/next --- Cargo.lock | 142 +++++++----------- Cargo.toml | 18 ++- dialects/scf/src/transforms/cfg_to_scf.rs | 30 +--- ..._to_scf_debug_value_preservation_after.hir | 31 ++-- ...to_scf_debug_value_preservation_before.hir | 29 ++-- hir-macros/src/operation.rs | 1 + 6 files changed, 101 insertions(+), 150 deletions(-) diff --git a/Cargo.lock 
b/Cargo.lock index 731c453da..1624ff6f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2398,7 +2398,7 @@ dependencies = [ "miden-crypto", "miden-protocol", "miden-standards", - "miden-utils-sync 0.22.2", ++ "miden-utils-sync", "primitive-types", "regex", "serde", @@ -2411,44 +2411,33 @@ dependencies = [ name = "miden-air" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45551e1417cb2be47064c36fe6e1e69ab10ad7b4b55f0731d8cac109b7738b9" -dependencies = [ - "miden-core", - "miden-crypto", - "miden-utils-indexing 0.22.2", - "thiserror 2.0.18", - "tracing", -] - -[[package]] -name = "miden-air" -version = "0.22.3" -dependencies = [ - "miden-core", - "miden-crypto", - "miden-utils-indexing 0.22.3", + checksum = "d15646ebc95906b2a7cb66711d1e184f53fd6edc2605730bbcf0c2a129f792cf" + dependencies = [ + "miden-core", + "miden-crypto", + "miden-utils-indexing", "thiserror 2.0.18", "tracing", ] [[package]] name = "miden-assembly" -version = "0.22.3" +version = "0.22.1" dependencies = [ "env_logger", "log", "miden-assembly-syntax", "miden-core", "miden-mast-package", - "miden-package-registry 0.22.3", - "miden-project 0.22.3", + "miden-package-registry", + "miden-project", "smallvec", "thiserror 2.0.18", ] [[package]] name = "miden-assembly-syntax" -version = "0.22.3" +version = "0.22.1" dependencies = [ "aho-corasick", "env_logger", @@ -2556,7 +2545,7 @@ dependencies = [ [[package]] name = "miden-core" -version = "0.22.3" +version = "0.22.1" dependencies = [ "derive_more", "itertools 0.14.0", @@ -2564,8 +2553,8 @@ dependencies = [ "miden-debug-types", "miden-formatting", "miden-utils-core-derive", - "miden-utils-indexing 0.22.3", - "miden-utils-sync 0.22.3", + "miden-utils-indexing", + "miden-utils-sync", "num-derive", "num-traits", "proptest", @@ -2577,17 +2566,16 @@ dependencies = [ [[package]] name = "miden-core-lib" version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2d2ea7e17c4382255c6e0cb1e4b90693449dcf5a286a844e2918af66b371c0ab" + source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "env_logger", "fs-err", "miden-assembly", "miden-core", "miden-crypto", - "miden-package-registry 0.22.2", + "miden-package-registry", "miden-processor", - "miden-utils-sync 0.22.2", + "miden-utils-sync", "thiserror 2.0.18", ] @@ -2725,14 +2713,14 @@ dependencies = [ [[package]] name = "miden-debug-types" -version = "0.22.3" +version = "0.22.1" dependencies = [ "memchr", "miden-crypto", "miden-formatting", "miden-miette", - "miden-utils-indexing 0.22.3", - "miden-utils-sync 0.22.3", + "miden-utils-indexing", + "miden-utils-sync", "paste", "serde", "serde_spanned 1.1.1", @@ -2870,7 +2858,7 @@ dependencies = [ [[package]] name = "miden-mast-package" -version = "0.22.3" +version = "0.22.1" dependencies = [ "derive_more", "miden-assembly-syntax", @@ -2954,42 +2942,28 @@ dependencies = [ [[package]] name = "miden-package-registry" -version = "0.22.2" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e07af92dc184a71132a34d89ad15e69633435bfd36fb5af4ce18b200bd1952e5" -dependencies = [ - "miden-assembly-syntax", - "miden-core", - "miden-mast-package", - "pubgrub", - "serde", - "smallvec", - "thiserror 2.0.18", -] - -[[package]] -name = "miden-package-registry" -version = "0.22.3" -dependencies = [ - "miden-assembly-syntax", - "miden-core", - "miden-mast-package", - "pubgrub", - "serde", - "smallvec", - "thiserror 2.0.18", -] + dependencies = [ + "miden-assembly-syntax", + "miden-core", + "miden-mast-package", + "pubgrub", + "serde", + "smallvec", + "thiserror 2.0.18", + ] [[package]] name = "miden-processor" -version = "0.22.3" +version = "0.22.1" dependencies = [ "itertools 0.14.0", - "miden-air 0.22.3", + "miden-air", "miden-core", "miden-debug-types", "miden-utils-diagnostics", - "miden-utils-indexing 0.22.3", + "miden-utils-indexing", "paste", "rayon", "thiserror 2.0.18", @@ 
-3011,20 +2985,6 @@ dependencies = [ "toml 1.1.2+spec-1.1.0", ] -[[package]] -name = "miden-project" -version = "0.22.3" -dependencies = [ - "miden-assembly-syntax", - "miden-core", - "miden-mast-package", - "miden-package-registry 0.22.3", - "serde", - "serde-untagged", - "thiserror 2.0.18", - "toml 1.1.2+spec-1.1.0", -] - [[package]] name = "miden-protocol" version = "0.14.5" @@ -3042,7 +3002,7 @@ dependencies = [ "miden-mast-package", "miden-processor", "miden-protocol-macros", - "miden-utils-sync 0.22.2", + "miden-utils-sync", "miden-verifier", "rand 0.9.4", "rand_chacha", @@ -3224,7 +3184,7 @@ dependencies = [ [[package]] name = "miden-utils-core-derive" -version = "0.22.3" +version = "0.22.1" dependencies = [ "proc-macro2", "quote", @@ -3233,7 +3193,7 @@ dependencies = [ [[package]] name = "miden-utils-diagnostics" -version = "0.22.3" +version = "0.22.1" dependencies = [ "miden-crypto", "miden-debug-types", @@ -3249,33 +3209,37 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5cc2e62161113179a370ae0bf1fd33eb8d20b6131e8559d2dc0bead5cffae586" dependencies = [ "miden-crypto", + "serde", "thiserror 2.0.18", ] [[package]] name = "miden-utils-indexing" -version = "0.22.3" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8834e76299686bcce3de1685158aa4cff49b7fa5e0e00a6cc811e8f2cf5775f" dependencies = [ "miden-crypto", - "serde", "thiserror 2.0.18", ] [[package]] -name = "miden-utils-sync" + name = "miden-utils-sync" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e9210b3592b577843710daf68293087c68b53d8482c82f6875ad83d578cb51e" -dependencies = [ - "lock_api", - "loom", - "once_cell", - "parking_lot", -] - -[[package]] -name = "miden-utils-sync" -version = "0.22.3" + dependencies = [ + "lock_api", + "loom", + "once_cell", + "parking_lot", + ] + + [[package]] + name = "miden-utils-sync" +version = "0.22.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9e9747e9664c1a0997bb040ae291306ea0a1c74a572141ec66cec855c1b0e8" dependencies = [ "lock_api", "loom", diff --git a/Cargo.toml b/Cargo.toml index 51c48a1b5..bc992022e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,12 +163,18 @@ midenc-expect-test = { path = "tools/expect-test" } miden-field = { version = "^0.24" } [patch.crates-io] -miden-assembly = { path = "../miden-vm/crates/assembly" } -miden-assembly-syntax = { path = "../miden-vm/crates/assembly-syntax" } -miden-core = { path = "../miden-vm/core" } -miden-debug-types = { path = "../miden-vm/crates/debug-types" } -miden-mast-package = { path = "../miden-vm/crates/mast-package" } -miden-processor = { path = "../miden-vm/processor" } +miden-assembly = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-assembly-syntax = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-core = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-core-lib = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-debug-types = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-mast-package = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-package-registry = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-processor = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-project = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-utils-diagnostics = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-utils-indexing = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } +miden-utils-sync = { git = 
"https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } # miden-protocol = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } # miden-standards = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } # miden-tx = { tag = "v0.14.0-beta.4", git = "https://github.com/0xMiden/miden-base" } diff --git a/dialects/scf/src/transforms/cfg_to_scf.rs b/dialects/scf/src/transforms/cfg_to_scf.rs index 0bd6834c4..4ada35735 100644 --- a/dialects/scf/src/transforms/cfg_to_scf.rs +++ b/dialects/scf/src/transforms/cfg_to_scf.rs @@ -845,22 +845,14 @@ mod tests { #[test] fn cfg_to_scf_debug_value_preservation() -> Result<(), Report> { use midenc_dialect_debuginfo::{DebugInfoDialect, DebugInfoOpBuilder}; - use midenc_hir::DILocalVariable; + use midenc_hir::{DILocalVariable, interner::Symbol}; - let context = Rc::new(Context::default()); - context.get_or_register_dialect::(); - let mut builder = OpBuilder::new(context.clone()); + let mut test = + Test::new("cfg_to_scf_debug_value_preservation", &[Type::U32], &[Type::U32]); + test.context().get_or_register_dialect::(); let span = SourceSpan::default(); - let function = { - let builder = builder.create::(span); - let name = Ident::new("test".into(), span); - let signature = Signature::new([AbiParam::new(Type::U32)], [AbiParam::new(Type::U32)]); - builder(name, signature).unwrap() - }; - - // Define function body - let mut builder = FunctionBuilder::new(function, &mut builder); + let mut builder = test.function_builder(); let if_is_zero = builder.create_block(); let if_is_nonzero = builder.create_block(); @@ -877,19 +869,16 @@ mod tests { let zero = builder.u32(0, span); let is_zero = builder.eq(input, zero, span)?; - // Track the input variable builder.builder_mut().debug_value(input, input_var.clone(), span)?; builder.cond_br(is_zero, if_is_zero, [], if_is_nonzero, [], span)?; builder.switch_to_block(if_is_zero); let a = 
builder.incr(input, span)?; - // Track result in then-branch builder.builder_mut().debug_value(a, result_var.clone(), span)?; builder.br(exit_block, [a], span)?; builder.switch_to_block(if_is_nonzero); let b = builder.mul(input, input, span)?; - // Track result in else-branch builder.builder_mut().debug_value(b, result_var.clone(), span)?; builder.br(exit_block, [b], span)?; @@ -899,19 +888,14 @@ mod tests { builder.builder_mut().debug_value(return_val, result_var.clone(), span)?; builder.ret(Some(return_val), span)?; - let operation = function.as_operation_ref(); + let operation = test.function().as_operation_ref(); - // Verify the input IR let input_ir = format!("{}", &operation.borrow()); expect_file!["expected/cfg_to_scf_debug_value_preservation_before.hir"] .assert_eq(&input_ir); - // Run transformation - let mut pm = pass::PassManager::on::(context, pass::Nesting::Implicit); - pm.add_pass(Box::new(LiftControlFlowToSCF)); - pm.run(operation)?; + test.apply_pass::(true)?; - // Verify that debug values survive with updated SSA operands let output = format!("{}", &operation.borrow()); expect_file!["expected/cfg_to_scf_debug_value_preservation_after.hir"].assert_eq(&output); diff --git a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir index 16da7f506..78268a0b0 100644 --- a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir +++ b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir @@ -1,19 +1,16 @@ -public builtin.function @test(v0: u32) -> u32 { -^block0(v0: u32): - v2 = arith.constant 0 : u32; - v3 = arith.eq v0, v2 : i1; - debuginfo.debug_value v0 #[expression = di.expression()] #[variable = di.local_variable(name = input, file = test.rs, line = 1, column = 1)]; - v8 = scf.if v3 : u32 { - ^block1: - v4 = arith.incr v0 : u32; - debuginfo.debug_value v4 #[expression = 
di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; - scf.yield v4; +builtin.function public extern("C") @cfg_to_scf_debug_value_preservation(%0: u32) -> u32 { + %2 = arith.constant 0 : u32; + %3 = arith.eq %0, %2; + "debuginfo.debug_value"(%0) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + %8 = scf.if %3 then { + %4 = arith.incr %0; + "debuginfo.debug_value"(%4) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + scf.yield %4 : (u32); } else { - ^block2: - v5 = arith.mul v0, v0 : u32 #[overflow = checked]; - debuginfo.debug_value v5 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; - scf.yield v5; - }; - debuginfo.debug_value v8 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; - builtin.ret v8; + %5 = arith.mul %0, %0 <{ overflow = #builtin.overflow }>; + "debuginfo.debug_value"(%5) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + scf.yield %5 : (u32); + } : (i1) -> (u32); + "debuginfo.debug_value"(%8) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + builtin.ret %8 : (u32); }; \ No newline at end of file diff --git a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir index 894e4e9dc..8b5c2c435 100644 --- a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir +++ b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir @@ -1,18 +1,17 @@ -public builtin.function @test(v0: u32) -> u32 { -^block0(v0: u32): - v2 = arith.constant 0 : u32; - v3 = arith.eq v0, v2 : i1; - debuginfo.debug_value v0 #[expression = di.expression()] 
#[variable = di.local_variable(name = input, file = test.rs, line = 1, column = 1)]; - cf.cond_br v3 ^block1, ^block2; +builtin.function public extern("C") @cfg_to_scf_debug_value_preservation(%0: u32) -> u32 { + %2 = arith.constant 0 : u32; + %3 = arith.eq %0, %2; + "debuginfo.debug_value"(%0) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + cf.cond_br %3 ^block1, ^block2 : (i1); ^block1: - v4 = arith.incr v0 : u32; - debuginfo.debug_value v4 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; - cf.br ^block3(v4); + %4 = arith.incr %0; + "debuginfo.debug_value"(%4) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + cf.br ^block3:(%4); ^block2: - v5 = arith.mul v0, v0 : u32 #[overflow = checked]; - debuginfo.debug_value v5 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; - cf.br ^block3(v5); -^block3(v1: u32): - debuginfo.debug_value v1 #[expression = di.expression()] #[variable = di.local_variable(name = result, file = test.rs, line = 2, column = 1)]; - builtin.ret v1; + %5 = arith.mul %0, %0 <{ overflow = #builtin.overflow }>; + "debuginfo.debug_value"(%5) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + cf.br ^block3:(%5); +^block3(%1: u32): + "debuginfo.debug_value"(%1) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + builtin.ret %1 : (u32); }; \ No newline at end of file diff --git a/hir-macros/src/operation.rs b/hir-macros/src/operation.rs index 2e0a35d7b..ae9ba14f5 100644 --- a/hir-macros/src/operation.rs +++ b/hir-macros/src/operation.rs @@ -471,6 +471,7 @@ impl quote::ToTokens for WithAttrs<'_> { for param in self.0.op_builder_impl.create_params.iter() { if let OpCreateParamType::Attr(OpAttribute { name, ty, .. 
}) = &param.param_ty { let span = name.span(); + let field_name = syn::Lit::Str(syn::LitStr::new(&format!("{name}"), span)); tokens.extend(quote_spanned! { span => op_builder.with_property::<#ty, _>(#field_name, #name)?; }); From 1e8f2afa5ea3272407adc9ff0c10080589620ae3 Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 14 Apr 2026 11:19:30 +0200 Subject: [PATCH 16/32] fix: register DebugValue as no-op in HIR evaluator --- Cargo.lock | 1 + dialects/scf/src/transforms/cfg_to_scf.rs | 3 +-- eval/Cargo.toml | 1 + eval/src/eval.rs | 8 ++++++++ eval/src/lib.rs | 8 ++++++++ 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1624ff6f7..1e8898d9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3524,6 +3524,7 @@ dependencies = [ "miden-thiserror", "midenc-dialect-arith", "midenc-dialect-cf", + "midenc-dialect-debuginfo", "midenc-dialect-hir", "midenc-dialect-scf", "midenc-dialect-ub", diff --git a/dialects/scf/src/transforms/cfg_to_scf.rs b/dialects/scf/src/transforms/cfg_to_scf.rs index 4ada35735..26b4180e9 100644 --- a/dialects/scf/src/transforms/cfg_to_scf.rs +++ b/dialects/scf/src/transforms/cfg_to_scf.rs @@ -847,8 +847,7 @@ mod tests { use midenc_dialect_debuginfo::{DebugInfoDialect, DebugInfoOpBuilder}; use midenc_hir::{DILocalVariable, interner::Symbol}; - let mut test = - Test::new("cfg_to_scf_debug_value_preservation", &[Type::U32], &[Type::U32]); + let mut test = Test::new("cfg_to_scf_debug_value_preservation", &[Type::U32], &[Type::U32]); test.context().get_or_register_dialect::(); let span = SourceSpan::default(); diff --git a/eval/Cargo.toml b/eval/Cargo.toml index 9f3b7b231..c65b69cb6 100644 --- a/eval/Cargo.toml +++ b/eval/Cargo.toml @@ -22,6 +22,7 @@ log.workspace = true miden-core.workspace = true midenc-dialect-arith.workspace = true midenc-dialect-cf.workspace = true +midenc-dialect-debuginfo.workspace = true midenc-dialect-scf.workspace = true midenc-dialect-hir.workspace = true midenc-dialect-ub.workspace = true diff 
--git a/eval/src/eval.rs b/eval/src/eval.rs index 44e10cc21..38aceaaa8 100644 --- a/eval/src/eval.rs +++ b/eval/src/eval.rs @@ -5,6 +5,7 @@ use alloc::{ use midenc_dialect_arith as arith; use midenc_dialect_cf as cf; +use midenc_dialect_debuginfo as debuginfo; use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; @@ -106,6 +107,13 @@ impl Eval for ub::Unreachable { } } +// Debug info operations are purely observational and have no runtime semantics. +impl Eval for debuginfo::DebugValue { + fn eval(&self, _evaluator: &mut HirEvaluator) -> Result { + Ok(ControlFlowEffect::None) + } +} + impl Eval for ub::Poison { fn eval(&self, evaluator: &mut HirEvaluator) -> Result { let value = match self.value().as_immediate() { diff --git a/eval/src/lib.rs b/eval/src/lib.rs index 03e37cdd0..6f242de64 100644 --- a/eval/src/lib.rs +++ b/eval/src/lib.rs @@ -15,6 +15,7 @@ mod value; use midenc_dialect_arith as arith; use midenc_dialect_cf as cf; +use midenc_dialect_debuginfo as debuginfo; use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; @@ -48,6 +49,9 @@ inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( eval_wasm_dialect )); +inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( + eval_debuginfo_dialect +)); fn eval_builtin_dialect(info: &mut ::midenc_hir::DialectInfo) { info.register_operation_trait::(); @@ -154,3 +158,7 @@ fn eval_wasm_dialect(info: &mut ::midenc_hir::DialectInfo) { info.register_operation_trait::(); info.register_operation_trait::(); } + +fn eval_debuginfo_dialect(info: &mut ::midenc_hir::DialectInfo) { + info.register_operation_trait::(); +} From f19ef3bafd06c9e3a5dd3a6f1b94c0fbc4e72b45 Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 14 Apr 2026 11:24:50 +0200 Subject: [PATCH 17/32] fix: skip debug info ops in liveness analysis --- hir-analysis/src/analyses/liveness.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff 
--git a/hir-analysis/src/analyses/liveness.rs b/hir-analysis/src/analyses/liveness.rs index 31baea14e..c328ed314 100644 --- a/hir-analysis/src/analyses/liveness.rs +++ b/hir-analysis/src/analyses/liveness.rs @@ -22,6 +22,14 @@ use crate::{ /// The distance penalty applied to an edge which exits a loop pub const LOOP_EXIT_DISTANCE: u32 = 100_000; +/// Returns `true` if the operation belongs to the debuginfo dialect. +/// +/// Debug info ops (debuginfo.debug_value, etc.) are purely observational — their +/// operands are not real uses and must not keep values alive. +fn is_debug_info_op(op: &Operation) -> bool { + op.name().dialect().as_str() == "debuginfo" +} + /// This analysis computes what values are live, and the distance to next use, for all program /// points in the given operation. It computes both live-in and live-out sets, in order to answer /// liveness questions about the state of the program at an operation, as well as questions about @@ -360,9 +368,15 @@ impl DenseBackwardDataFlowAnalysis for Liveness { temp_live_in.remove(result); } - // Set the next-use distance of any operands to 0 - for operand in op.operands().all().iter() { - temp_live_in.insert(operand.borrow().as_value_ref(), 0); + // Set the next-use distance of any operands to 0. + // Skip debug info ops: their operands are observational metadata and must + // not keep values alive, otherwise scf.if branches can end up with + // mismatched operand-stack sizes when one branch has a real use and the + // other only a debug use. 
+ if !is_debug_info_op(op) { + for operand in op.operands().all().iter() { + temp_live_in.insert(operand.borrow().as_value_ref(), 0); + } } // Determine if the state has changed, if so, then overwrite `live_in` with what we've From 920db856ace225f82fce54dd9de7798cc42531f9 Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 14 Apr 2026 11:54:05 +0200 Subject: [PATCH 18/32] fix: skip location schedule emission for undefined variables --- .../wasm/src/module/function_builder_ext.rs | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index 948bad457..80bdb3e82 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -137,6 +137,11 @@ pub struct FunctionBuilderExt<'c, B: ?Sized + Builder> { debug_info: Option>>, param_values: Vec<(Variable, ValueRef)>, param_dbg_emitted: bool, + /// Set of variables that have been defined via def_var. Used by + /// apply_location_schedule to avoid calling try_use_var on undefined + /// variables, which would insert block parameters as a side effect and + /// corrupt the CFG. + defined_vars: alloc::collections::BTreeSet, } impl<'c> FunctionBuilderExt<'c, OpBuilder> { @@ -152,6 +157,7 @@ impl<'c> FunctionBuilderExt<'c, OpBuilder> { debug_info: None, param_values: Vec::new(), param_dbg_emitted: false, + defined_vars: alloc::collections::BTreeSet::new(), } } } @@ -271,11 +277,19 @@ impl FunctionBuilderExt<'_, B> { return; } + // Only emit debug values for variables that have already been defined. + // Calling try_use_var on an undefined variable would insert block + // parameters (phis) as a side effect, corrupting the CFG. + let is_frame_base = matches!(&entry.storage, VariableStorage::FrameBase { .. 
}); + if !is_frame_base && !self.defined_vars.contains(&(entry.var_index as u32)) { + return; + } + let var = Variable::new(entry.var_index); let value = match self.try_use_var(var) { Ok(v) => v, Err(_) => { - if matches!(&entry.storage, VariableStorage::FrameBase { .. }) { + if is_frame_base { // FrameBase-only variables have no WASM local, so no SSA value // exists for them. The debuginfo.value op requires an SSA operand, // so we attach an existing parameter value as an anchor. The MASM @@ -571,12 +585,16 @@ impl FunctionBuilderExt<'_, B> { /// an error if the value supplied does not match the type the variable was /// declared to have. pub fn try_def_var(&mut self, var: Variable, val: ValueRef) -> Result<(), DefVariableError> { - let mut func_ctx = self.func_ctx.borrow_mut(); - let var_ty = func_ctx.types.get(var).ok_or(DefVariableError::DefinedBeforeDeclared(var))?; - if var_ty != val.borrow().ty() { - return Err(DefVariableError::TypeMismatch(var, val)); + { + let mut func_ctx = self.func_ctx.borrow_mut(); + let var_ty = + func_ctx.types.get(var).ok_or(DefVariableError::DefinedBeforeDeclared(var))?; + if var_ty != val.borrow().ty() { + return Err(DefVariableError::TypeMismatch(var, val)); + } + func_ctx.ssa.def_var(var, val, self.inner.current_block()); } - func_ctx.ssa.def_var(var, val, self.inner.current_block()); + self.defined_vars.insert(var.index() as u32); Ok(()) } From 577eb6f2117a747d5091334fd7e5988cd847fcb8 Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 14 Apr 2026 16:27:24 +0200 Subject: [PATCH 19/32] fix: clippy for debug_info.rs --- frontend/wasm/src/module/debug_info.rs | 31 +++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index af5982220..12127809a 100644 --- a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -591,10 +591,10 @@ fn resolve_subprogram_target>( frame_base_global, }; - if 
let Some(ref name) = maybe_name { - if let Some(&func_index) = func_by_name.get(name) { - return Some(make_info(func_index, low_pc.unwrap_or_default(), high_pc)); - } + if let Some(ref name) = maybe_name + && let Some(&func_index) = func_by_name.get(name) + { + return Some(make_info(func_index, low_pc.unwrap_or_default(), high_pc)); } if let Some(base) = low_pc @@ -605,6 +605,7 @@ fn resolve_subprogram_target>( None } +#[allow(clippy::too_many_arguments)] fn collect_subprogram_variables>( dwarf: &gimli::Dwarf, unit: &gimli::Unit, @@ -637,6 +638,7 @@ fn collect_subprogram_variables>( Ok(()) } +#[allow(clippy::too_many_arguments)] fn walk_variable_nodes>( dwarf: &gimli::Dwarf, unit: &gimli::Unit, @@ -707,6 +709,7 @@ fn walk_variable_nodes>( Ok(()) } +#[allow(clippy::too_many_arguments)] fn decode_variable_entry>( dwarf: &gimli::Dwarf, unit: &gimli::Unit, @@ -808,19 +811,17 @@ fn decode_variable_entry>( let storage_expr = entry.data; if let Some(storage) = decode_storage_from_expression(&storage_expr, unit, frame_base_global)? + && (storage.as_local().is_some() + || matches!(&storage, VariableStorage::FrameBase { .. })) { - if storage.as_local().is_some() - || matches!(&storage, VariableStorage::FrameBase { .. }) - { - if matches!(&storage, VariableStorage::FrameBase { .. }) { - has_frame_base = true; - } - locations.push(LocationDescriptor { - start: entry.range.begin, - end: Some(entry.range.end), - storage, - }); + if matches!(&storage, VariableStorage::FrameBase { .. 
}) { + has_frame_base = true; } + locations.push(LocationDescriptor { + start: entry.range.begin, + end: Some(entry.range.end), + storage, + }); } } if locations.is_empty() { From 9ed7a2e4bd4340e732ca009e1b77712781731bdd Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 14 Apr 2026 22:03:55 +0200 Subject: [PATCH 20/32] fix: preserve DWARF frame-base variables --- codegen/masm/src/lower/component.rs | 35 +++++++++---- frontend/wasm/src/module/debug_info.rs | 50 ++++++++++++------- .../wasm/src/module/function_builder_ext.rs | 10 +--- hir/src/attributes/debug.rs | 41 ++++++++++++++- hir/src/lib.rs | 3 +- midenc-compile/src/debug_info.rs | 2 +- tools/debugdump/src/main.rs | 28 ++++++++++- 7 files changed, 125 insertions(+), 44 deletions(-) diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index b5b704727..4817cf8b2 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -5,7 +5,8 @@ use miden_assembly_syntax::{ast::Attribute, parser::WordValue}; use miden_core::operations::DebugVarLocation; use midenc_hir::{ FunctionIdent, Op, OpExt, SourceSpan, Span, Symbol, TraceTarget, ValueRef, - diagnostics::IntoDiagnostic, dialects::builtin, pass::AnalysisManager, + decode_frame_base_local_index, diagnostics::IntoDiagnostic, dialects::builtin, + encode_frame_base_local_offset, pass::AnalysisManager, }; use midenc_hir_analysis::analyses::LivenessAnalysis; use midenc_session::{ @@ -726,17 +727,29 @@ fn patch_debug_var_locals_in_block( // Convert raw WASM local index to FMP offset let fmp_offset = *idx - (aligned_num_locals as i16); info.set_value_location(DebugVarLocation::Local(fmp_offset)); - } else if let DebugVarLocation::FrameBase { byte_offset, .. } = - info.value_location() + } else if let DebugVarLocation::FrameBase { + global_index, + byte_offset, + } = info.value_location() { - // Resolve FrameBase: replace WASM global index with - // the Miden memory address of the stack pointer global. 
- if let Some(resolved_addr) = stack_pointer_addr { - let byte_offset = *byte_offset; - info.set_value_location(DebugVarLocation::FrameBase { - global_index: resolved_addr, - byte_offset, - }); + let byte_offset = *byte_offset; + if let Some(local_index) = decode_frame_base_local_index(*global_index) { + if let Ok(local_index) = i16::try_from(local_index) { + let local_offset = local_index - (aligned_num_locals as i16); + info.set_value_location(DebugVarLocation::FrameBase { + global_index: encode_frame_base_local_offset(local_offset), + byte_offset, + }); + } + } else { + // Resolve FrameBase: replace WASM global index with + // the Miden memory address of the stack pointer global. + if let Some(resolved_addr) = stack_pointer_addr { + info.set_value_location(DebugVarLocation::FrameBase { + global_index: resolved_addr, + byte_offset, + }); + } } } } diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index 12127809a..1d9c0e43e 100644 --- a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -8,7 +8,7 @@ use gimli::{self, AttributeValue, read::Operation}; use log::debug; use midenc_hir::{ DICompileUnit, DIExpression, DIExpressionOp, DILocalVariable, DISubprogram, FxHashMap, - SourceSpan, interner::Symbol, + SourceSpan, encode_frame_base_local_index, interner::Symbol, }; use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic}; @@ -34,7 +34,11 @@ pub enum VariableStorage { Global(u32), Stack(u32), ConstU64(u64), - /// Frame base (global index) + byte offset — from DW_OP_fbreg + /// Frame base + byte offset — from DW_OP_fbreg. + /// + /// For Wasm-global frame bases, `global_index` is the Wasm global index. + /// For Wasm-local frame bases, it is encoded with + /// `encode_frame_base_local_index`. 
FrameBase { global_index: u32, byte_offset: i64, @@ -260,6 +264,8 @@ fn build_local_debug_info( let total = param_count + local_count; let mut locals = vec![None; total]; + let has_dwarf_locals = dwarf_locals.is_some_and(|locals| !locals.is_empty()) + || frame_base_vars.is_some_and(|locals| !locals.is_empty()); for (param_idx, wasm_ty) in wasm_signature.params().iter().enumerate() { let index_u32 = param_idx as u32; @@ -274,7 +280,7 @@ fn build_local_debug_info( } let mut attr = DILocalVariable::new(name_symbol, subprogram.file, subprogram.line, subprogram.column); - attr.arg_index = Some((param_idx + 1) as u32); + attr.arg_index = Some(param_idx as u32); if let Ok(ty) = ir_type(*wasm_ty, diagnostics) { attr.ty = Some(ty); } @@ -311,9 +317,14 @@ fn build_local_debug_info( for _ in 0..count { let index_u32 = next_local_index as u32; let dwarf_entry = dwarf_locals.and_then(|map| map.get(&index_u32)); - let mut name_symbol = module - .local_name(func_index, index_u32) - .unwrap_or_else(|| Symbol::intern(format!("local{next_local_index}"))); + let local_name = module.local_name(func_index, index_u32); + if has_dwarf_locals && dwarf_entry.is_none() && local_name.is_none() { + next_local_index += 1; + continue; + } + + let mut name_symbol = + local_name.unwrap_or_else(|| Symbol::intern(format!("local{next_local_index}"))); if let Some(info) = dwarf_entry && let Some(symbol) = info.name { @@ -512,8 +523,9 @@ struct SubprogramInfo { func_index: FuncIndex, low_pc: u64, high_pc: Option, - /// The WASM global index used as the frame base (from DW_AT_frame_base). - /// Typically global 0 (__stack_pointer). + /// The encoded WASM location used as the frame base (from DW_AT_frame_base). + /// Plain values are Wasm globals; values encoded with + /// `encode_frame_base_local_index` are Wasm locals. 
frame_base_global: Option, } @@ -562,20 +574,20 @@ fn resolve_subprogram_target>( _ => {} }, gimli::DW_AT_frame_base => { - // Decode the frame base expression to find which WASM global - // provides the base address (typically __stack_pointer = global 0). - // Only WASM globals are supported — downstream FrameBase resolution - // assumes the index refers to a global in the linker's layout. + // Decode the frame base expression. Rust-generated Wasm commonly + // uses a generated Wasm local as the frame pointer; globals are + // still supported for producers that use __stack_pointer directly. if let AttributeValue::Exprloc(expr) = attr.value() { let mut ops = expr.operations(unit.encoding()); while let Ok(Some(op)) = ops.next() { - if let Operation::WasmLocal { .. } = op { - debug!( - "DW_AT_frame_base uses WASM local; only globals are supported — \ - ignoring" - ); - } else if let Operation::WasmGlobal { index } = op { - frame_base_global = Some(index); + match op { + Operation::WasmLocal { index } => { + frame_base_global = encode_frame_base_local_index(index); + } + Operation::WasmGlobal { index } => { + frame_base_global = Some(index); + } + _ => {} } } } diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index 80bdb3e82..4ed2a696a 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -327,18 +327,10 @@ impl FunctionBuilderExt<'_, B> { let info = info.borrow(); info.local_attr(idx).cloned() }; - let Some(mut attr) = attr_opt else { + let Some(attr) = attr_opt else { return; }; - if let Some((file_symbol, _directory, line, column)) = self.span_to_location(span) { - attr.file = file_symbol; - if line != 0 { - attr.line = line; - } - attr.column = column; - } - if let Err(err) = DebugInfoOpBuilder::builder_mut(self) .debug_value_with_expr(value, attr, expression, span) { diff --git a/hir/src/attributes/debug.rs 
b/hir/src/attributes/debug.rs index 7e3421eb9..694083775 100644 --- a/hir/src/attributes/debug.rs +++ b/hir/src/attributes/debug.rs @@ -250,6 +250,39 @@ pub enum DIExpressionOp { Unsupported(Symbol), } +/// High-bit marker used to carry a Wasm-local frame base through the existing +/// `FrameBase { global_index, byte_offset }` debug-location shape without +/// changing the VM-facing `DebugVarLocation` ABI. +/// +/// Before MASM lowering completes, the low bits hold a raw Wasm local index. +/// After local patching, the low 16 bits hold the signed FMP-relative offset of +/// the Miden local containing the frame-base byte address. +pub const FRAME_BASE_LOCAL_MARKER: u32 = 1 << 31; + +pub fn encode_frame_base_local_index(local_index: u32) -> Option { + if local_index < FRAME_BASE_LOCAL_MARKER { + Some(FRAME_BASE_LOCAL_MARKER | local_index) + } else { + None + } +} + +pub fn decode_frame_base_local_index(encoded: u32) -> Option { + (encoded & FRAME_BASE_LOCAL_MARKER != 0).then_some(encoded & !FRAME_BASE_LOCAL_MARKER) +} + +pub fn encode_frame_base_local_offset(local_offset: i16) -> u32 { + FRAME_BASE_LOCAL_MARKER | u16::from_le_bytes(local_offset.to_le_bytes()) as u32 +} + +pub fn decode_frame_base_local_offset(encoded: u32) -> Option { + if encoded & FRAME_BASE_LOCAL_MARKER == 0 { + return None; + } + let low_bits = (encoded & 0xffff) as u16; + Some(i16::from_le_bytes(low_bits.to_le_bytes())) +} + /// Represents a DWARF expression that describes how to compute or locate a variable's value #[derive(DialectAttribute, Clone, Debug, Default, PartialEq, Eq, Hash)] #[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] @@ -308,7 +341,13 @@ impl PrettyPrint for DIExpression { DIExpressionOp::FrameBase { global_index, byte_offset, - } => text(format!("DW_OP_fbreg global[{}]{:+}", global_index, byte_offset)), + } => { + if let Some(local_index) = decode_frame_base_local_index(*global_index) { + text(format!("DW_OP_fbreg local[{}]{:+}", local_index, byte_offset)) + } 
else { + text(format!("DW_OP_fbreg global[{}]{:+}", global_index, byte_offset)) + } + } DIExpressionOp::Unsupported(name) => text(name.as_str()), }; } diff --git a/hir/src/lib.rs b/hir/src/lib.rs index a440ba7cc..d9d8d2789 100644 --- a/hir/src/lib.rs +++ b/hir/src/lib.rs @@ -85,7 +85,8 @@ pub use self::{ Attribute, AttributeName, AttributeRef, AttributeRegistration, AttributeValue, DICompileUnit, DICompileUnitAttr, DIExpression, DIExpressionAttr, DIExpressionOp, DILocalVariable, DILocalVariableAttr, DISubprogram, DISubprogramAttr, NamedAttribute, - NamedAttributeList, + NamedAttributeList, decode_frame_base_local_index, decode_frame_base_local_offset, + encode_frame_base_local_index, encode_frame_base_local_offset, }, dialects::builtin::attributes::{Location, Overflow, Visibility, version}, direction::{Backward, Direction, Forward}, diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs index 688f2a2a2..6888019db 100644 --- a/midenc-compile/src/debug_info.rs +++ b/midenc-compile/src/debug_info.rs @@ -244,7 +244,7 @@ impl DebugInfoBuilder { let mut var_info = DebugVariableInfo::new(name_idx, type_idx, line, column); if let Some(arg_index) = var.arg_index { - var_info = var_info.with_arg_index(arg_index); + var_info = var_info.with_arg_index(arg_index + 1); } Some(var_info) diff --git a/tools/debugdump/src/main.rs b/tools/debugdump/src/main.rs index 2ffc3574e..23dc7449f 100644 --- a/tools/debugdump/src/main.rs +++ b/tools/debugdump/src/main.rs @@ -14,7 +14,7 @@ use std::{ use clap::{Parser, ValueEnum}; use miden_core::{ mast::MastForest, - operations::DebugVarInfo, + operations::{DebugVarInfo, DebugVarLocation}, serde::{Deserializable, SliceReader}, }; use miden_mast_package::{ @@ -35,6 +35,30 @@ enum Error { NoDebugInfo, } +const FRAME_BASE_LOCAL_MARKER: u32 = 1 << 31; + +fn decode_frame_base_local_offset(encoded: u32) -> Option { + if encoded & FRAME_BASE_LOCAL_MARKER == 0 { + return None; + } + + let low_bits = (encoded & 0xffff) as u16; 
+ Some(i16::from_le_bytes(low_bits.to_le_bytes())) +} + +fn format_debug_var_location(location: &DebugVarLocation) -> String { + if let DebugVarLocation::FrameBase { + global_index, + byte_offset, + } = location + && let Some(offset) = decode_frame_base_local_offset(*global_index) + { + format!("frame_base(FMP{offset:+}){byte_offset:+}") + } else { + location.to_string() + } +} + /// Holds the three debug info sections with helper accessors. struct DebugSections { types: DebugTypesSection, @@ -662,7 +686,7 @@ fn print_locations(mast_forest: &MastForest, debug_sections: &DebugSections, ver print!(" [var#{}] ", var_idx); // Print value location - print!("{}", info.value_location()); + print!("{}", format_debug_var_location(info.value_location())); // Print argument info if present if let Some(arg_idx) = info.arg_index() { From 615fee1695562caf56421d52dfbe41b302f5049e Mon Sep 17 00:00:00 2001 From: djole Date: Tue, 28 Apr 2026 17:54:08 +0200 Subject: [PATCH 21/32] fix: run debugdump tests via litcheck --- Makefile.toml | 23 ++++++--- tests/lit/debugdump/lit.suite.toml | 6 +++ tests/lit/debugdump/locations-source-loc.wat | 8 +-- tests/lit/debugdump/locations.wat | 2 +- tests/lit/debugdump/simple.wat | 2 +- tests/lit/debugdump/summary.wat | 2 +- tests/lit/lit.cfg.py | 51 -------------------- tests/lit/lit.suite.toml | 1 + 8 files changed, 31 insertions(+), 64 deletions(-) delete mode 100644 tests/lit/lit.cfg.py diff --git a/Makefile.toml b/Makefile.toml index b5c355d15..15afa53e6 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -235,6 +235,20 @@ args = [ "${MIDENC_BIN_DIR}", ] +[tasks.miden-debugdump] +category = "Build" +description = "Builds miden-debugdump and installs it to the bin folder" +command = "cargo" +args = [ + "-Z", + "unstable-options", + "build", + "-p", + "miden-debugdump", + "--artifact-dir", + "${MIDENC_BIN_DIR}", +] + [tasks.miden-objtool] category = "Build" description = "Builds miden-objtool and installs it to the bin folder" @@ -442,12 +456,9 @@ 
args = [ "--verbose", "--path", "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/bin", - "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/parse", - "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/wasm-translation", - "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/source-location", - "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit/debugdump", + "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit", ] -dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt"] +dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt", "miden-debugdump"] [tasks.lit] category = "Test" @@ -458,7 +469,7 @@ args = [ "lit", "${@}", ] -dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt"] +dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt", "miden-debugdump"] [tasks.litcheck] diff --git a/tests/lit/debugdump/lit.suite.toml b/tests/lit/debugdump/lit.suite.toml index 162db014a..a0ec4334b 100644 --- a/tests/lit/debugdump/lit.suite.toml +++ b/tests/lit/debugdump/lit.suite.toml @@ -2,4 +2,10 @@ name = "debugdump" patterns = ["*.wat"] working_dir = "../../../" +[substitutions] +"midenc" = "$$MIDENC_BIN_DIR/midenc" +"miden-debugdump" = "$$MIDENC_BIN_DIR/miden-debugdump" +"%cargo" = "cargo +$$CARGO_MAKE_RUSTUP_TOOLCHAIN_NAME" +"%target_dir" = "$$CARGO_TARGET_DIR" + [format.shtest] diff --git a/tests/lit/debugdump/locations-source-loc.wat b/tests/lit/debugdump/locations-source-loc.wat index 5d95d7056..6df3ceeb8 100644 --- a/tests/lit/debugdump/locations-source-loc.wat +++ b/tests/lit/debugdump/locations-source-loc.wat @@ -1,8 +1,8 @@ ;; Test that .debug_loc section shows DebugVar entries with source locations ;; from a real Rust project compiled with debug info. 
;; -;; RUN: cargo build --release --target wasm32-unknown-unknown --manifest-path tests/lit/source-location/test-project/Cargo.toml 2>&1 -;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc 'tests/lit/source-location/test-project/target/wasm32-unknown-unknown/release/source_location_test.wasm' --lib --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s +;; RUN: %cargo build --target-dir %target_dir/debugdump-source-location --release --target wasm32-unknown-unknown --manifest-path tests/lit/source-location/test-project/Cargo.toml 2>&1 +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%target_dir/debugdump-source-location/wasm32-unknown-unknown/release/source_location_test.wasm' --lib --debug full -o \"\$TMPFILE\" && miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s ;; Check header ;; CHECK: .debug_loc contents (DebugVar entries from MAST): @@ -12,7 +12,7 @@ ;; Check variable "arg0" - parameter from test_assertion function ;; CHECK: Variable: "arg0" ;; CHECK: 1 location entries: -;; CHECK: FMP-4 (param #2) +;; CHECK: FMP-4 (param #1) ;; Check variable "local3" - from panic handler ;; CHECK: Variable: "local3" @@ -22,4 +22,4 @@ ;; Check variable "x" - parameter from entrypoint function ;; CHECK: Variable: "x" ;; CHECK: 2 location entries: -;; CHECK: FMP-4 (param #2) +;; CHECK: FMP-4 (param #1) diff --git a/tests/lit/debugdump/locations.wat b/tests/lit/debugdump/locations.wat index 3b05ff93c..3ba06eb57 100644 --- a/tests/lit/debugdump/locations.wat +++ b/tests/lit/debugdump/locations.wat @@ -1,5 +1,5 @@ ;; Test that .debug_loc section is present and handles empty case -;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && 
TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s ;; Check header for .debug_loc section ;; CHECK: .debug_loc contents (DebugVar entries from MAST): diff --git a/tests/lit/debugdump/simple.wat b/tests/lit/debugdump/simple.wat index 62426740c..0d2d9903a 100644 --- a/tests/lit/debugdump/simple.wat +++ b/tests/lit/debugdump/simple.wat @@ -1,5 +1,5 @@ ;; Test that miden-debugdump correctly parses and displays debug info from a .masp file -;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\"" | filecheck %s +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-debugdump \"\$TMPFILE\"" | filecheck %s ;; Check header ;; CHECK: DEBUG INFO DUMP: diff --git a/tests/lit/debugdump/summary.wat b/tests/lit/debugdump/summary.wat index ae73b73dc..8498a9778 100644 --- a/tests/lit/debugdump/summary.wat +++ b/tests/lit/debugdump/summary.wat @@ -1,5 +1,5 @@ ;; Test that miden-debugdump --summary shows only summary output -;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && bin/midenc '%s' --exe --debug full -o \"\$TMPFILE\" && target/debug/miden-debugdump \"\$TMPFILE\" --summary" | filecheck %s +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-debugdump \"\$TMPFILE\" --summary" | filecheck %s ;; Check summary is present ;; CHECK: .debug_info summary: diff --git a/tests/lit/lit.cfg.py b/tests/lit/lit.cfg.py deleted file mode 100644 index 75043fcf0..000000000 --- a/tests/lit/lit.cfg.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -import sys -import shlex - -from lit.formats import ShTest -import lit.util - -config.name = "miden-lit" -config.test_format = ShTest() -config.suffixes = 
[".shtest", ".hir", ".wat"] - -source_root = os.path.dirname(__file__) -repo_root = os.path.abspath(os.path.join(source_root, os.pardir, os.pardir)) -config.test_source_root = source_root -config.test_exec_root = repo_root -bin_dir = os.path.join(repo_root, "bin") -config.environment["PATH"] = bin_dir + os.pathsep + config.environment.get("PATH", "") -# Use cargo run to ensure proper runtime environment -# Redirect cargo's stderr to suppress build warnings, but keep midenc's stderr -midenc_cmd = f"cargo run --manifest-path {shlex.quote(os.path.join(repo_root, 'Cargo.toml'))} --bin midenc 2>/dev/null --" -config.substitutions.append(("%midenc", midenc_cmd)) - -# Try to find FileCheck in common locations -filecheck = ( - lit.util.which("FileCheck") - or lit.util.which("filecheck") - or lit.util.which("llvm-filecheck") -) - -# Check homebrew LLVM locations if not found -if not filecheck: - homebrew_paths = [ - "/opt/homebrew/opt/llvm@20/bin/FileCheck", - "/opt/homebrew/opt/llvm/bin/FileCheck", - "/usr/local/opt/llvm/bin/FileCheck", - ] - for path in homebrew_paths: - if os.path.exists(path): - filecheck = path - break - -# Fall back to simple_filecheck.py only if system FileCheck not found -if not filecheck: - script = os.path.join(source_root, 'tools', 'simple_filecheck.py') - filecheck = f"{shlex.quote(sys.executable)} {shlex.quote(script)}" - -config.substitutions.append(("%filecheck", filecheck)) - -config.substitutions.append(("%S", source_root)) - -config.environment.setdefault("RUSTFLAGS", "") diff --git a/tests/lit/lit.suite.toml b/tests/lit/lit.suite.toml index 826f0e25a..e8b52875d 100644 --- a/tests/lit/lit.suite.toml +++ b/tests/lit/lit.suite.toml @@ -4,6 +4,7 @@ patterns = ["*.wat", "*.masm", "*.stderr"] [substitutions] "midenc" = "$$MIDENC_BIN_DIR/midenc" "hir-opt" = "$$MIDENC_BIN_DIR/hir-opt" +"miden-debugdump" = "$$MIDENC_BIN_DIR/miden-debugdump" "%cargo" = "cargo +$$CARGO_MAKE_RUSTUP_TOOLCHAIN_NAME" "%target_dir" = "$$CARGO_TARGET_DIR" From 
206e2834c19345bc45b4744c501ee04e2ca7243f Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 29 Apr 2026 09:39:45 +0200 Subject: [PATCH 22/32] fix: use released miden vm debug APIs --- Cargo.lock | 104 ++++++++++++++++++++++++++--------------------------- Cargo.toml | 17 --------- 2 files changed, 51 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e8898d9a..b85fe988e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2398,7 +2398,7 @@ dependencies = [ "miden-crypto", "miden-protocol", "miden-standards", -+ "miden-utils-sync", + "miden-utils-sync", "primitive-types", "regex", "serde", @@ -2411,10 +2411,10 @@ dependencies = [ name = "miden-air" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" - checksum = "d15646ebc95906b2a7cb66711d1e184f53fd6edc2605730bbcf0c2a129f792cf" - dependencies = [ - "miden-core", - "miden-crypto", +checksum = "b45551e1417cb2be47064c36fe6e1e69ab10ad7b4b55f0731d8cac109b7738b9" +dependencies = [ + "miden-core", + "miden-crypto", "miden-utils-indexing", "thiserror 2.0.18", "tracing", @@ -2422,7 +2422,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "miden-assembly" -version = "0.22.1" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2094e2b943f7bf955a2bc3b44b0ad7c4f45a286f170eaa7e5060871c44847a" dependencies = [ "env_logger", "log", @@ -2430,14 +2432,16 @@ dependencies = [ "miden-core", "miden-mast-package", "miden-package-registry", - "miden-project", + "miden-project", "smallvec", "thiserror 2.0.18", ] [[package]] name = "miden-assembly-syntax" -version = "0.22.1" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a3212614ad28399612f39024c1e321dc8cebc8998def06058e60462ddc3856" dependencies = [ "aho-corasick", "env_logger", @@ -2545,7 +2549,9 @@ dependencies = [ [[package]] name = "miden-core" -version = "0.22.1" +version = "0.22.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "39a4a2e2de49213ec899e88fe399d4ec568c8eb9e8c747d6ed58938c40031daa" dependencies = [ "derive_more", "itertools 0.14.0", @@ -2566,7 +2572,8 @@ dependencies = [ [[package]] name = "miden-core-lib" version = "0.22.2" - source = "registry+https://github.com/rust-lang/crates.io-index" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2ea7e17c4382255c6e0cb1e4b90693449dcf5a286a844e2918af66b371c0ab" dependencies = [ "env_logger", "fs-err", @@ -2713,7 +2720,9 @@ dependencies = [ [[package]] name = "miden-debug-types" -version = "0.22.1" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16570786d938b7f795921b3a84890708a7d72708442c622eb58c2fb5480821e9" dependencies = [ "memchr", "miden-crypto", @@ -2858,7 +2867,9 @@ dependencies = [ [[package]] name = "miden-mast-package" -version = "0.22.1" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0953396dc5e575b79bccb8b7da6e0d18ce71bcde899901bb4293a433f9003b94" dependencies = [ "derive_more", "miden-assembly-syntax", @@ -2942,21 +2953,24 @@ dependencies = [ [[package]] name = "miden-package-registry" -version = "0.22.1" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" - dependencies = [ - "miden-assembly-syntax", - "miden-core", - "miden-mast-package", - "pubgrub", - "serde", - "smallvec", - "thiserror 2.0.18", - ] +checksum = "e07af92dc184a71132a34d89ad15e69633435bfd36fb5af4ce18b200bd1952e5" +dependencies = [ + "miden-assembly-syntax", + "miden-core", + "miden-mast-package", + "pubgrub", + "serde", + "smallvec", + "thiserror 2.0.18", +] [[package]] name = "miden-processor" -version = "0.22.1" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "340c424f9f62b56a808c9a479cef016f25478e227555ce39cb2684e8baf26542" dependencies = [ "itertools 0.14.0", "miden-air", 
@@ -2980,7 +2994,9 @@ dependencies = [ "miden-assembly-syntax", "miden-core", "miden-mast-package", - "miden-package-registry 0.22.2", + "miden-package-registry", + "serde", + "serde-untagged", "thiserror 2.0.18", "toml 1.1.2+spec-1.1.0", ] @@ -3033,7 +3049,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fadcff2d171f81f2737a35a007c756753a8298d067555c7a556bd72f570b32f9" dependencies = [ "bincode", - "miden-air 0.22.2", + "miden-air", "miden-core", "miden-crypto", "miden-debug-types", @@ -3184,7 +3200,9 @@ dependencies = [ [[package]] name = "miden-utils-core-derive" -version = "0.22.1" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd5103e9b6527ad396dce12c135cea1984dfd77ebbffa76f260f4e139906cc4" dependencies = [ "proc-macro2", "quote", @@ -3193,7 +3211,9 @@ dependencies = [ [[package]] name = "miden-utils-diagnostics" -version = "0.22.1" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72226906c968c2e7c37435d67be9e29aeba05336db30c4e57d290cc6efb1da9d" dependencies = [ "miden-crypto", "miden-debug-types", @@ -3214,32 +3234,10 @@ dependencies = [ ] [[package]] -name = "miden-utils-indexing" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8834e76299686bcce3de1685158aa4cff49b7fa5e0e00a6cc811e8f2cf5775f" -dependencies = [ - "miden-crypto", - "thiserror 2.0.18", -] - -[[package]] - name = "miden-utils-sync" +name = "miden-utils-sync" version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e9210b3592b577843710daf68293087c68b53d8482c82f6875ad83d578cb51e" - dependencies = [ - "lock_api", - "loom", - "once_cell", - "parking_lot", - ] - - [[package]] - name = "miden-utils-sync" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9e9747e9664c1a0997bb040ae291306ea0a1c74a572141ec66cec855c1b0e8" dependencies 
= [ "lock_api", "loom", @@ -3254,7 +3252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83f47bf33268ffb31c2fc452debf8e4ba76fbb3175566efbfe850c4886fb5b37" dependencies = [ "bincode", - "miden-air 0.22.2", + "miden-air", "miden-core", "miden-crypto", "serde", @@ -3292,9 +3290,9 @@ dependencies = [ "miden-assembly-syntax", "miden-core", "miden-mast-package", - "miden-package-registry 0.22.2", + "miden-package-registry", "miden-processor", - "miden-project 0.22.2", + "miden-project", "miden-protocol", "miden-thiserror", "midenc-dialect-arith", diff --git a/Cargo.toml b/Cargo.toml index bc992022e..9ffa8d694 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,23 +162,6 @@ miden-integration-tests = { path = "tests/integration" } midenc-expect-test = { path = "tools/expect-test" } miden-field = { version = "^0.24" } -[patch.crates-io] -miden-assembly = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-assembly-syntax = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-core = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-core-lib = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-debug-types = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-mast-package = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-package-registry = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-processor = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-project = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-utils-diagnostics = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-utils-indexing = { git = 
"https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -miden-utils-sync = { git = "https://github.com/walnuthq/miden-vm", branch = "fix/debug-var-dedup-crash" } -# miden-protocol = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } -# miden-standards = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } -# miden-tx = { tag = "v0.14.0-beta.4", git = "https://github.com/0xMiden/miden-base" } - [profile.dev] lto = false # Needed for 'inventory' to work From e04048ad5831b0235d1d8abe6a4a0c9f461d6467 Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 29 Apr 2026 11:52:56 +0200 Subject: [PATCH 23/32] fixup: update tests --- .../integration-network/src/mockchain/basic_wallet.rs | 10 +++++----- .../src/mockchain/counter_contract.rs | 2 +- .../src/mockchain/counter_contract_no_auth.rs | 2 +- .../src/mockchain/counter_contract_rust_auth.rs | 2 +- .../expected/debug_conditional_assignment.hir | 4 ++-- tests/integration/expected/debug_multiple_locals.hir | 4 ++-- tests/integration/expected/debug_nested_loops.hir | 4 ++-- tests/integration/expected/debug_simple_params.hir | 4 ++-- .../integration/expected/debug_variable_locations.hir | 4 ++-- tests/integration/src/rust_masm_tests/examples.rs | 8 ++++---- tools/cargo-miden/tests/build.rs | 5 +++-- 11 files changed, 25 insertions(+), 24 deletions(-) diff --git a/tests/integration-network/src/mockchain/basic_wallet.rs b/tests/integration-network/src/mockchain/basic_wallet.rs index a5bddce71..b55b62897 100644 --- a/tests/integration-network/src/mockchain/basic_wallet.rs +++ b/tests/integration-network/src/mockchain/basic_wallet.rs @@ -107,7 +107,7 @@ pub fn test_basic_wallet_p2id() { chain.build_tx_context(alice_id, &[p2id_note_mint.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); expect!["3216"].assert_eq(prologue_cycles(&tx_measurements)); - 
expect!["20072"].assert_eq(note_cycles(&tx_measurements, p2id_note_mint.id())); + expect!["20094"].assert_eq(note_cycles(&tx_measurements, p2id_note_mint.id())); eprintln!("\n=== Checking Alice's account has the minted asset ==="); let alice_account = chain.committed_account(alice_id).unwrap(); @@ -127,12 +127,12 @@ pub fn test_basic_wallet_p2id() { &mut note_rng, ); let tx_measurements = execute_tx(&mut chain, alice_tx_context_builder); - expect!["26217"].assert_eq(tx_script_processing_cycles(&tx_measurements)); + expect!["25009"].assert_eq(tx_script_processing_cycles(&tx_measurements)); eprintln!("\n=== Step 4: Bob consumes p2id note ==="); let consume_tx_context_builder = chain.build_tx_context(bob_id, &[bob_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); - expect!["20072"].assert_eq(note_cycles(&tx_measurements, bob_note.id())); + expect!["20094"].assert_eq(note_cycles(&tx_measurements, bob_note.id())); eprintln!("\n=== Checking Bob's account has the transferred asset ==="); let bob_account = chain.committed_account(bob_id).unwrap(); @@ -257,7 +257,7 @@ pub fn test_basic_wallet_p2ide() { let consume_tx_context_builder = chain.build_tx_context(bob_id, &[p2ide_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); - expect!["21211"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); + expect!["20569"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); // Step 5: verify balances let bob_account = chain.committed_account(bob_id).unwrap(); @@ -382,7 +382,7 @@ pub fn test_basic_wallet_p2ide_reclaim() { let reclaim_tx_context_builder = chain.build_tx_context(alice_id, &[p2ide_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, reclaim_tx_context_builder); - expect!["22871"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); + expect!["21582"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); // Step 5: verify Alice has 
her original amount back let alice_account = chain.committed_account(alice_id).unwrap(); diff --git a/tests/integration-network/src/mockchain/counter_contract.rs b/tests/integration-network/src/mockchain/counter_contract.rs index a92fb08f8..c5bf35447 100644 --- a/tests/integration-network/src/mockchain/counter_contract.rs +++ b/tests/integration-network/src/mockchain/counter_contract.rs @@ -68,7 +68,7 @@ pub fn test_counter_contract() { .build_tx_context(counter_account.clone(), &[counter_note.id()], &[]) .unwrap(); let tx_measurements = execute_tx(&mut chain, tx_context_builder); - expect!["49505"].assert_eq(note_cycles(&tx_measurements, counter_note.id())); + expect!["24294"].assert_eq(note_cycles(&tx_measurements, counter_note.id())); // The counter contract storage value should be 2 after the note is consumed (incremented by 1). assert_counter_storage( diff --git a/tests/integration-network/src/mockchain/counter_contract_no_auth.rs b/tests/integration-network/src/mockchain/counter_contract_no_auth.rs index ff2066b23..00909db45 100644 --- a/tests/integration-network/src/mockchain/counter_contract_no_auth.rs +++ b/tests/integration-network/src/mockchain/counter_contract_no_auth.rs @@ -105,7 +105,7 @@ pub fn test_counter_contract_no_auth() { .unwrap(); let tx_measurements = execute_tx(&mut chain, tx_context_builder); expect!["1803"].assert_eq(auth_procedure_cycles(&tx_measurements)); - expect!["49505"].assert_eq(note_cycles(&tx_measurements, counter_note.id())); + expect!["24294"].assert_eq(note_cycles(&tx_measurements, counter_note.id())); // The counter contract storage value should be 2 after the note is consumed assert_counter_storage( diff --git a/tests/integration-network/src/mockchain/counter_contract_rust_auth.rs b/tests/integration-network/src/mockchain/counter_contract_rust_auth.rs index ef2919d9a..eef14a217 100644 --- a/tests/integration-network/src/mockchain/counter_contract_rust_auth.rs +++ 
b/tests/integration-network/src/mockchain/counter_contract_rust_auth.rs @@ -72,7 +72,7 @@ pub fn test_counter_contract_rust_auth_blocks_unauthorized_note_creation() { let tx_context = tx_context_builder.build().unwrap(); let executed_tx = block_on(tx_context.execute()).expect("authorized client should be able to create a note"); - expect!["86895"].assert_eq(auth_procedure_cycles(executed_tx.measurements())); + expect!["82697"].assert_eq(auth_procedure_cycles(executed_tx.measurements())); assert_eq!(executed_tx.output_notes().num_notes(), 1); assert_eq!(executed_tx.output_notes().get_note(0).id(), own_note.id()); diff --git a/tests/integration/expected/debug_conditional_assignment.hir b/tests/integration/expected/debug_conditional_assignment.hir index 8564b3852..09aaab61a 100644 --- a/tests/integration/expected/debug_conditional_assignment.hir +++ b/tests/integration/expected/debug_conditional_assignment.hir @@ -26,10 +26,10 @@ builtin.component private @root_ns:root@1.0.0 { builtin.global_variable private @__stack_pointer : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv1 : i32 { + builtin.global_variable public @global1 : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv2 : i32 { + builtin.global_variable public @global2 : i32 { builtin.ret_imm #builtin.number<1048576>; }; }; diff --git a/tests/integration/expected/debug_multiple_locals.hir b/tests/integration/expected/debug_multiple_locals.hir index a79c4d40e..c2bc22eef 100644 --- a/tests/integration/expected/debug_multiple_locals.hir +++ b/tests/integration/expected/debug_multiple_locals.hir @@ -17,10 +17,10 @@ builtin.component private @root_ns:root@1.0.0 { builtin.global_variable private @__stack_pointer : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv1 : i32 { + builtin.global_variable public @global1 : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv2 : 
i32 { + builtin.global_variable public @global2 : i32 { builtin.ret_imm #builtin.number<1048576>; }; }; diff --git a/tests/integration/expected/debug_nested_loops.hir b/tests/integration/expected/debug_nested_loops.hir index 688505a36..391c3c774 100644 --- a/tests/integration/expected/debug_nested_loops.hir +++ b/tests/integration/expected/debug_nested_loops.hir @@ -26,10 +26,10 @@ builtin.component private @root_ns:root@1.0.0 { builtin.global_variable private @__stack_pointer : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv1 : i32 { + builtin.global_variable public @global1 : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv2 : i32 { + builtin.global_variable public @global2 : i32 { builtin.ret_imm #builtin.number<1048576>; }; }; diff --git a/tests/integration/expected/debug_simple_params.hir b/tests/integration/expected/debug_simple_params.hir index d7bfedd45..3f128228d 100644 --- a/tests/integration/expected/debug_simple_params.hir +++ b/tests/integration/expected/debug_simple_params.hir @@ -13,10 +13,10 @@ builtin.component private @root_ns:root@1.0.0 { builtin.global_variable private @__stack_pointer : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv1 : i32 { + builtin.global_variable public @global1 : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv2 : i32 { + builtin.global_variable public @global2 : i32 { builtin.ret_imm #builtin.number<1048576>; }; }; diff --git a/tests/integration/expected/debug_variable_locations.hir b/tests/integration/expected/debug_variable_locations.hir index 13b34af29..9828234f9 100644 --- a/tests/integration/expected/debug_variable_locations.hir +++ b/tests/integration/expected/debug_variable_locations.hir @@ -43,10 +43,10 @@ builtin.component private @root_ns:root@1.0.0 { builtin.global_variable private @__stack_pointer : i32 { builtin.ret_imm #builtin.number<1048576>; }; - 
builtin.global_variable public @gv1 : i32 { + builtin.global_variable public @global1 : i32 { builtin.ret_imm #builtin.number<1048576>; }; - builtin.global_variable public @gv2 : i32 { + builtin.global_variable public @global2 : i32 { builtin.ret_imm #builtin.number<1048576>; }; }; diff --git a/tests/integration/src/rust_masm_tests/examples.rs b/tests/integration/src/rust_masm_tests/examples.rs index c4dda7d0e..4fc975c54 100644 --- a/tests/integration/src/rust_masm_tests/examples.rs +++ b/tests/integration/src/rust_masm_tests/examples.rs @@ -329,7 +329,7 @@ fn basic_wallet_and_p2id() { CompilerTest::rust_source_cargo_miden("../../examples/basic-wallet", config.clone(), []); let account_package = account_test.compile_package(); assert!(account_package.is_library(), "expected library"); - expect!["35906"].assert_eq(stripped_mast_size_str(&account_package)); + expect!["36630"].assert_eq(stripped_mast_size_str(&account_package)); let mut tx_script_test = CompilerTest::rust_source_cargo_miden( "../../examples/basic-wallet-tx-script", @@ -338,19 +338,19 @@ fn basic_wallet_and_p2id() { ); let tx_script_package = tx_script_test.compile_package(); assert!(tx_script_package.is_program(), "expected program"); - expect!["56437"].assert_eq(stripped_mast_size_str(&tx_script_package)); + expect!["56999"].assert_eq(stripped_mast_size_str(&tx_script_package)); let mut p2id_test = CompilerTest::rust_source_cargo_miden("../../examples/p2id-note", config.clone(), []); let note_package = p2id_test.compile_package(); assert!(note_package.is_library(), "expected library"); - expect!["53082"].assert_eq(stripped_mast_size_str(&note_package)); + expect!["55262"].assert_eq(stripped_mast_size_str(&note_package)); let mut p2ide_test = CompilerTest::rust_source_cargo_miden("../../examples/p2ide-note", config, []); let p2ide_package = p2ide_test.compile_package(); assert!(p2ide_package.is_library(), "expected library"); - expect!["62672"].assert_eq(stripped_mast_size_str(&p2ide_package)); +
expect!["61528"].assert_eq(stripped_mast_size_str(&p2ide_package)); } #[test] diff --git a/tools/cargo-miden/tests/build.rs b/tools/cargo-miden/tests/build.rs index ddfa5795b..932e67aae 100644 --- a/tools/cargo-miden/tests/build.rs +++ b/tools/cargo-miden/tests/build.rs @@ -207,12 +207,13 @@ fn new_project_integration_tests_pass() { let output = std::process::Command::new("cargo") .arg("test") + .arg("--tests") .current_dir(&integration_dir) .output() - .expect("failed to spawn `cargo test` inside integration directory"); + .expect("failed to spawn `cargo test --tests` inside integration directory"); if !output.status.success() { panic!( - "`cargo test` failed in {} with status {:?}\nstdout:\n{}\nstderr:\n{}", + "`cargo test --tests` failed in {} with status {:?}\nstdout:\n{}\nstderr:\n{}", integration_dir.display(), output.status.code(), String::from_utf8_lossy(&output.stdout), From ca53f3898d75c544575932dd83f536e4c61ad487 Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 29 Apr 2026 13:09:21 +0200 Subject: [PATCH 24/32] fixup: improve debugdump help --- tools/debugdump/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/debugdump/src/main.rs b/tools/debugdump/src/main.rs index 23dc7449f..db969dda9 100644 --- a/tools/debugdump/src/main.rs +++ b/tools/debugdump/src/main.rs @@ -97,7 +97,7 @@ impl DebugSections { #[derive(Parser, Debug)] #[command( name = "miden-debugdump", - about = "Dump debug information from MASP packages (similar to llvm-dwarfdump)", + about = "Dump debug information from MASP packages", version, rename_all = "kebab-case" )] From 4874f47520f309d2da4440f29b84be627ed1d93e Mon Sep 17 00:00:00 2001 From: djole Date: Wed, 29 Apr 2026 15:48:31 +0200 Subject: [PATCH 25/32] fix(debug): emit component core type metadata --- frontend/wasm/src/component/lift_exports.rs | 88 +++++++-- frontend/wasm/src/component/translator.rs | 3 + frontend/wasm/src/component/types/mod.rs | 65 ++++++- hir/src/attributes/debug.rs | 29 ++- 
midenc-compile/src/debug_info.rs | 197 +++++++++++++++++++- 5 files changed, 357 insertions(+), 25 deletions(-) diff --git a/frontend/wasm/src/component/lift_exports.rs b/frontend/wasm/src/component/lift_exports.rs index 60ac3cb10..26f8e4b01 100644 --- a/frontend/wasm/src/component/lift_exports.rs +++ b/frontend/wasm/src/component/lift_exports.rs @@ -5,8 +5,8 @@ use midenc_dialect_cf::ControlFlowOpBuilder; use midenc_dialect_hir::HirOpBuilder; use midenc_frontend_wasm_metadata::ProtocolExportKind; use midenc_hir::{ - FunctionType, Ident, Op, OpExt, SmallVec, SourceSpan, SymbolPath, ValueRange, ValueRef, - Visibility, + DICompileUnit, DISubprogram, FunctionType, Ident, Op, OpExt, SmallVec, SourceSpan, SymbolPath, + ValueRange, ValueRef, Visibility, dialects::builtin::{ BuiltinOpBuilder, ComponentBuilder, ModuleBuilder, attributes::{Signature, UnitAttr}, @@ -25,11 +25,18 @@ use crate::{ }, }; +struct ComponentExportMetadata<'a> { + ty: &'a FunctionType, + param_names: &'a [String], + protocol_export_kind: Option, +} + /// Generates a lifted component export wrapper around a lowered core Wasm export. 
pub fn generate_export_lifting_function( component_builder: &mut ComponentBuilder, export_func_name: &str, export_func_ty: FunctionType, + export_param_names: &[String], core_export_func_path: SymbolPath, protocol_export_kind: Option, diagnostics: &DiagnosticsHandler, @@ -55,6 +62,11 @@ pub fn generate_export_lifting_function( } let export_func_ident = Ident::new(export_func_name.to_string().into(), SourceSpan::default()); + let export_metadata = ComponentExportMetadata { + ty: &export_func_ty, + param_names: export_param_names, + protocol_export_kind, + }; let core_export_module_path = core_export_func_path.without_leaf(); let core_module_ref = component_builder @@ -77,22 +89,21 @@ pub fn generate_export_lifting_function( generate_lifting_with_transformation( component_builder, export_func_ident, - &export_func_ty, + &export_metadata, cross_ctx_export_sig_flat, core_export_func_ref, core_export_func_sig, &core_export_func_path, - protocol_export_kind, diagnostics, )?; } else { generate_direct_lifting( component_builder, export_func_ident, + &export_metadata, core_export_func_ref, core_export_func_sig, cross_ctx_export_sig_flat, - protocol_export_kind, )?; } @@ -132,12 +143,11 @@ pub fn generate_export_lifting_function( fn generate_lifting_with_transformation( component_builder: &mut ComponentBuilder, export_func_ident: Ident, - export_func_ty: &FunctionType, + export_metadata: &ComponentExportMetadata<'_>, cross_ctx_export_sig_flat: Signature, core_export_func_ref: midenc_hir::dialects::builtin::FunctionRef, core_export_func_sig: Signature, core_export_func_path: &SymbolPath, - protocol_export_kind: Option, diagnostics: &DiagnosticsHandler, ) -> WasmResult<()> { assert_eq!( @@ -154,7 +164,7 @@ fn generate_lifting_with_transformation( // Extract flattened result types from the exported component-level function type let context = { core_export_func_ref.borrow().as_operation().context_rc() }; - let flattened_results = flatten_types(&context, 
&export_func_ty.results).map_err(|e| { + let flattened_results = flatten_types(&context, &export_metadata.ty.results).map_err(|e| { let message = format!( "Failed to flatten result types for exported function {core_export_func_path}: {e}" ); @@ -176,7 +186,13 @@ fn generate_lifting_with_transformation( }; let export_func_ref = component_builder.define_function(export_func_ident, Visibility::Public, new_func_sig)?; - annotate_protocol_export(export_func_ref, protocol_export_kind); + annotate_protocol_export(export_func_ref, export_metadata.protocol_export_kind); + annotate_component_export_debug_signature( + export_func_ref, + export_func_ident.name.as_str(), + export_metadata.ty, + export_metadata.param_names, + ); let (span, context) = { let export_func = export_func_ref.borrow(); @@ -216,11 +232,11 @@ fn generate_lifting_with_transformation( // Load results using the recursive function from canon_abi_utils assert_eq!( - export_func_ty.results.len(), + export_metadata.ty.results.len(), 1, "expected a single result in the component-level export function" ); - let result_type = &export_func_ty.results[0]; + let result_type = &export_metadata.ty.results[0]; load(&mut fb, result_ptr, result_type, &mut return_values, span)?; @@ -273,17 +289,23 @@ fn generate_lifting_with_transformation( fn generate_direct_lifting( component_builder: &mut ComponentBuilder, export_func_ident: Ident, + export_metadata: &ComponentExportMetadata<'_>, core_export_func_ref: midenc_hir::dialects::builtin::FunctionRef, core_export_func_sig: Signature, cross_ctx_export_sig_flat: Signature, - protocol_export_kind: Option, ) -> WasmResult<()> { let export_func_ref = component_builder.define_function( export_func_ident, Visibility::Public, cross_ctx_export_sig_flat.clone(), )?; - annotate_protocol_export(export_func_ref, protocol_export_kind); + annotate_protocol_export(export_func_ref, export_metadata.protocol_export_kind); + annotate_component_export_debug_signature( + export_func_ref, + 
export_func_ident.name.as_str(), + export_metadata.ty, + export_metadata.param_names, + ); let (span, context) = { let export_func = export_func_ref.borrow(); @@ -349,3 +371,43 @@ fn annotate_protocol_export( None => {} } } + +fn annotate_component_export_debug_signature( + mut export_func_ref: midenc_hir::dialects::builtin::FunctionRef, + export_func_name: &str, + export_func_ty: &FunctionType, + export_param_names: &[String], +) { + let context = { + let export_func = export_func_ref.borrow(); + export_func.as_operation().context_rc() + }; + + let file = midenc_hir::interner::Symbol::intern(""); + let mut compile_unit = DICompileUnit::new(midenc_hir::interner::Symbol::intern("wit"), file); + compile_unit.producer = Some(midenc_hir::interner::Symbol::intern("midenc-frontend-wasm")); + + let param_names = export_param_names + .iter() + .map(|name| midenc_hir::interner::Symbol::intern(name.as_str())); + let subprogram = + DISubprogram::new(midenc_hir::interner::Symbol::intern(export_func_name), file, 1, Some(1)) + .with_function_type(FunctionType { + abi: export_func_ty.abi, + params: export_func_ty.params.clone(), + results: export_func_ty.results.clone(), + }) + .with_param_names(param_names); + + let cu_attr = context + .create_attribute::(compile_unit) + .as_attribute_ref(); + let sp_attr = context + .create_attribute::(subprogram) + .as_attribute_ref(); + + let mut export_func = export_func_ref.borrow_mut(); + let op = export_func.as_operation_mut(); + op.set_attribute("di.compile_unit", cu_attr); + op.set_attribute("di.subprogram", sp_attr); +} diff --git a/frontend/wasm/src/component/translator.rs b/frontend/wasm/src/component/translator.rs index c65110229..82739fd60 100644 --- a/frontend/wasm/src/component/translator.rs +++ b/frontend/wasm/src/component/translator.rs @@ -479,6 +479,7 @@ impl<'a> ComponentTranslator<'a> { let type_func_idx = types.convert_component_func_type(frame.types, canon_lift.ty).unwrap(); let component_types = 
types.resources_mut_and_types().1; + let type_func = component_types[type_func_idx].clone(); let func_ty = convert_lifted_func_ty(CanonicalAbiMode::Export, &type_func_idx, component_types); let core_export_func_path = self.core_module_export_func_path(frame, canon_lift); @@ -491,6 +492,7 @@ impl<'a> ComponentTranslator<'a> { &mut self.result, name, func_ty, + &type_func.param_names, core_export_func_path, protocol_export_kind, self.context.diagnostics(), @@ -688,6 +690,7 @@ impl<'a> ComponentTranslator<'a> { TypeDef::ComponentInstance(type_component_instance_idx) => type_component_instance_idx, _ => panic!("expected component instance"), }; + types.register_component_instance_export_type_names(ty, Some(name.0)); frame .component_instances .push(ComponentInstanceDef::Import(ComponentInstanceImport { diff --git a/frontend/wasm/src/component/types/mod.rs b/frontend/wasm/src/component/types/mod.rs index 1562cc890..8f3e2e6f5 100644 --- a/frontend/wasm/src/component/types/mod.rs +++ b/frontend/wasm/src/component/types/mod.rs @@ -282,6 +282,7 @@ pub struct ComponentTypes { options: PrimaryMap, results: PrimaryMap, resource_tables: PrimaryMap, + interface_type_names: FxHashMap, module_types: ModuleTypes, } @@ -325,6 +326,10 @@ impl ComponentTypes { InterfaceType::Result(i) => &self[*i].abi, } } + + pub fn interface_type_name(&self, ty: &InterfaceType) -> Option<&str> { + self.interface_type_names.get(ty).map(String::as_str) + } } macro_rules! 
impl_index { @@ -473,6 +478,7 @@ impl ComponentTypesBuilder { id: component_types::ComponentFuncTypeId, ) -> Result { let ty = &types[id]; + let param_names = ty.params.iter().map(|(name, _ty)| name.to_string()).collect(); let params = ty .params .iter() @@ -485,10 +491,53 @@ impl ComponentTypesBuilder { let ty = TypeFunc { params: self.new_tuple_type(params), results: self.new_tuple_type(results), + param_names, }; Ok(self.add_func_type(ty)) } + pub fn register_component_instance_export_type_names( + &mut self, + instance_idx: TypeComponentInstanceIndex, + namespace: Option<&str>, + ) { + let exports = self.component_types[instance_idx] + .exports + .iter() + .map(|(name, ty)| (name.clone(), *ty)) + .collect::>(); + + for (name, ty) in exports { + let qualified_name = namespace + .filter(|namespace| !namespace.is_empty()) + .map(|namespace| format!("{}/{}", namespace.trim_end_matches('/'), name)) + .unwrap_or(name); + self.register_type_name(ty, qualified_name); + } + } + + fn register_type_name(&mut self, ty: TypeDef, name: String) { + match ty { + TypeDef::Interface(interface_ty) => { + self.component_types.interface_type_names.entry(interface_ty).or_insert(name); + } + TypeDef::ComponentInstance(instance_idx) => { + self.register_component_instance_export_type_names(instance_idx, Some(&name)); + } + TypeDef::Component(component_idx) => { + let exports = self.component_types[component_idx] + .exports + .iter() + .map(|(export_name, ty)| (export_name.clone(), *ty)) + .collect::>(); + for (export_name, ty) in exports { + self.register_type_name(ty, format!("{}/{}", name, export_name)); + } + } + TypeDef::ComponentFunc(_) | TypeDef::Module(_) | TypeDef::Resource(_) => {} + } + } + /// Converts a wasmparser `ComponentEntityType` pub fn convert_component_entity_type( &mut self, @@ -1005,6 +1054,8 @@ pub struct TypeFunc { pub params: TypeTupleIndex, /// Results of the function represented as a tuple. 
pub results: TypeTupleIndex, + /// Source/component names of the parameters, in declaration order. + pub param_names: Box<[String]>, } /// All possible interface types that values can have. @@ -1756,11 +1807,15 @@ pub fn interface_type_to_ir( InterfaceType::String => todo!(), InterfaceType::ErrorContext => todo!("the async proposal is not currently supported"), InterfaceType::Record(idx) => { - let tys = component_types.records[*idx] - .fields - .iter() - .map(|f| interface_type_to_ir(&f.ty, component_types)); - midenc_hir::Type::from(midenc_hir::StructType::new(tys)) + let fields = component_types.records[*idx].fields.iter().map(|f| { + (Arc::::from(f.name.as_str()), interface_type_to_ir(&f.ty, component_types)) + }); + let struct_ty = if let Some(name) = component_types.interface_type_name(ty) { + midenc_hir::StructType::named(Arc::from(name), fields) + } else { + midenc_hir::StructType::new(fields) + }; + midenc_hir::Type::from(struct_ty) } // TODO: This is a stub to make `enum` in WIT generation work. Use proper type when ready. 
InterfaceType::Variant(_) => midenc_hir::Type::U32, diff --git a/hir/src/attributes/debug.rs b/hir/src/attributes/debug.rs index 694083775..44493f8f1 100644 --- a/hir/src/attributes/debug.rs +++ b/hir/src/attributes/debug.rs @@ -1,4 +1,4 @@ -use alloc::{format, vec::Vec}; +use alloc::{format, sync::Arc, vec::Vec}; use crate::{ AttrPrinter, Type, @@ -86,6 +86,8 @@ pub struct DISubprogram { pub column: Option, pub is_definition: bool, pub is_local: bool, + pub ty: Option, + pub param_names: Vec, } impl Default for DISubprogram { @@ -98,6 +100,8 @@ impl Default for DISubprogram { column: None, is_definition: false, is_local: false, + ty: None, + param_names: Vec::new(), } } } @@ -112,8 +116,23 @@ impl DISubprogram { column, is_definition: true, is_local: false, + ty: None, + param_names: Vec::new(), } } + + pub fn with_function_type(mut self, ty: crate::FunctionType) -> Self { + self.ty = Some(Type::Function(Arc::new(ty))); + self + } + + pub fn with_param_names(mut self, names: I) -> Self + where + I: IntoIterator, + { + self.param_names = names.into_iter().collect(); + self + } } impl AttrPrinter for DISubprogramAttr { @@ -137,6 +156,14 @@ impl PrettyPrint for DISubprogram { if let Some(linkage) = self.linkage_name { doc = doc + const_text(", linkage = ") + text(linkage.as_str()); } + if let Some(ty) = &self.ty { + doc = doc + const_text(", ty = ") + ty.render(); + } + if !self.param_names.is_empty() { + let names = + self.param_names.iter().map(|name| name.as_str()).collect::>().join(", "); + doc = doc + const_text(", params = [") + text(names) + const_text("]"); + } if self.is_definition { doc += const_text(", definition"); } diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs index 6888019db..8371efd7c 100644 --- a/midenc-compile/src/debug_info.rs +++ b/midenc-compile/src/debug_info.rs @@ -3,11 +3,11 @@ //! This module provides utilities for collecting debug information from the HIR //! 
and building debug sections that can be serialized into the MASP package. -use alloc::{collections::BTreeMap, format, string::ToString, sync::Arc}; +use alloc::{collections::BTreeMap, format, string::ToString, sync::Arc, vec::Vec}; use miden_debug_types::{ColumnNumber, LineNumber}; use miden_mast_package::debug_info::{ - DebugFileInfo, DebugFunctionInfo, DebugFunctionsSection, DebugPrimitiveType, + DebugFieldInfo, DebugFileInfo, DebugFunctionInfo, DebugFunctionsSection, DebugPrimitiveType, DebugSourcesSection, DebugTypeIdx, DebugTypeInfo, DebugTypesSection, DebugVariableInfo, }; use midenc_dialect_debuginfo as debuginfo; @@ -37,6 +37,8 @@ enum TypeKey { Primitive(u8), // Use discriminant instead of the enum directly Pointer(u32), Array(u32, Option), + Struct(u32, u32, Vec<(u32, u32, u32)>), + Function(Option, Vec), Unknown, } @@ -185,6 +187,11 @@ impl DebugInfoBuilder { if let Some(linkage_idx) = linkage_name_idx { func_info = func_info.with_linkage_name(linkage_idx); } + if let Some(ref ty) = subprogram.ty { + let type_idx = self.add_type(ty); + func_info = func_info.with_type(type_idx); + self.collect_subprogram_parameters(&subprogram, ty, &mut func_info); + } // Collect local variables from function body self.collect_variables_from_function_body(function, Some(&mut func_info)); @@ -250,6 +257,32 @@ impl DebugInfoBuilder { Some(var_info) } + fn collect_subprogram_parameters( + &mut self, + subprogram: &midenc_hir::DISubprogram, + ty: &Type, + func_info: &mut DebugFunctionInfo, + ) { + let Type::Function(func_ty) = ty else { + return; + }; + + for (idx, param_ty) in func_ty.params().iter().enumerate() { + let name = subprogram + .param_names + .get(idx) + .copied() + .unwrap_or_else(|| midenc_hir::interner::Symbol::intern(format!("arg{idx}"))); + let name_idx = self.functions.add_string(Arc::from(name.as_str())); + let type_idx = self.add_type(param_ty); + let line = LineNumber::new(subprogram.line).unwrap_or_default(); + let column = 
ColumnNumber::new(subprogram.column.unwrap_or(1)).unwrap_or_default(); + let var_info = DebugVariableInfo::new(name_idx, type_idx, line, column) + .with_arg_index((idx as u32) + 1); + func_info.add_variable(var_info); + } + } + /// Builds and returns the final debug info sections. pub fn build(self) -> DebugInfoSections { DebugInfoSections { @@ -298,12 +331,147 @@ fn hir_type_to_debug_type(ty: &Type, builder: &mut DebugInfoBuilder) -> DebugTyp } } // For types we don't have direct mappings for, use Unknown - Type::Struct(_) | Type::List(_) | Type::Function(_) | Type::Enum(_) => { - DebugTypeInfo::Unknown + Type::Struct(struct_ty) => { + let name = struct_ty.name(); + if name.as_deref().is_some_and(is_component_felt_type_name) { + return DebugTypeInfo::Primitive(DebugPrimitiveType::Felt); + } + if name.as_deref().is_some_and(is_component_word_type_name) { + return DebugTypeInfo::Primitive(DebugPrimitiveType::Word); + } + + let name_idx = + builder.types.add_string(Arc::from(name.as_deref().unwrap_or(""))); + let use_debug_layout = name.is_some(); + let mut next_offset = 0u32; + let fields: Vec = struct_ty + .fields() + .iter() + .enumerate() + .map(|(idx, field)| { + let field_name = field + .name + .as_deref() + .map(Arc::::from) + .unwrap_or_else(|| Arc::from(format!("field{idx}").as_str())); + let name_idx = builder.types.add_string(field_name); + let type_idx = builder.add_type(&field.ty); + let offset = if use_debug_layout { + let offset = next_offset; + next_offset = next_offset.saturating_add( + builder + .types + .get_type(type_idx) + .map(|ty| debug_type_size(ty, builder)) + .unwrap_or(0), + ); + offset + } else { + field.offset + }; + DebugFieldInfo { + name_idx, + type_idx, + offset, + } + }) + .collect(); + + DebugTypeInfo::Struct { + name_idx, + size: if use_debug_layout { + fields_size(fields.as_slice(), builder) + } else { + struct_ty.size() as u32 + }, + fields, + } + } + Type::Function(func_ty) => { + let return_type_idx = match 
func_ty.results().len() { + 0 => None, + 1 => Some(builder.add_type(&func_ty.results()[0])), + _ => Some(builder.add_tuple_type("return", func_ty.results())), + }; + let param_type_indices = + func_ty.params().iter().map(|ty| builder.add_type(ty)).collect(); + DebugTypeInfo::Function { + return_type_idx, + param_type_indices, + } + } + Type::List(_) | Type::Enum(_) => DebugTypeInfo::Unknown, + } +} + +impl DebugInfoBuilder { + fn add_tuple_type(&mut self, name: &str, fields: &[Type]) -> DebugTypeIdx { + let name_idx = self.types.add_string(Arc::from(name)); + let mut offset = 0u32; + let fields: Vec = fields + .iter() + .enumerate() + .map(|(idx, ty)| { + let name_idx = self.types.add_string(Arc::from(format!("field{idx}").as_str())); + let type_idx = self.add_type(ty); + let field = DebugFieldInfo { + name_idx, + type_idx, + offset, + }; + offset = offset.saturating_add( + self.types.get_type(type_idx).map(|ty| debug_type_size(ty, self)).unwrap_or(0), + ); + field + }) + .collect(); + self.types.add_type(DebugTypeInfo::Struct { + name_idx, + size: fields_size(fields.as_slice(), self), + fields, + }) + } +} + +fn fields_size(fields: &[DebugFieldInfo], builder: &DebugInfoBuilder) -> u32 { + fields + .iter() + .filter_map(|field| builder.types.get_type(field.type_idx).map(|ty| (field.offset, ty))) + .map(|(offset, ty)| offset.saturating_add(debug_type_size(ty, builder))) + .max() + .unwrap_or_default() +} + +fn debug_type_size(ty: &DebugTypeInfo, builder: &DebugInfoBuilder) -> u32 { + match ty { + DebugTypeInfo::Primitive(prim) => prim.size_in_bytes(), + DebugTypeInfo::Pointer { .. } => 4, + DebugTypeInfo::Array { + element_type_idx, + count, + } => { + let Some(count) = count else { + return 0; + }; + let Some(element_type) = builder.types.get_type(*element_type_idx) else { + return 0; + }; + count.saturating_mul(debug_type_size(element_type, builder)) } + DebugTypeInfo::Struct { size, .. } => *size, + DebugTypeInfo::Function { .. 
} => 4, + DebugTypeInfo::Unknown => 0, } } +fn is_component_felt_type_name(name: &str) -> bool { + name == "felt" || name.ends_with("/felt") || name.ends_with("::felt") +} + +fn is_component_word_type_name(name: &str) -> bool { + name == "word" || name.ends_with("/word") || name.ends_with("::word") +} + /// Creates a key for type deduplication. fn type_to_key(ty: &DebugTypeInfo) -> TypeKey { match ty { @@ -313,9 +481,26 @@ fn type_to_key(ty: &DebugTypeInfo) -> TypeKey { element_type_idx, count, } => TypeKey::Array(element_type_idx.as_u32(), *count), + DebugTypeInfo::Struct { + name_idx, + size, + fields, + } => TypeKey::Struct( + *name_idx, + *size, + fields + .iter() + .map(|field| (field.name_idx, field.type_idx.as_u32(), field.offset)) + .collect(), + ), + DebugTypeInfo::Function { + return_type_idx, + param_type_indices, + } => TypeKey::Function( + return_type_idx.map(DebugTypeIdx::as_u32), + param_type_indices.iter().map(|idx| idx.as_u32()).collect(), + ), DebugTypeInfo::Unknown => TypeKey::Unknown, - // For complex types like structs and functions, we don't deduplicate - _ => TypeKey::Unknown, } } From a9f01da4b6bf09ee766d6fe6805c00daacff6389 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 29 Apr 2026 12:34:30 -0400 Subject: [PATCH 26/32] chore: merge debugdump into objtool --- .gitignore | 2 +- Cargo.lock | 11 +- Cargo.toml | 17 + Makefile.toml | 18 +- dialects/scf/Cargo.toml | 2 +- tests/lit/debugdump/lit.suite.toml | 2 +- tests/lit/debugdump/locations-source-loc.wat | 2 +- tests/lit/debugdump/locations.wat | 2 +- tests/lit/debugdump/simple.wat | 4 +- tests/lit/debugdump/summary.wat | 4 +- tests/lit/lit.suite.toml | 2 +- tools/debugdump/Cargo.toml | 25 -- tools/objtool/Cargo.toml | 3 +- tools/objtool/src/decorators.rs | 2 + tools/objtool/src/dump.rs | 51 +++ .../main.rs => objtool/src/dump/debuginfo.rs} | 308 ++++++++---------- tools/objtool/src/lib.rs | 1 + tools/objtool/src/main.rs | 34 +- 18 files changed, 232 insertions(+), 258 deletions(-) 
delete mode 100644 tools/debugdump/Cargo.toml create mode 100644 tools/objtool/src/dump.rs rename tools/{debugdump/src/main.rs => objtool/src/dump/debuginfo.rs} (80%) diff --git a/.gitignore b/.gitignore index 3547a9b09..be40d7fec 100644 --- a/.gitignore +++ b/.gitignore @@ -24,5 +24,5 @@ book/ # Ignore Cargo.lock in test projects examples/**/Cargo.lock tests/**/Cargo.lock -**/src/bindings.rs + *.lit_test_times.txt* diff --git a/Cargo.lock b/Cargo.lock index b85fe988e..8cb9ae486 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2736,16 +2736,6 @@ dependencies = [ "thiserror 2.0.18", ] -[[package]] -name = "miden-debugdump" -version = "0.8.1" -dependencies = [ - "clap", - "miden-core", - "miden-mast-package", - "miden-thiserror", -] - [[package]] name = "miden-field" version = "0.23.0" @@ -2949,6 +2939,7 @@ dependencies = [ "miden-assembly-syntax", "miden-core", "miden-mast-package", + "miden-thiserror", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 9ffa8d694..12b56e9a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,6 +162,23 @@ miden-integration-tests = { path = "tests/integration" } midenc-expect-test = { path = "tools/expect-test" } miden-field = { version = "^0.24" } +[patch.crates-io] +#miden-assembly = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } +#miden-assembly = { path = "../miden-vm/assembly" } +#miden-assembly-syntax = { path = "../miden-vm/assembly-syntax" } +#miden-core = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } +#miden-core = { path = "../miden-vm/core" } +# miden-client = { git = "https://github.com/0xMiden/miden-client", rev = "0a5add565d1388f77cd182f3639c16aa8f7ec674" } +# miden-debug = { git = "https://github.com/0xMiden/miden-debug", branch = "main" } +#miden-debug-types = { path = "../miden-vm/crates/debug/types" } +#miden-processor = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } 
+#miden-processor = { path = "../miden-vm/processor" } +#miden-mast-package = { git = "https://github.com/0xMiden/miden-vm", rev = "614cd7f9b52f45238b0ab59c71ebb49325051e5d" } +#miden-mast-package = { path = "../miden-vm/package" } +#miden-protocol = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } +#miden-standards = { git = "https://github.com/0xMiden/protocol", rev = "a53bbe2209f506df87876c8b9c9a1730214f456b" } +#miden-tx = { tag = "v0.14.0-beta.4", git = "https://github.com/0xMiden/miden-base" } + [profile.dev] lto = false # Needed for 'inventory' to work diff --git a/Makefile.toml b/Makefile.toml index 15afa53e6..feb3092d6 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -235,20 +235,6 @@ args = [ "${MIDENC_BIN_DIR}", ] -[tasks.miden-debugdump] -category = "Build" -description = "Builds miden-debugdump and installs it to the bin folder" -command = "cargo" -args = [ - "-Z", - "unstable-options", - "build", - "-p", - "miden-debugdump", - "--artifact-dir", - "${MIDENC_BIN_DIR}", -] - [tasks.miden-objtool] category = "Build" description = "Builds miden-objtool and installs it to the bin folder" @@ -458,7 +444,7 @@ args = [ "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/bin", "${CARGO_MAKE_WORKSPACE_WORKING_DIRECTORY}/tests/lit", ] -dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt", "miden-debugdump"] +dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt", "miden-objtool"] [tasks.lit] category = "Test" @@ -469,7 +455,7 @@ args = [ "lit", "${@}", ] -dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt", "miden-debugdump"] +dependencies = ["litcheck", "midenc", "cargo-miden", "hir-opt", "miden-objtool"] [tasks.litcheck] diff --git a/dialects/scf/Cargo.toml b/dialects/scf/Cargo.toml index 19985c6ae..8e3c13ee4 100644 --- a/dialects/scf/Cargo.toml +++ b/dialects/scf/Cargo.toml @@ -29,4 +29,4 @@ bitvec.workspace = true midenc-expect-test = { path = "../../tools/expect-test" } midenc-hir = { path 
= "../../hir", features = ["logging"] } midenc-dialect-debuginfo = { path = "../debuginfo" } -env_logger = "0.11" +env_logger.workspace = true diff --git a/tests/lit/debugdump/lit.suite.toml b/tests/lit/debugdump/lit.suite.toml index a0ec4334b..00831fd3c 100644 --- a/tests/lit/debugdump/lit.suite.toml +++ b/tests/lit/debugdump/lit.suite.toml @@ -4,7 +4,7 @@ working_dir = "../../../" [substitutions] "midenc" = "$$MIDENC_BIN_DIR/midenc" -"miden-debugdump" = "$$MIDENC_BIN_DIR/miden-debugdump" +"miden-objtool" = "$$MIDENC_BIN_DIR/miden-objtool" "%cargo" = "cargo +$$CARGO_MAKE_RUSTUP_TOOLCHAIN_NAME" "%target_dir" = "$$CARGO_TARGET_DIR" diff --git a/tests/lit/debugdump/locations-source-loc.wat b/tests/lit/debugdump/locations-source-loc.wat index 6df3ceeb8..380ec9d73 100644 --- a/tests/lit/debugdump/locations-source-loc.wat +++ b/tests/lit/debugdump/locations-source-loc.wat @@ -2,7 +2,7 @@ ;; from a real Rust project compiled with debug info. ;; ;; RUN: %cargo build --target-dir %target_dir/debugdump-source-location --release --target wasm32-unknown-unknown --manifest-path tests/lit/source-location/test-project/Cargo.toml 2>&1 -;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%target_dir/debugdump-source-location/wasm32-unknown-unknown/release/source_location_test.wasm' --lib --debug full -o \"\$TMPFILE\" && miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%target_dir/debugdump-source-location/wasm32-unknown-unknown/release/source_location_test.wasm' --lib --debug full -o \"\$TMPFILE\" && miden-objtool dump debug-info \"\$TMPFILE\" --section locations" | filecheck %s ;; Check header ;; CHECK: .debug_loc contents (DebugVar entries from MAST): diff --git a/tests/lit/debugdump/locations.wat b/tests/lit/debugdump/locations.wat index 3ba06eb57..37bd8d372 100644 --- a/tests/lit/debugdump/locations.wat +++ b/tests/lit/debugdump/locations.wat @@ 
-1,5 +1,5 @@ ;; Test that .debug_loc section is present and handles empty case -;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-debugdump \"\$TMPFILE\" --section locations" | filecheck %s +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-objtool dump debug-info \"\$TMPFILE\" --section locations" | filecheck %s ;; Check header for .debug_loc section ;; CHECK: .debug_loc contents (DebugVar entries from MAST): diff --git a/tests/lit/debugdump/simple.wat b/tests/lit/debugdump/simple.wat index 0d2d9903a..d3ebbd6d2 100644 --- a/tests/lit/debugdump/simple.wat +++ b/tests/lit/debugdump/simple.wat @@ -1,5 +1,5 @@ -;; Test that miden-debugdump correctly parses and displays debug info from a .masp file -;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-debugdump \"\$TMPFILE\"" | filecheck %s +;; Test that miden-objtool correctly parses and displays debug info from a .masp file +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-objtool dump debug-info \"\$TMPFILE\"" | filecheck %s ;; Check header ;; CHECK: DEBUG INFO DUMP: diff --git a/tests/lit/debugdump/summary.wat b/tests/lit/debugdump/summary.wat index 8498a9778..102425333 100644 --- a/tests/lit/debugdump/summary.wat +++ b/tests/lit/debugdump/summary.wat @@ -1,5 +1,5 @@ -;; Test that miden-debugdump --summary shows only summary output -;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o \"\$TMPFILE\" && miden-debugdump \"\$TMPFILE\" --summary" | filecheck %s +;; Test that miden-objtool --summary shows only summary output +;; RUN: /bin/sh -c "TMPDIR=$(mktemp -d) && TMPFILE=\"\$TMPDIR/out.masp\" && midenc '%s' --exe --debug full -o 
\"\$TMPFILE\" && miden-objtool dump debug-info \"\$TMPFILE\" --summary" | filecheck %s ;; Check summary is present ;; CHECK: .debug_info summary: diff --git a/tests/lit/lit.suite.toml b/tests/lit/lit.suite.toml index e8b52875d..834e059fb 100644 --- a/tests/lit/lit.suite.toml +++ b/tests/lit/lit.suite.toml @@ -4,7 +4,7 @@ patterns = ["*.wat", "*.masm", "*.stderr"] [substitutions] "midenc" = "$$MIDENC_BIN_DIR/midenc" "hir-opt" = "$$MIDENC_BIN_DIR/hir-opt" -"miden-debugdump" = "$$MIDENC_BIN_DIR/miden-debugdump" +"miden-objtool" = "$$MIDENC_BIN_DIR/miden-objtool" "%cargo" = "cargo +$$CARGO_MAKE_RUSTUP_TOOLCHAIN_NAME" "%target_dir" = "$$CARGO_TARGET_DIR" diff --git a/tools/debugdump/Cargo.toml b/tools/debugdump/Cargo.toml deleted file mode 100644 index 930d9812a..000000000 --- a/tools/debugdump/Cargo.toml +++ /dev/null @@ -1,25 +0,0 @@ -[package] -name = "miden-debugdump" -version.workspace = true -rust-version.workspace = true -authors.workspace = true -description = "A tool to dump debug information from MASP packages" -repository.workspace = true -homepage.workspace = true -documentation.workspace = true -categories = ["development-tools", "command-line-utilities"] -keywords = ["debug", "miden", "dwarfdump"] -license.workspace = true -readme.workspace = true -edition.workspace = true -publish.workspace = true - -[[bin]] -name = "miden-debugdump" -path = "src/main.rs" - -[dependencies] -miden-mast-package.workspace = true -miden-core.workspace = true -clap.workspace = true -thiserror.workspace = true diff --git a/tools/objtool/Cargo.toml b/tools/objtool/Cargo.toml index e2c99c4e8..d8b1cc15a 100644 --- a/tools/objtool/Cargo.toml +++ b/tools/objtool/Cargo.toml @@ -25,6 +25,7 @@ bench = false [dependencies] anyhow.workspace = true clap.workspace = true -miden-core.workspace = true miden-assembly-syntax = { workspace = true, features = ["std"] } +miden-core.workspace = true miden-mast-package.workspace = true +thiserror.workspace = true diff --git
a/tools/objtool/src/decorators.rs b/tools/objtool/src/decorators.rs index f0b018471..85bae0147 100644 --- a/tools/objtool/src/decorators.rs +++ b/tools/objtool/src/decorators.rs @@ -9,8 +9,10 @@ use miden_core::{ use miden_mast_package::{Package, TargetType}; #[derive(Debug, Clone, Args)] +#[command(arg_required_else_help = true)] pub struct DecoratorsCommand { /// Path to the input .masp file + #[arg(required = true)] pub path: PathBuf, } diff --git a/tools/objtool/src/dump.rs b/tools/objtool/src/dump.rs new file mode 100644 index 000000000..4a4b518cc --- /dev/null +++ b/tools/objtool/src/dump.rs @@ -0,0 +1,51 @@ +mod debuginfo; + +use clap::{Subcommand, ValueEnum}; + +/// Dump useful information from assembled Miden packages +#[derive(Debug, Subcommand)] +#[command(name = "debuginfo", rename_all = "kebab-case")] +pub enum Dump { + /// Dump debug information encoded in a .masp file + DebugInfo(debuginfo::Config), +} + +/// The set of known sections that we've added dump support for +#[derive(Debug, Clone, Copy, ValueEnum)] +pub enum Section { + /// Show string table + Strings, + /// Show type information + Types, + /// Show source file information + Files, + /// Show function debug information + Functions, + /// Show variable information within functions + Variables, + /// Show variable location decorators from MAST (similar to DWARF .debug_loc) + Locations, +} + +#[derive(Debug, thiserror::Error)] +pub enum DumpError { + #[error("failed to read file: {0}")] + Io(#[from] std::io::Error), + #[error("failed to parse package: {0}")] + Parse(String), + #[error("no debug_info section found in package")] + NoDebugInfo, +} + +impl From for DumpError { + #[inline] + fn from(err: miden_core::serde::DeserializationError) -> Self { + Self::Parse(err.to_string()) + } +} + +pub fn run(command: &Dump) -> Result<(), DumpError> { + match command { + Dump::DebugInfo(config) => debuginfo::dump(config), + } +} diff --git a/tools/debugdump/src/main.rs 
b/tools/objtool/src/dump/debuginfo.rs similarity index 80% rename from tools/debugdump/src/main.rs rename to tools/objtool/src/dump/debuginfo.rs index db969dda9..f1721751c 100644 --- a/tools/debugdump/src/main.rs +++ b/tools/objtool/src/dump/debuginfo.rs @@ -1,17 +1,10 @@ -//! miden-debugdump - A tool to dump debug information from MASP packages +//! A command to dump debug information from MASP packages //! -//! Similar to llvm-dwarfdump, this tool parses the `.debug_info` section -//! from compiled MASP packages and displays the debug metadata in a -//! human-readable format. - -use std::{ - collections::BTreeMap, - fs::File, - io::{BufReader, Read}, - path::PathBuf, -}; +//! Similar to llvm-dwarfdump, this tool parses the `.debug_info` section from compiled MASP +//! packages and displays the debug metadata in a human-readable format. +use std::{collections::BTreeMap, path::PathBuf}; -use clap::{Parser, ValueEnum}; +use clap::Args; use miden_core::{ mast::MastForest, operations::{DebugVarInfo, DebugVarLocation}, @@ -25,14 +18,115 @@ use miden_mast_package::{ }, }; -#[derive(Debug, thiserror::Error)] -enum Error { - #[error("failed to read file: {0}")] - Io(#[from] std::io::Error), - #[error("failed to parse package: {0}")] - Parse(String), - #[error("no debug_info section found in package")] - NoDebugInfo, +use super::{DumpError, Section}; + +/// Dump debug information encoded in a .masp file +#[derive(Debug, Args)] +pub struct Config { + /// The input package to dump info from + #[arg(required = true)] + input: PathBuf, + + /// Filter output to a specific section + #[arg(short, long, value_enum)] + section: Option
, + + /// Show all available information + #[arg(short, long)] + verbose: bool, + + /// Show raw indices instead of resolved names + #[arg(long)] + raw: bool, + + /// Only show summary statistics + #[arg(long)] + summary: bool, +} + +pub fn dump(config: &Config) -> Result<(), DumpError> { + // Read the MASP file as raw bytes (a package is binary data, so it must not be decoded as UTF-8) + let bytes = std::fs::read(&config.input)?; + + // Parse the package + let package: Package = Package::read_from(&mut SliceReader::new(&bytes)) + .map_err(|e| DumpError::Parse(e.to_string()))?; + + // Get the MAST forest for location decorators + let mast_forest = package.mast.mast_forest(); + + // Find the three debug sections + let types_section = extract_section::(&package, SectionId::DEBUG_TYPES)?; + let sources_section = + extract_section::(&package, SectionId::DEBUG_SOURCES)?; + let functions_section = + extract_section::(&package, SectionId::DEBUG_FUNCTIONS)?; + + // We need at least one section to proceed + if types_section.is_none() && sources_section.is_none() && functions_section.is_none() { + return Err(DumpError::NoDebugInfo); + } + + // Parse each section (use empty defaults if missing) + let debug_sections = DebugSections { + types: types_section.unwrap_or_default(), + sources: sources_section.unwrap_or_default(), + functions: functions_section.unwrap_or_default(), + }; + + // Print header + println!("{}", "=".repeat(80)); + println!("Package Info:"); + println!(" | Name: {}", &package.name); + println!(" | Version: {}", &package.version); + println!(" | Kind: {}", &package.kind); + println!("Section Versioning:"); + println!(" | Types: {}", debug_sections.types.version); + println!(" | Sources: {}", debug_sections.sources.version); + println!(" | Functions: {}", debug_sections.functions.version); + println!("{}", "=".repeat(80)); + println!(); + + if config.summary { + print_summary(&debug_sections, mast_forest); + return Ok(()); + } + + match config.section { + Some(Section::Strings) =>
print_strings(&debug_sections), + Some(Section::Types) => print_types(&debug_sections, config.raw), + Some(Section::Files) => print_files(&debug_sections, config.raw), + Some(Section::Functions) => print_functions(&debug_sections, config.raw, config.verbose), + Some(Section::Variables) => print_variables(&debug_sections, config.raw), + Some(Section::Locations) => print_locations(mast_forest, &debug_sections, config.verbose), + None => { + // Print everything + print_summary(&debug_sections, mast_forest); + println!(); + print_strings(&debug_sections); + println!(); + print_types(&debug_sections, config.raw); + println!(); + print_files(&debug_sections, config.raw); + println!(); + print_functions(&debug_sections, config.raw, config.verbose); + println!(); + print_locations(mast_forest, &debug_sections, config.verbose); + } + } + + Ok(()) +} + +fn extract_section(package: &Package, id: SectionId) -> Result, DumpError> +where + T: Deserializable, +{ + let Some(section) = package.sections.iter().find(|s| s.id == id) else { + return Ok(None); + }; + + T::read_from_bytes(§ion.data).map(Some).map_err(DumpError::from) } const FRAME_BASE_LOCAL_MARKER: u32 = 1 << 31; @@ -93,174 +187,34 @@ impl DebugSections { } } -/// A tool to dump debug information from MASP packages -#[derive(Parser, Debug)] -#[command( - name = "miden-debugdump", - about = "Dump debug information from MASP packages", - version, - rename_all = "kebab-case" -)] -struct Cli { - /// Input MASP file to analyze - #[arg(required = true)] - input: PathBuf, - - /// Filter output to specific section - #[arg(short, long, value_enum)] - section: Option, - - /// Show all available information (verbose) - #[arg(short, long)] - verbose: bool, - - /// Show raw indices instead of resolved names - #[arg(long)] - raw: bool, - - /// Only show summary statistics - #[arg(long)] - summary: bool, -} - -#[derive(Debug, Clone, Copy, ValueEnum)] -enum DumpSection { - /// Show string table - Strings, - /// Show type information - 
Types, - /// Show source file information - Files, - /// Show function debug information - Functions, - /// Show variable information within functions - Variables, - /// Show variable location decorators from MAST (similar to DWARF .debug_loc) - Locations, -} - -fn main() { - if let Err(e) = run() { - eprintln!("error: {e}"); - std::process::exit(1); - } -} - -fn run() -> Result<(), Error> { - let cli = Cli::parse(); - - // Read the MASP file - let file = File::open(&cli.input)?; - let mut reader = BufReader::new(file); - let mut bytes = Vec::new(); - reader.read_to_end(&mut bytes)?; - - // Parse the package - let package: Package = Package::read_from(&mut SliceReader::new(&bytes)) - .map_err(|e| Error::Parse(e.to_string()))?; - - // Get the MAST forest for location decorators - let mast_forest = package.mast.mast_forest(); - - // Find the three debug sections - let types_section = package.sections.iter().find(|s| s.id == SectionId::DEBUG_TYPES); - let sources_section = package.sections.iter().find(|s| s.id == SectionId::DEBUG_SOURCES); - let functions_section = package.sections.iter().find(|s| s.id == SectionId::DEBUG_FUNCTIONS); - - // We need at least one section to proceed - if types_section.is_none() && sources_section.is_none() && functions_section.is_none() { - return Err(Error::NoDebugInfo); - } - - // Parse each section (use empty defaults if missing) - let types: DebugTypesSection = match types_section { - Some(s) => DebugTypesSection::read_from(&mut SliceReader::new(&s.data)) - .map_err(|e| Error::Parse(e.to_string()))?, - None => DebugTypesSection::new(), - }; - let sources: DebugSourcesSection = match sources_section { - Some(s) => DebugSourcesSection::read_from(&mut SliceReader::new(&s.data)) - .map_err(|e| Error::Parse(e.to_string()))?, - None => DebugSourcesSection::new(), - }; - let functions: DebugFunctionsSection = match functions_section { - Some(s) => DebugFunctionsSection::read_from(&mut SliceReader::new(&s.data)) - .map_err(|e| 
Error::Parse(e.to_string()))?, - None => DebugFunctionsSection::new(), - }; - - let debug_sections = DebugSections { - types, - sources, - functions, - }; - - // Print header - println!("{}", "=".repeat(80)); - println!("DEBUG INFO DUMP: {}", cli.input.display()); - println!("Package: {} (version: {})", package.name, package.version); - println!( - "Debug info versions: types={}, sources={}, functions={}", - debug_sections.types.version, - debug_sections.sources.version, - debug_sections.functions.version, - ); - println!("{}", "=".repeat(80)); +fn print_summary(debug_sections: &DebugSections, mast_forest: &MastForest) { + println!("Summary:"); println!(); - if cli.summary { - print_summary(&debug_sections, mast_forest); - return Ok(()); - } - - match cli.section { - Some(DumpSection::Strings) => print_strings(&debug_sections), - Some(DumpSection::Types) => print_types(&debug_sections, cli.raw), - Some(DumpSection::Files) => print_files(&debug_sections, cli.raw), - Some(DumpSection::Functions) => print_functions(&debug_sections, cli.raw, cli.verbose), - Some(DumpSection::Variables) => print_variables(&debug_sections, cli.raw), - Some(DumpSection::Locations) => print_locations(mast_forest, &debug_sections, cli.verbose), - None => { - // Print everything - print_summary(&debug_sections, mast_forest); - println!(); - print_strings(&debug_sections); - println!(); - print_types(&debug_sections, cli.raw); - println!(); - print_files(&debug_sections, cli.raw); - println!(); - print_functions(&debug_sections, cli.raw, cli.verbose); - println!(); - print_locations(mast_forest, &debug_sections, cli.verbose); - } - } - - Ok(()) -} + println!("Types:"); + println!(" | records: {}", &debug_sections.types.types.len()); + println!(" | strings: {}", &debug_sections.types.strings.len()); + println!(); -fn print_summary(debug_sections: &DebugSections, mast_forest: &MastForest) { - println!(".debug_info summary:"); - println!( - " Strings: {} (types) + {} (sources) + {} (functions)", 
- debug_sections.types.strings.len(), - debug_sections.sources.strings.len(), - debug_sections.functions.strings.len(), - ); - println!(" Types: {} entries", debug_sections.types.types.len()); - println!(" Files: {} entries", debug_sections.sources.files.len()); - println!(" Functions: {} entries", debug_sections.functions.functions.len()); + println!("Sources:"); + println!(" | records: {}", &debug_sections.sources.files.len()); + println!(" | strings: {}", &debug_sections.sources.strings.len()); + println!(); let total_vars: usize = debug_sections.functions.functions.iter().map(|f| f.variables.len()).sum(); let total_inlined: usize = debug_sections.functions.functions.iter().map(|f| f.inlined_calls.len()).sum(); - println!(" Variables: {} total (across all functions)", total_vars); - println!(" Inlined: {} call sites", total_inlined); + println!("Functions:"); + println!(" | records: {}", &debug_sections.functions.functions.len()); + println!(" | strings: {}", &debug_sections.functions.strings.len()); + println!(" | variables: {total_vars} (total across all functions)"); + println!(" | inlined: {total_inlined} call sites"); + println!(); // Count debug vars in MAST let debug_var_count = mast_forest.debug_info().debug_vars().len(); - println!(" DebugVar entries: {} in MAST", debug_var_count); + println!("Found {debug_var_count} debug variable records"); } fn print_strings(debug_sections: &DebugSections) { diff --git a/tools/objtool/src/lib.rs b/tools/objtool/src/lib.rs index 4e5aaef1a..fee5fba57 100644 --- a/tools/objtool/src/lib.rs +++ b/tools/objtool/src/lib.rs @@ -1 +1,2 @@ pub mod decorators; +pub mod dump; diff --git a/tools/objtool/src/main.rs b/tools/objtool/src/main.rs index e69d3d5fd..7bb14f8f5 100644 --- a/tools/objtool/src/main.rs +++ b/tools/objtool/src/main.rs @@ -1,24 +1,19 @@ -use clap::{Parser, Subcommand}; -use miden_assembly_syntax::{Report, diagnostics::reporting}; -use miden_objtool::decorators; +use clap::Parser; +use miden_assembly_syntax::{ 
+ Report, + diagnostics::{IntoDiagnostic, reporting}, +}; +use miden_objtool::{decorators, dump}; +/// Common utilities for analyzing Miden artifacts #[derive(Debug, Parser)] -#[command( - name = "miden-objtool", - version, - about = "Common utilities for analyzing Miden artifacts", - long_about = None, - arg_required_else_help = true, -)] -struct Cli { - #[command(subcommand)] - command: Commands, -} - -#[derive(Debug, Subcommand)] -enum Commands { +#[command(name = "miden-objtool", version, arg_required_else_help = true)] +enum Cli { /// Compare serialized MAST forest sizes after stripping decorators. Decorators(decorators::DecoratorsCommand), + /// Dump various types of information from assembled packages + #[command(subcommand)] + Dump(dump::Dump), } fn main() -> Result<(), Report> { @@ -31,7 +26,8 @@ fn main() -> Result<(), Report> { reporting::set_panic_hook(); } - match &cli.command { - Commands::Decorators(command) => decorators::run(command).map_err(Report::msg), + match &cli { + Cli::Decorators(command) => decorators::run(command).map_err(Report::msg), + Cli::Dump(command) => dump::run(command).into_diagnostic(), } } From 5ade1689ebfda688d933874c913f8e8e27e4c763 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 29 Apr 2026 14:00:16 -0400 Subject: [PATCH 27/32] chore: remove manual debug info section handling in driver --- Cargo.lock | 3 - midenc-compile/Cargo.toml | 3 - midenc-compile/src/debug_info.rs | 524 -------------------------- midenc-compile/src/lib.rs | 1 - midenc-compile/src/stages/assemble.rs | 13 - midenc-compile/src/stages/codegen.rs | 29 -- midenc-compile/src/stages/rewrite.rs | 4 - 7 files changed, 577 deletions(-) delete mode 100644 midenc-compile/src/debug_info.rs diff --git a/Cargo.lock b/Cargo.lock index 8cb9ae486..af10051f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3312,12 +3312,9 @@ dependencies = [ "inventory", "log", "miden-assembly", - "miden-core", - "miden-debug-types", "miden-mast-package", "miden-thiserror", 
"midenc-codegen-masm", - "midenc-dialect-debuginfo", "midenc-dialect-hir", "midenc-dialect-scf", "midenc-frontend-wasm", diff --git a/midenc-compile/Cargo.toml b/midenc-compile/Cargo.toml index 3173bc0ab..470c0bd0f 100644 --- a/midenc-compile/Cargo.toml +++ b/midenc-compile/Cargo.toml @@ -32,11 +32,8 @@ log.workspace = true inventory.workspace = true midenc-codegen-masm.workspace = true miden-assembly.workspace = true -miden-core.workspace = true -miden-debug-types.workspace = true miden-mast-package.workspace = true midenc-frontend-wasm.workspace = true -midenc-dialect-debuginfo.workspace = true midenc-dialect-scf.workspace = true midenc-dialect-hir.workspace = true midenc-hir.workspace = true diff --git a/midenc-compile/src/debug_info.rs b/midenc-compile/src/debug_info.rs deleted file mode 100644 index 8371efd7c..000000000 --- a/midenc-compile/src/debug_info.rs +++ /dev/null @@ -1,524 +0,0 @@ -//! Debug info section builder for MASP packages. -//! -//! This module provides utilities for collecting debug information from the HIR -//! and building debug sections that can be serialized into the MASP package. - -use alloc::{collections::BTreeMap, format, string::ToString, sync::Arc, vec::Vec}; - -use miden_debug_types::{ColumnNumber, LineNumber}; -use miden_mast_package::debug_info::{ - DebugFieldInfo, DebugFileInfo, DebugFunctionInfo, DebugFunctionsSection, DebugPrimitiveType, - DebugSourcesSection, DebugTypeIdx, DebugTypeInfo, DebugTypesSection, DebugVariableInfo, -}; -use midenc_dialect_debuginfo as debuginfo; -use midenc_hir::{DILocalVariable, DISubprogramAttr, OpExt, Type, dialects::builtin}; - -/// The output of the debug info collection pass: three separate sections. -pub struct DebugInfoSections { - pub types: DebugTypesSection, - pub sources: DebugSourcesSection, - pub functions: DebugFunctionsSection, -} - -/// Builder for constructing debug info sections from HIR components. 
-pub struct DebugInfoBuilder { - types: DebugTypesSection, - sources: DebugSourcesSection, - functions: DebugFunctionsSection, - /// Maps source file paths to their indices in the file table - file_indices: BTreeMap, - /// Maps type keys to their indices in the type table - type_indices: BTreeMap, -} - -/// A key for deduplicating types (uses u32 since DebugTypeIdx lacks Ord) -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -enum TypeKey { - Primitive(u8), // Use discriminant instead of the enum directly - Pointer(u32), - Array(u32, Option), - Struct(u32, u32, Vec<(u32, u32, u32)>), - Function(Option, Vec), - Unknown, -} - -impl Default for DebugInfoBuilder { - fn default() -> Self { - Self::new() - } -} - -impl DebugInfoBuilder { - /// Creates a new debug info builder. - pub fn new() -> Self { - Self { - types: DebugTypesSection::new(), - sources: DebugSourcesSection::new(), - functions: DebugFunctionsSection::new(), - file_indices: BTreeMap::new(), - type_indices: BTreeMap::new(), - } - } - - /// Adds a file to the file table and returns its index. - /// - /// The `directory` parameter, if provided, is joined with the path to create - /// a full path. The debug info section stores full paths only. - pub fn add_file(&mut self, path: &str, directory: Option<&str>) -> u32 { - // Build the full path - let full_path = if let Some(dir) = directory { - if path.starts_with('/') || path.starts_with("\\\\") { - // Already absolute - path.to_string() - } else { - format!("{}/{}", dir.trim_end_matches('/'), path) - } - } else { - path.to_string() - }; - - if let Some(&idx) = self.file_indices.get(&full_path) { - return idx; - } - - let path_idx = self.sources.add_string(Arc::from(full_path.as_str())); - let file = DebugFileInfo::new(path_idx); - - let idx = self.sources.add_file(file); - self.file_indices.insert(full_path, idx); - idx - } - - /// Adds a type to the type table and returns its index. 
- pub fn add_type(&mut self, ty: &Type) -> DebugTypeIdx { - let debug_type = hir_type_to_debug_type(ty, self); - let key = type_to_key(&debug_type); - - if let Some(&idx) = self.type_indices.get(&key) { - return idx; - } - - let idx = self.types.add_type(debug_type); - self.type_indices.insert(key, idx); - idx - } - - /// Adds a primitive type and returns its index. - pub fn add_primitive_type(&mut self, prim: DebugPrimitiveType) -> DebugTypeIdx { - let key = TypeKey::Primitive(prim as u8); - if let Some(&idx) = self.type_indices.get(&key) { - return idx; - } - - let idx = self.types.add_type(DebugTypeInfo::Primitive(prim)); - self.type_indices.insert(key, idx); - idx - } - - /// Collects debug information from an HIR component. - pub fn collect_from_component(&mut self, component: &builtin::Component) { - // Traverse the component and collect debug info from all functions - let region = component.body(); - let block = region.entry(); - - for op in block.body() { - if let Some(module) = op.downcast_ref::() { - self.collect_from_module(module); - } else if let Some(interface) = op.downcast_ref::() { - self.collect_from_interface(interface); - } else if let Some(function) = op.downcast_ref::() { - self.collect_from_function(function); - } - } - } - - fn collect_from_module(&mut self, module: &builtin::Module) { - let region = module.body(); - let block = region.entry(); - - for op in block.body() { - if let Some(function) = op.downcast_ref::() { - self.collect_from_function(function); - } - } - } - - fn collect_from_interface(&mut self, interface: &builtin::Interface) { - let region = interface.body(); - let block = region.entry(); - - for op in block.body() { - if let Some(function) = op.downcast_ref::() { - self.collect_from_function(function); - } - } - } - - fn collect_from_function(&mut self, function: &builtin::Function) { - // Try to get DISubprogram from the function's attributes - let subprogram_attr = - 
function.get_attribute(midenc_hir::interner::Symbol::intern("di.subprogram")); - - let subprogram = subprogram_attr.and_then(|attr| { - let borrowed = attr.borrow(); - borrowed.downcast_ref::().map(|sp| sp.as_value().clone()) - }); - - let Some(subprogram) = subprogram else { - // No debug info for this function, just collect from body - self.collect_variables_from_function_body(function, None); - return; - }; - - // Add file - let file_idx = self.add_file(subprogram.file.as_str(), None); - - // Add function name - let name_idx = self.functions.add_string(Arc::from(subprogram.name.as_str())); - let linkage_name_idx = subprogram - .linkage_name - .map(|s| self.functions.add_string(Arc::from(s.as_str()))); - - // Create function info - let line = LineNumber::new(subprogram.line).unwrap_or_default(); - let column = ColumnNumber::new(subprogram.column.unwrap_or(1)).unwrap_or_default(); - - let mut func_info = DebugFunctionInfo::new(name_idx, file_idx, line, column); - if let Some(linkage_idx) = linkage_name_idx { - func_info = func_info.with_linkage_name(linkage_idx); - } - if let Some(ref ty) = subprogram.ty { - let type_idx = self.add_type(ty); - func_info = func_info.with_type(type_idx); - self.collect_subprogram_parameters(&subprogram, ty, &mut func_info); - } - - // Collect local variables from function body - self.collect_variables_from_function_body(function, Some(&mut func_info)); - - self.functions.add_function(func_info); - } - - fn collect_variables_from_function_body( - &mut self, - function: &builtin::Function, - func_info: Option<&mut DebugFunctionInfo>, - ) { - // Walk through the function body to find DbgValue operations - let entry = function.entry_block(); - let entry_block = entry.borrow(); - - if let Some(func_info) = func_info { - self.collect_variables_from_block(&entry_block, func_info); - } - } - - fn collect_variables_from_block( - &mut self, - block: &midenc_hir::Block, - func_info: &mut DebugFunctionInfo, - ) { - for op in block.body() { - // 
Check if this is a DbgValue operation - if let Some(dbg_value) = op.downcast_ref::() - && let Some(var_info) = self.extract_variable_info(dbg_value.variable().as_value()) - { - func_info.add_variable(var_info); - } - - // Recursively process nested regions - for region_idx in 0..op.num_regions() { - let region = op.region(region_idx); - let entry = region.entry(); - self.collect_variables_from_block(&entry, func_info); - } - } - } - - fn extract_variable_info(&mut self, var: &DILocalVariable) -> Option { - let name_idx = self.functions.add_string(Arc::from(var.name.as_str())); - - // Add type if available - let type_idx = if let Some(ref ty) = var.ty { - self.add_type(ty) - } else { - self.add_primitive_type(DebugPrimitiveType::Felt) // Default to felt - }; - - let line = LineNumber::new(var.line).unwrap_or_default(); - let column = ColumnNumber::new(var.column.unwrap_or(1)).unwrap_or_default(); - - let mut var_info = DebugVariableInfo::new(name_idx, type_idx, line, column); - - if let Some(arg_index) = var.arg_index { - var_info = var_info.with_arg_index(arg_index + 1); - } - - Some(var_info) - } - - fn collect_subprogram_parameters( - &mut self, - subprogram: &midenc_hir::DISubprogram, - ty: &Type, - func_info: &mut DebugFunctionInfo, - ) { - let Type::Function(func_ty) = ty else { - return; - }; - - for (idx, param_ty) in func_ty.params().iter().enumerate() { - let name = subprogram - .param_names - .get(idx) - .copied() - .unwrap_or_else(|| midenc_hir::interner::Symbol::intern(format!("arg{idx}"))); - let name_idx = self.functions.add_string(Arc::from(name.as_str())); - let type_idx = self.add_type(param_ty); - let line = LineNumber::new(subprogram.line).unwrap_or_default(); - let column = ColumnNumber::new(subprogram.column.unwrap_or(1)).unwrap_or_default(); - let var_info = DebugVariableInfo::new(name_idx, type_idx, line, column) - .with_arg_index((idx as u32) + 1); - func_info.add_variable(var_info); - } - } - - /// Builds and returns the final debug info 
sections. - pub fn build(self) -> DebugInfoSections { - DebugInfoSections { - types: self.types, - sources: self.sources, - functions: self.functions, - } - } - - /// Returns whether any debug info has been collected. - pub fn is_empty(&self) -> bool { - self.functions.is_empty() && self.types.is_empty() && self.sources.is_empty() - } -} - -/// Converts an HIR Type to a DebugTypeInfo. -fn hir_type_to_debug_type(ty: &Type, builder: &mut DebugInfoBuilder) -> DebugTypeInfo { - match ty { - Type::Unknown => DebugTypeInfo::Unknown, - Type::Never => DebugTypeInfo::Primitive(DebugPrimitiveType::Void), - Type::I1 => DebugTypeInfo::Primitive(DebugPrimitiveType::Bool), - Type::I8 => DebugTypeInfo::Primitive(DebugPrimitiveType::I8), - Type::U8 => DebugTypeInfo::Primitive(DebugPrimitiveType::U8), - Type::I16 => DebugTypeInfo::Primitive(DebugPrimitiveType::I16), - Type::U16 => DebugTypeInfo::Primitive(DebugPrimitiveType::U16), - Type::I32 => DebugTypeInfo::Primitive(DebugPrimitiveType::I32), - Type::U32 => DebugTypeInfo::Primitive(DebugPrimitiveType::U32), - Type::I64 => DebugTypeInfo::Primitive(DebugPrimitiveType::I64), - Type::U64 => DebugTypeInfo::Primitive(DebugPrimitiveType::U64), - Type::I128 => DebugTypeInfo::Primitive(DebugPrimitiveType::I128), - Type::U128 => DebugTypeInfo::Primitive(DebugPrimitiveType::U128), - Type::U256 => DebugTypeInfo::Unknown, // No direct mapping for U256 - Type::F64 => DebugTypeInfo::Primitive(DebugPrimitiveType::F64), - Type::Felt => DebugTypeInfo::Primitive(DebugPrimitiveType::Felt), - Type::Ptr(ptr_type) => { - let pointee_idx = builder.add_type(ptr_type.pointee()); - DebugTypeInfo::Pointer { - pointee_type_idx: pointee_idx, - } - } - Type::Array(array_type) => { - let element_idx = builder.add_type(array_type.element_type()); - DebugTypeInfo::Array { - element_type_idx: element_idx, - count: Some(array_type.len() as u32), - } - } - // For types we don't have direct mappings for, use Unknown - Type::Struct(struct_ty) => { - let name = 
struct_ty.name(); - if name.as_deref().is_some_and(is_component_felt_type_name) { - return DebugTypeInfo::Primitive(DebugPrimitiveType::Felt); - } - if name.as_deref().is_some_and(is_component_word_type_name) { - return DebugTypeInfo::Primitive(DebugPrimitiveType::Word); - } - - let name_idx = - builder.types.add_string(Arc::from(name.as_deref().unwrap_or(""))); - let use_debug_layout = name.is_some(); - let mut next_offset = 0u32; - let fields: Vec = struct_ty - .fields() - .iter() - .enumerate() - .map(|(idx, field)| { - let field_name = field - .name - .as_deref() - .map(Arc::::from) - .unwrap_or_else(|| Arc::from(format!("field{idx}").as_str())); - let name_idx = builder.types.add_string(field_name); - let type_idx = builder.add_type(&field.ty); - let offset = if use_debug_layout { - let offset = next_offset; - next_offset = next_offset.saturating_add( - builder - .types - .get_type(type_idx) - .map(|ty| debug_type_size(ty, builder)) - .unwrap_or(0), - ); - offset - } else { - field.offset - }; - DebugFieldInfo { - name_idx, - type_idx, - offset, - } - }) - .collect(); - - DebugTypeInfo::Struct { - name_idx, - size: if use_debug_layout { - fields_size(fields.as_slice(), builder) - } else { - struct_ty.size() as u32 - }, - fields, - } - } - Type::Function(func_ty) => { - let return_type_idx = match func_ty.results().len() { - 0 => None, - 1 => Some(builder.add_type(&func_ty.results()[0])), - _ => Some(builder.add_tuple_type("return", func_ty.results())), - }; - let param_type_indices = - func_ty.params().iter().map(|ty| builder.add_type(ty)).collect(); - DebugTypeInfo::Function { - return_type_idx, - param_type_indices, - } - } - Type::List(_) | Type::Enum(_) => DebugTypeInfo::Unknown, - } -} - -impl DebugInfoBuilder { - fn add_tuple_type(&mut self, name: &str, fields: &[Type]) -> DebugTypeIdx { - let name_idx = self.types.add_string(Arc::from(name)); - let mut offset = 0u32; - let fields: Vec = fields - .iter() - .enumerate() - .map(|(idx, ty)| { - let name_idx 
= self.types.add_string(Arc::from(format!("field{idx}").as_str())); - let type_idx = self.add_type(ty); - let field = DebugFieldInfo { - name_idx, - type_idx, - offset, - }; - offset = offset.saturating_add( - self.types.get_type(type_idx).map(|ty| debug_type_size(ty, self)).unwrap_or(0), - ); - field - }) - .collect(); - self.types.add_type(DebugTypeInfo::Struct { - name_idx, - size: fields_size(fields.as_slice(), self), - fields, - }) - } -} - -fn fields_size(fields: &[DebugFieldInfo], builder: &DebugInfoBuilder) -> u32 { - fields - .iter() - .filter_map(|field| builder.types.get_type(field.type_idx).map(|ty| (field.offset, ty))) - .map(|(offset, ty)| offset.saturating_add(debug_type_size(ty, builder))) - .max() - .unwrap_or_default() -} - -fn debug_type_size(ty: &DebugTypeInfo, builder: &DebugInfoBuilder) -> u32 { - match ty { - DebugTypeInfo::Primitive(prim) => prim.size_in_bytes(), - DebugTypeInfo::Pointer { .. } => 4, - DebugTypeInfo::Array { - element_type_idx, - count, - } => { - let Some(count) = count else { - return 0; - }; - let Some(element_type) = builder.types.get_type(*element_type_idx) else { - return 0; - }; - count.saturating_mul(debug_type_size(element_type, builder)) - } - DebugTypeInfo::Struct { size, .. } => *size, - DebugTypeInfo::Function { .. } => 4, - DebugTypeInfo::Unknown => 0, - } -} - -fn is_component_felt_type_name(name: &str) -> bool { - name == "felt" || name.ends_with("/felt") || name.ends_with("::felt") -} - -fn is_component_word_type_name(name: &str) -> bool { - name == "word" || name.ends_with("/word") || name.ends_with("::word") -} - -/// Creates a key for type deduplication. 
-fn type_to_key(ty: &DebugTypeInfo) -> TypeKey { - match ty { - DebugTypeInfo::Primitive(p) => TypeKey::Primitive(*p as u8), - DebugTypeInfo::Pointer { pointee_type_idx } => TypeKey::Pointer(pointee_type_idx.as_u32()), - DebugTypeInfo::Array { - element_type_idx, - count, - } => TypeKey::Array(element_type_idx.as_u32(), *count), - DebugTypeInfo::Struct { - name_idx, - size, - fields, - } => TypeKey::Struct( - *name_idx, - *size, - fields - .iter() - .map(|field| (field.name_idx, field.type_idx.as_u32(), field.offset)) - .collect(), - ), - DebugTypeInfo::Function { - return_type_idx, - param_type_indices, - } => TypeKey::Function( - return_type_idx.map(DebugTypeIdx::as_u32), - param_type_indices.iter().map(|idx| idx.as_u32()).collect(), - ), - DebugTypeInfo::Unknown => TypeKey::Unknown, - } -} - -/// Builds debug info sections from an HIR component if debug info is enabled. -pub fn build_debug_info_sections( - component: &builtin::Component, - emit_debug_decorators: bool, -) -> Option { - if !emit_debug_decorators { - return None; - } - - let mut builder = DebugInfoBuilder::new(); - builder.collect_from_component(component); - - if builder.is_empty() { - None - } else { - Some(builder.build()) - } -} diff --git a/midenc-compile/src/lib.rs b/midenc-compile/src/lib.rs index 4ddbffa4a..b08d01d34 100644 --- a/midenc-compile/src/lib.rs +++ b/midenc-compile/src/lib.rs @@ -6,7 +6,6 @@ extern crate alloc; extern crate std; mod compiler; -pub mod debug_info; mod stage; mod stages; diff --git a/midenc-compile/src/stages/assemble.rs b/midenc-compile/src/stages/assemble.rs index 66daedebc..eed1ad7b7 100644 --- a/midenc-compile/src/stages/assemble.rs +++ b/midenc-compile/src/stages/assemble.rs @@ -85,7 +85,6 @@ fn build_package( .expect("package dependencies should be unique"); let account_component_metadata_bytes = outputs.account_component_metadata_bytes.clone(); - let debug_info_bytes = outputs.debug_info_bytes.clone(); let mut sections = Vec::new(); @@ -93,18 +92,6 @@ fn 
build_package( sections.push(Section::new(SectionId::ACCOUNT_COMPONENT_METADATA, bytes)); } - if let Some((types_bytes, sources_bytes, functions_bytes)) = debug_info_bytes { - log::debug!( - "adding debug sections to package (types={} sources={} functions={} bytes)", - types_bytes.len(), - sources_bytes.len(), - functions_bytes.len(), - ); - sections.push(Section::new(SectionId::DEBUG_TYPES, types_bytes)); - sections.push(Section::new(SectionId::DEBUG_SOURCES, sources_bytes)); - sections.push(Section::new(SectionId::DEBUG_FUNCTIONS, functions_bytes)); - } - Package { name, // proper version will be implemented in https://github.com/0xMiden/compiler/issues/1068 diff --git a/midenc-compile/src/stages/codegen.rs b/midenc-compile/src/stages/codegen.rs index 4f920ad22..909b24140 100644 --- a/midenc-compile/src/stages/codegen.rs +++ b/midenc-compile/src/stages/codegen.rs @@ -20,8 +20,6 @@ pub struct CodegenOutput { pub link_packages: BTreeMap>, /// The serialized AccountComponentMetadata (name, description, storage layout, etc.) 
pub account_component_metadata_bytes: Option>, - /// The serialized debug sections (types, sources, functions) - pub debug_info_bytes: Option<(Vec, Vec, Vec)>, } /// Perform code generation on the possibly-linked output of previous stages @@ -75,38 +73,11 @@ impl Stage for CodegenStage { session.emit(OutputMode::Text, masm_component.as_ref()).into_diagnostic()?; } - // Build debug info sections if debug decorators are enabled - let debug_info_bytes = if session.options.emit_debug_decorators() { - use miden_core::serde::Serializable; - - log::debug!("collecting debug info for debug sections"); - let debug_sections = - crate::debug_info::build_debug_info_sections(&component.borrow(), true); - debug_sections.map(|sections| { - let mut types_bytes = alloc::vec::Vec::new(); - sections.types.write_into(&mut types_bytes); - let mut sources_bytes = alloc::vec::Vec::new(); - sections.sources.write_into(&mut sources_bytes); - let mut functions_bytes = alloc::vec::Vec::new(); - sections.functions.write_into(&mut functions_bytes); - log::debug!( - "built debug sections: types={} sources={} functions={} bytes", - types_bytes.len(), - sources_bytes.len(), - functions_bytes.len(), - ); - (types_bytes, sources_bytes, functions_bytes) - }) - } else { - None - }; - Ok(CodegenOutput { component: Arc::from(masm_component), link_libraries, link_packages, account_component_metadata_bytes: linker_output.account_component_metadata_bytes, - debug_info_bytes, }) } } diff --git a/midenc-compile/src/stages/rewrite.rs b/midenc-compile/src/stages/rewrite.rs index 6b9161070..ee2ca94ae 100644 --- a/midenc-compile/src/stages/rewrite.rs +++ b/midenc-compile/src/stages/rewrite.rs @@ -111,13 +111,9 @@ impl Stage for ApplyRewritesStage { // Emit HIR if requested let session = context.session(); if session.should_emit(midenc_session::OutputType::Hir) { - log::debug!(target: "driver", "emitting HIR component"); session .emit(midenc_session::OutputMode::Text, &*input.component.borrow()) 
.into_diagnostic()?; - log::debug!(target: "driver", "HIR component emitted successfully"); - } else { - log::debug!(target: "driver", "HIR emission not requested"); } if context.session().rewrite_only() { From 551eb38e4e548ecb670252f1b9812dab990297ad Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Thu, 30 Apr 2026 11:26:03 -0400 Subject: [PATCH 28/32] refactor(debuginfo): various improvements to variable tracking impl The initial debug info implementation in #820 defined a new debuginfo dialect which provides the ability to emit tracking information for local variables in the source program. There were a couple of issues that I wanted to address with it before merging, so that has been done here in this commit: * Move the new debug dialect into `midenc-hir`, as debug info attributes are essentially core/builtin attributes, and having them in a downstream dialect was awkward: not only did we need to place them in the `builtin` dialect rather than `di`, but debug info ops are somewhat special in a way that most other ops are not - this warranted making them part of the core IR crate, though still as a separate dialect. * Renamed the dialect to `di` from `debuginfo`, as this better mirrors the naming conventions in DWARF, from which these ops are derived in part. * Renamed the debug dialect attribute types to remove the `DI` prefix, as it is redundant. * Added a new `Transparent` operation trait, which is used to indicate that an operation is "transparent" with regards to its value uses, allowing those uses to be excluded in specific situations, such as when determining liveness of operation results. Transparent ops are only permitted to have at most a single operand, and may not produce results - these restrictions make dealing with transparent ops much more straightforward when rewriting the IR. * Added a new effect type, `DebugEffect`, which is used to represent effects that an operation may have on the state of an attached debugger. 
This can be used in the future to ensure that operations are not reordered with respect to such effects on a common resource. This largely mirrors MemoryEffect for now. There is a corresponding DebugEffectOpInterface alias added as well. * Made all ops of the debug info dialect implement `Transparent`, as well as both DebugEffectOpInterface and MemoryEffectOpInterface. The memory effects are modeled as empty (i.e. the ops are considered pure), which means that those ops are trivially dead by default - however I have also modified region DCE to explicitly leave transparent ops alone unless we have determined that the defining op of their input operand is dead. * Added a new method to `Value`, called `has_real_uses`, which returns true if the value is used and at least one of those uses is from a non-transparent op. The existing `is_used` method returns true regardless of transparency if any use of the value exists. * Modified various places where `Value::is_used` was being used, when we should use `Value::has_real_uses` to avoid pessimizing a canonicalization based on the presence of debug info ops. * Modified `Rewriter::erase_op` to be more precise about verifying that the op to erase has no _real_ uses. If transparent users still exist, they are removed before erasing the original target op - this is always safe as transparent ops can only ever use a single value. * Added tests to ensure that transparent ops do not interfere with dead code elimination. * Simplified the places where debug ops were special-cased to instead work in terms of transparency (unless the special-casing truly needed to be specific to a given op). * Implemented AttrPrinter/AttrParser for all debug dialect attributes * Implemented OpPrinter/OpParser for all debug dialect operations * Implemented `Serializable`/`Deserializable` for debug dialect's `ExpressionOp` and `Expression` types, so that complex expressions can be serialized for use by the debugger. 
* Modified how the frontend was computing storage locations to use `Expression` directly, rather than `VariableStorage`, which could not represent the full set of location expressions, and was largely redundant with `ExpressionOp`. We still only handle a subset of possible expression types, but it will be easier to support more going forward. --- Cargo.lock | 14 +- Cargo.toml | 1 - Makefile.toml | 1 + codegen/masm/Cargo.toml | 1 - codegen/masm/src/emitter.rs | 7 - codegen/masm/src/lib.rs | 22 +- codegen/masm/src/lower/component.rs | 38 +- codegen/masm/src/lower/lowering.rs | 130 +- dialects/debuginfo/Cargo.toml | 21 - dialects/debuginfo/src/lib.rs | 125 -- dialects/debuginfo/src/ops.rs | 141 -- dialects/scf/Cargo.toml | 1 - .../if_remove_unused_results.rs | 2 +- .../while_remove_unused_args.rs | 10 +- .../canonicalization/while_unused_result.rs | 2 +- dialects/scf/src/transforms/cfg_to_scf.rs | 10 +- ..._to_scf_debug_value_preservation_after.hir | 8 +- ...to_scf_debug_value_preservation_before.hir | 8 +- eval/Cargo.toml | 1 - eval/src/eval.rs | 4 +- eval/src/lib.rs | 6 +- .../counter-contract/counter_contract.masm | 1698 ----------------- frontend/wasm/Cargo.toml | 1 - frontend/wasm/src/component/lift_exports.rs | 25 +- frontend/wasm/src/module/debug_info.rs | 151 +- .../wasm/src/module/function_builder_ext.rs | 49 +- hir-analysis/src/analyses/liveness.rs | 18 +- hir-macros/src/operation.rs | 2 + hir-transform/src/sink.rs | 138 +- hir/Cargo.toml | 3 +- hir/src/attributes.rs | 3 +- hir/src/attributes/debug.rs | 383 ---- hir/src/dialects.rs | 1 + hir/src/dialects/builtin/builders.rs | 3 - hir/src/dialects/debuginfo.rs | 71 + hir/src/dialects/debuginfo/attributes.rs | 15 + .../debuginfo/attributes/compile_unit.rs | 125 ++ .../debuginfo/attributes/expression.rs | 471 +++++ .../debuginfo/attributes/subprogram.rs | 223 +++ .../dialects/debuginfo/attributes/variable.rs | 146 ++ .../src/dialects/debuginfo}/builders.rs | 54 +- hir/src/dialects/debuginfo/ops.rs | 121 ++ 
.../src/dialects/debuginfo}/transform.rs | 154 +- hir/src/ir/effects.rs | 3 +- hir/src/ir/effects/debug.rs | 53 + hir/src/ir/region/transforms/dce.rs | 196 +- ...nt_ops_do_not_interfere_with_dce_after.hir | 5 + ...t_ops_do_not_interfere_with_dce_before.hir | 7 + hir/src/ir/traits.rs | 52 + hir/src/ir/value.rs | 9 +- hir/src/lib.rs | 6 +- hir/src/patterns/rewriter.rs | 22 +- .../src/mockchain/basic_wallet.rs | 10 +- .../expected/debug_variable_locations.hir | 8 +- .../src/rust_masm_tests/examples.rs | 8 +- 55 files changed, 2003 insertions(+), 2784 deletions(-) delete mode 100644 dialects/debuginfo/Cargo.toml delete mode 100644 dialects/debuginfo/src/lib.rs delete mode 100644 dialects/debuginfo/src/ops.rs delete mode 100644 examples/counter-contract/counter_contract.masm delete mode 100644 hir/src/attributes/debug.rs create mode 100644 hir/src/dialects/debuginfo.rs create mode 100644 hir/src/dialects/debuginfo/attributes.rs create mode 100644 hir/src/dialects/debuginfo/attributes/compile_unit.rs create mode 100644 hir/src/dialects/debuginfo/attributes/expression.rs create mode 100644 hir/src/dialects/debuginfo/attributes/subprogram.rs create mode 100644 hir/src/dialects/debuginfo/attributes/variable.rs rename {dialects/debuginfo/src => hir/src/dialects/debuginfo}/builders.rs (55%) create mode 100644 hir/src/dialects/debuginfo/ops.rs rename {dialects/debuginfo/src => hir/src/dialects/debuginfo}/transform.rs (60%) create mode 100644 hir/src/ir/effects/debug.rs create mode 100644 hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_after.hir create mode 100644 hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_before.hir diff --git a/Cargo.lock b/Cargo.lock index af10051f1..30feb7eed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3288,7 +3288,6 @@ dependencies = [ "miden-thiserror", "midenc-dialect-arith", "midenc-dialect-cf", - "midenc-dialect-debuginfo", "midenc-dialect-hir", "midenc-dialect-scf", 
"midenc-dialect-ub", @@ -3341,15 +3340,6 @@ dependencies = [ "midenc-hir", ] -[[package]] -name = "midenc-dialect-debuginfo" -version = "0.8.1" -dependencies = [ - "log", - "midenc-hir", - "paste", -] - [[package]] name = "midenc-dialect-hir" version = "0.8.1" @@ -3375,7 +3365,6 @@ dependencies = [ "log", "midenc-dialect-arith", "midenc-dialect-cf", - "midenc-dialect-debuginfo", "midenc-dialect-ub", "midenc-expect-test", "midenc-hir", @@ -3432,7 +3421,6 @@ dependencies = [ "miden-thiserror", "midenc-dialect-arith", "midenc-dialect-cf", - "midenc-dialect-debuginfo", "midenc-dialect-hir", "midenc-dialect-ub", "midenc-dialect-wasm", @@ -3473,6 +3461,7 @@ dependencies = [ "memchr", "miden-core", "miden-thiserror", + "midenc-expect-test", "midenc-hir-macros", "midenc-hir-symbol", "midenc-hir-type", @@ -3510,7 +3499,6 @@ dependencies = [ "miden-thiserror", "midenc-dialect-arith", "midenc-dialect-cf", - "midenc-dialect-debuginfo", "midenc-dialect-hir", "midenc-dialect-scf", "midenc-dialect-ub", diff --git a/Cargo.toml b/Cargo.toml index 12b56e9a4..3eb86697f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,7 +142,6 @@ midenc-dialect-arith = { version = "0.8.1", path = "dialects/arith" } midenc-dialect-hir = { version = "0.8.1", path = "dialects/hir" } midenc-dialect-scf = { version = "0.8.1", path = "dialects/scf" } midenc-dialect-cf = { version = "0.8.1", path = "dialects/cf" } -midenc-dialect-debuginfo = { version = "0.8.1", path = "dialects/debuginfo" } midenc-dialect-ub = { version = "0.8.1", path = "dialects/ub" } midenc-dialect-wasm = { version = "0.8.1", path = "dialects/wasm" } midenc-hir = { version = "0.8.1", path = "hir" } diff --git a/Makefile.toml b/Makefile.toml index feb3092d6..732a050ed 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -436,6 +436,7 @@ dependencies = ["cargo-miden"] category = "Test" description = "Runs the lit/filecheck test suite" command = "litcheck" +env = { MIDENC_BIN_DIR = "${MIDENC_BIN_DIR}" } args = [ "lit", "run", diff --git 
a/codegen/masm/Cargo.toml b/codegen/masm/Cargo.toml index b4b023132..94b121665 100644 --- a/codegen/masm/Cargo.toml +++ b/codegen/masm/Cargo.toml @@ -36,7 +36,6 @@ midenc-hir.workspace = true midenc-hir-analysis.workspace = true midenc-dialect-arith.workspace = true midenc-dialect-cf.workspace = true -midenc-dialect-debuginfo.workspace = true midenc-dialect-hir.workspace = true midenc-dialect-scf.workspace = true midenc-dialect-ub.workspace = true diff --git a/codegen/masm/src/emitter.rs b/codegen/masm/src/emitter.rs index 14c2071ab..cf05b7239 100644 --- a/codegen/masm/src/emitter.rs +++ b/codegen/masm/src/emitter.rs @@ -116,13 +116,6 @@ impl BlockEmitter<'_> { //self.drop_unused_operands_at(op); let Some(lowering) = op.as_trait::() else { - // Skip debug info ops that have no lowering (e.g. debuginfo.kill, - // debuginfo.declare) rather than panicking. These ops carry no - // semantic meaning for code generation. - if op.name().dialect().as_str() == "debuginfo" { - log::trace!(target: "codegen", "skipping debug info op with no lowering: {}", op.name()); - return; - } panic!("illegal operation: no lowering has been defined for '{}'", op.name()); }; diff --git a/codegen/masm/src/lib.rs b/codegen/masm/src/lib.rs index ec369b674..78a70f236 100644 --- a/codegen/masm/src/lib.rs +++ b/codegen/masm/src/lib.rs @@ -27,12 +27,14 @@ pub mod masm { use midenc_dialect_arith as arith; use midenc_dialect_cf as cf; -use midenc_dialect_debuginfo as debuginfo; use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; use midenc_dialect_wasm as wasm; -use midenc_hir::{dialects::builtin, inventory}; +use midenc_hir::{ + dialects::{builtin, debuginfo}, + inventory, +}; pub(crate) use self::lower::HirLowering; pub use self::{ @@ -45,9 +47,6 @@ pub use self::{ inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( lower_builtin_ops )); -inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( - lower_debuginfo_ops -)); 
inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( lower_arith_ops )); @@ -66,6 +65,9 @@ inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( lower_wasm_ops )); +inventory::submit!(::midenc_hir::DialectRegistrationHookInfo::new::( + lower_debuginfo_ops +)); fn lower_builtin_ops(info: &mut midenc_hir::DialectInfo) { info.register_operation_trait::(); @@ -73,10 +75,6 @@ fn lower_builtin_ops(info: &mut midenc_hir::DialectInfo) { info.register_operation_trait::(); } -fn lower_debuginfo_ops(info: &mut midenc_hir::DialectInfo) { - info.register_operation_trait::(); -} - fn lower_arith_ops(info: &mut midenc_hir::DialectInfo) { info.register_operation_trait::(); info.register_operation_trait::(); @@ -179,3 +177,9 @@ fn lower_wasm_ops(info: &mut midenc_hir::DialectInfo) { info.register_operation_trait::(); info.register_operation_trait::(); } + +fn lower_debuginfo_ops(info: &mut midenc_hir::DialectInfo) { + info.register_operation_trait::(); + info.register_operation_trait::(); + info.register_operation_trait::(); +} diff --git a/codegen/masm/src/lower/component.rs b/codegen/masm/src/lower/component.rs index 4817cf8b2..341ad045b 100644 --- a/codegen/masm/src/lower/component.rs +++ b/codegen/masm/src/lower/component.rs @@ -5,8 +5,12 @@ use miden_assembly_syntax::{ast::Attribute, parser::WordValue}; use miden_core::operations::DebugVarLocation; use midenc_hir::{ FunctionIdent, Op, OpExt, SourceSpan, Span, Symbol, TraceTarget, ValueRef, - decode_frame_base_local_index, diagnostics::IntoDiagnostic, dialects::builtin, - encode_frame_base_local_offset, pass::AnalysisManager, + diagnostics::IntoDiagnostic, + dialects::{ + builtin, + debuginfo::attributes::{decode_frame_base_local_index, encode_frame_base_local_offset}, + }, + pass::AnalysisManager, }; use midenc_hir_analysis::analyses::LivenessAnalysis; use midenc_session::{ @@ -665,14 +669,13 @@ impl MasmFunctionBuilder { // This matches locaddr.N which computes -(aligned_num_locals - N). 
patch_debug_var_locals_in_block(&mut body, aligned_num_locals, stack_pointer_addr); - // Strip DebugVar-only procedure bodies. - // The Miden assembler rejects procedures whose bodies contain only decorators - // (like DebugVar) and no real instructions, because decorators don't affect - // MAST digests — two empty procedures with different decorators would be - // indistinguishable. If there are no real instructions, the debug info is - // meaningless anyway, so just drop it. + // If a function body after lowering produces a MASM procedure with an empty body aside + // from debug decorators, then we must emit a `nop` at the end of the block which will + // act as the anchor for those decorators. Such a procedure is basically useless, as it is + // just passing through arguments as results - but the assembler currently rejects empty + // procedures (not counting decorators), so we must handle this edge case. if !block_has_real_instructions(&body) { - body = masm::Block::new(body.span(), vec![]); + body.push(masm::Op::Inst(Span::unknown(masm::Instruction::Nop))); } let mut procedure = masm::Procedure::new(span, visibility, name, num_locals, body); @@ -696,7 +699,12 @@ impl MasmFunctionBuilder { /// body contains only DebugVar ops, the assembler will reject it. fn block_has_real_instructions(block: &masm::Block) -> bool { block.iter().any(|op| match op { - masm::Op::Inst(inst) => inst.has_textual_representation(), + masm::Op::Inst(inst) => !matches!( + inst.inner(), + masm::Instruction::Debug(_) + | masm::Instruction::DebugVar(_) + | masm::Instruction::Trace(_) + ), masm::Op::If { then_blk, else_blk, .. } => block_has_real_instructions(then_blk) || block_has_real_instructions(else_blk), @@ -707,12 +715,12 @@ fn block_has_real_instructions(block: &masm::Block) -> bool { /// Recursively patch DebugVar locations in a block. 
/// -/// Converts `Local(idx)` where idx is the raw WASM local index to `Local(offset)` -/// where offset = idx - aligned_num_locals (the FMP-relative offset, typically negative). -/// This matches the assembler's `locaddr.N` formula: `FMP - aligned_num_locals + N`. +/// Converts `Local(idx)` where idx is the raw WASM local index to `Local(offset)` where +/// `offset = idx - aligned_num_locals` (the FMP-relative offset, typically negative). This matches +/// the assembler's `locaddr.N` formula, i.e. `FMP - aligned_num_locals + N`. /// -/// Also resolves `FrameBase { global_index, byte_offset }` by replacing the WASM -/// global index with the resolved Miden memory address of the stack pointer. +/// Also resolves `FrameBase { global_index, byte_offset }` by replacing the WASM global index with +/// the resolved Miden memory address of the stack pointer. fn patch_debug_var_locals_in_block( block: &mut masm::Block, aligned_num_locals: u16, diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index cdbba0836..236706593 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1,13 +1,12 @@ use midenc_dialect_arith as arith; use midenc_dialect_cf as cf; -use midenc_dialect_debuginfo as debuginfo; use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; use midenc_dialect_wasm as wasm; use midenc_hir::{ Op, OpExt, Span, SymbolTable, Type, Value, ValueRange, ValueRef, - dialects::builtin, + dialects::{builtin, debuginfo}, traits::{BinaryOp, Commutative}, }; use midenc_session::diagnostics::{Report, Severity, Spanned}; @@ -1281,7 +1280,7 @@ impl HirLowering for debuginfo::DebugValue { Felt, operations::{DebugVarInfo, DebugVarLocation}, }; - use midenc_hir::DIExpressionOp; + use midenc_hir::dialects::debuginfo::attributes::ExpressionOp; // Get the variable info let var = self.variable(); @@ -1295,11 +1294,11 @@ impl HirLowering for debuginfo::DebugValue { let 
has_location_expr = expr.operations.first().is_some_and(|op| { matches!( op, - DIExpressionOp::WasmStack(_) - | DIExpressionOp::WasmLocal(_) - | DIExpressionOp::ConstU64(_) - | DIExpressionOp::ConstS64(_) - | DIExpressionOp::FrameBase { .. } + ExpressionOp::WasmStack(_) + | ExpressionOp::WasmLocal(_) + | ExpressionOp::ConstU64(_) + | ExpressionOp::ConstS64(_) + | ExpressionOp::FrameBase { .. } ) }); if !has_location_expr && emitter.stack.find(&value).is_none() { @@ -1310,33 +1309,46 @@ impl HirLowering for debuginfo::DebugValue { // be determined (value dropped and no expression info), in which case // we skip emitting the decorator entirely rather than emitting a // placeholder — the debugger would have nothing useful to show. - let value_location = if let Some(first_op) = expr.operations.first() { - match first_op { - DIExpressionOp::WasmStack(offset) => Some(DebugVarLocation::Stack(*offset as u8)), - DIExpressionOp::WasmLocal(idx) => { + let value_location = match expr.operations.as_slice() { + [] => emitter + .stack + .find(&value) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + [first] => match first { + ExpressionOp::WasmStack(offset) => Some(DebugVarLocation::Stack(*offset as u8)), + ExpressionOp::WasmLocal(idx) => { // WASM locals are always stored in memory via FMP in Miden. - // Store raw WASM local index; the FMP offset will be computed - // later in MasmFunctionBuilder::build() when num_locals is known. + // Store raw WASM local index; the FMP offset will be computed later in + // MasmFunctionBuilder::build() when num_locals is known. 
i16::try_from(*idx).ok().map(DebugVarLocation::Local) } - DIExpressionOp::WasmGlobal(_) | DIExpressionOp::Deref => { - emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)) - } - DIExpressionOp::ConstU64(val) => Some(DebugVarLocation::Const(Felt::new(*val))), - DIExpressionOp::ConstS64(val) => { + ExpressionOp::WasmGlobal(_) | ExpressionOp::Deref => emitter + .stack + .find(&value) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + ExpressionOp::ConstU64(val) => Some(DebugVarLocation::Const(Felt::new(*val))), + ExpressionOp::ConstS64(val) => { Some(DebugVarLocation::Const(Felt::new(*val as u64))) } - DIExpressionOp::FrameBase { + ExpressionOp::FrameBase { global_index, byte_offset, } => Some(DebugVarLocation::FrameBase { global_index: *global_index, byte_offset: *byte_offset, }), - _ => emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)), + _ => emitter + .stack + .find(&value) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + }, + _ => { + use miden_core::serde::Serializable; + Some(DebugVarLocation::Expression(expr.as_value().to_bytes())) } - } else { - emitter.stack.find(&value).map(|pos| DebugVarLocation::Stack(pos as u8)) }; let Some(value_location) = value_location else { @@ -1370,6 +1382,78 @@ impl HirLowering for debuginfo::DebugValue { } } +impl HirLowering for debuginfo::DebugDeclare { + fn schedule_operands(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + // Debug value operations are purely observational — they do not consume their + // operand from the stack. Skip operand scheduling entirely; the emit() method + // will look up the value's current stack position (if any) on its own. + Ok(()) + } + + fn required_operands(&self) -> ValueRange<'_, 4> { + // No operands need to be scheduled on the stack for debug ops. 
+ ValueRange::Empty + } + + fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + use miden_core::operations::{DebugVarInfo, DebugVarLocation}; + + let var = self.variable(); + let address = self.address().as_value_ref(); + + let Some(index) = emitter.stack.find(&address) else { + // Do nothing if the stack value is no longer live at this point + return Ok(()); + }; + + let mut debug_var = + DebugVarInfo::new(var.name.to_string(), DebugVarLocation::Stack(index as u8)); + + // Set arg_index if this is a parameter + if let Some(arg_index) = var.arg_index { + debug_var.set_arg_index(arg_index + 1); // Convert to 1-based + } + + // Set source location + if let Some(line) = core::num::NonZeroU32::new(var.line) { + use miden_assembly::debuginfo::{ColumnNumber, FileLineCol, LineNumber, Uri}; + let uri = Uri::new(var.file.as_str()); + let file_line_col = FileLineCol::new( + uri, + LineNumber::new(line.get()).unwrap_or_default(), + var.column.and_then(ColumnNumber::new).unwrap_or_default(), + ); + debug_var.set_location(file_line_col); + } + + // Emit the instruction + let inst = masm::Instruction::DebugVar(debug_var); + emitter.emit_op(masm::Op::Inst(Span::new(self.span(), inst))); + + Ok(()) + } +} + +impl HirLowering for debuginfo::DebugKill { + fn schedule_operands(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + // Debug value operations are purely observational — they do not consume their + // operand from the stack. Skip operand scheduling entirely; the emit() method + // will look up the value's current stack position (if any) on its own. + Ok(()) + } + + fn required_operands(&self) -> ValueRange<'_, 4> { + // No operands need to be scheduled on the stack for debug ops. 
+ ValueRange::Empty + } + + fn emit(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { + // TODO(pauls): Either add new decorator, or emit a special trace event for kills, and + // map debug variable name to the event out of band + Ok(()) + } +} + impl HirLowering for builtin::GlobalSymbol { fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { let context = self.as_operation().context(); diff --git a/dialects/debuginfo/Cargo.toml b/dialects/debuginfo/Cargo.toml deleted file mode 100644 index 8c89f4d1d..000000000 --- a/dialects/debuginfo/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "midenc-dialect-debuginfo" -description = "Miden IR Debug Info Dialect" -version.workspace = true -rust-version.workspace = true -authors.workspace = true -repository.workspace = true -categories.workspace = true -keywords.workspace = true -license.workspace = true -readme.workspace = true -edition.workspace = true - -[features] -default = ["std"] -std = ["midenc-hir/std"] - -[dependencies] -midenc-hir.workspace = true -paste.workspace = true -log.workspace = true diff --git a/dialects/debuginfo/src/lib.rs b/dialects/debuginfo/src/lib.rs deleted file mode 100644 index 441cefe1e..000000000 --- a/dialects/debuginfo/src/lib.rs +++ /dev/null @@ -1,125 +0,0 @@ -#![no_std] -#![feature(debug_closure_helpers)] -#![feature(unboxed_closures)] -#![feature(fn_traits)] -#![feature(ptr_metadata)] -#![feature(specialization)] -#![allow(incomplete_features)] -#![deny(warnings)] - -//! # DebugInfo Dialect -//! -//! A first-class dialect for tracking source-level debug information through -//! compiler transformations. Inspired by [Mojo's DebugInfo dialect], this -//! dialect makes debug variable tracking a first-class citizen of the IR, -//! using SSA use-def chains to enforce correctness. -//! -//! ## Motivation -//! -//! Traditional approaches to debug info in MLIR-like compilers (e.g., Flang/FIR) -//! 
treat debug information as metadata or attributes — second-class citizens that -//! transforms are free to silently drop. The consequences: -//! -//! - Transforms can silently lose debug info with no verifier catching it -//! - No mechanism forces transform authors to update debug info -//! - Debug info quality degrades as the optimizer gets more aggressive -//! -//! ## Approach: SSA-Based Debug Info -//! -//! This dialect defines debug operations as real IR operations with SSA operands: -//! -//! - **`debuginfo.value`** — Records the current value of a source variable. -//! Uses an SSA value operand, so deleting the value without updating debug -//! uses is a hard error. -//! -//! - **`debuginfo.declare`** — Records the storage address of a source variable. -//! Similarly uses an SSA operand for the address. -//! -//! - **`debuginfo.kill`** — Marks a variable as dead, giving the debugger precise -//! lifetime boundaries instead of scope-based heuristics. -//! -//! ## Transform Hooks -//! -//! The [`transform`] module provides utilities that make it easy for transform -//! authors to maintain debug info: -//! -//! - **Simple replacements** are handled automatically via `replace_all_uses_with` -//! - **Complex transforms** use [`salvage_debug_info`](transform::salvage_debug_info) -//! where the transform author only describes the *inverse* of their transformation -//! - **Value deletion** without a replacement emits `debuginfo.kill` automatically -//! -//! ## Design Pillars (from Mojo) -//! -//! 1. **SSA use-def chains** — debug values participate in standard use-def tracking -//! 2. **Expression trees** — `DIExpressionAttr` describes how to recover source values -//! from transformed IR values (encode the inverse transformation) -//! 3. **Explicit lifetimes** — `debuginfo.kill` for precise variable death points -//! -//! 
[Mojo's DebugInfo dialect]: https://llvm.org/devmtg/2024-04/slides/TechnicalTalks/MojoDebugging.pdf - -extern crate alloc; - -#[cfg(any(feature = "std", test))] -extern crate std; - -mod builders; -mod ops; -pub mod transform; - -use midenc_hir::{ - AttributeRef, Builder, Dialect, DialectInfo, DialectRegistration, OperationRef, SourceSpan, - Type, -}; - -pub use self::{builders::DebugInfoOpBuilder, ops::*}; - -/// The DebugInfo dialect — first-class debug variable tracking. -/// -/// This dialect provides operations for tracking source-level variables through -/// compiler transformations using SSA semantics. Unlike metadata-based approaches, -/// debug info here participates in standard use-def chains, making it impossible -/// for transforms to silently drop debug information. -#[derive(Debug)] -pub struct DebugInfoDialect { - info: DialectInfo, -} - -impl DebugInfoDialect { - #[inline] - pub fn num_registered(&self) -> usize { - self.registered_ops().len() - } -} - -impl DialectRegistration for DebugInfoDialect { - const NAMESPACE: &'static str = "debuginfo"; - - #[inline] - fn init(info: DialectInfo) -> Self { - Self { info } - } - - fn register_operations(info: &mut DialectInfo) { - info.register_operation::(); - info.register_operation::(); - info.register_operation::(); - } -} - -impl Dialect for DebugInfoDialect { - #[inline] - fn info(&self) -> &DialectInfo { - &self.info - } - - fn materialize_constant( - &self, - _builder: &mut dyn Builder, - _attr: AttributeRef, - _ty: &Type, - _span: SourceSpan, - ) -> Option { - // Debug info operations don't produce values that can be constants - None - } -} diff --git a/dialects/debuginfo/src/ops.rs b/dialects/debuginfo/src/ops.rs deleted file mode 100644 index f60a4dcd9..000000000 --- a/dialects/debuginfo/src/ops.rs +++ /dev/null @@ -1,141 +0,0 @@ -use midenc_hir::{ - DIExpressionAttr, DILocalVariableAttr, UnsafeIntrusiveEntityRef, derive::operation, - traits::AnyType, -}; - -// Note: DILocalVariableAttr and 
DIExpressionAttr are now the generated wrapper -// types from #[derive(DialectAttribute)] on DILocalVariable and DIExpression. -use crate::DebugInfoDialect; - -pub type DebugValueRef = UnsafeIntrusiveEntityRef; -pub type DebugDeclareRef = UnsafeIntrusiveEntityRef; -pub type DebugKillRef = UnsafeIntrusiveEntityRef; - -/// Records the current value of a source-level variable. -/// -/// This is the core operation of the debuginfo dialect. It creates a first-class -/// SSA use of the value, which means: -/// -/// - If a transform deletes the value without updating its debug uses, that's a -/// hard error (not a silent drop like with metadata-based approaches). -/// - Standard MLIR-style use-def tracking automatically enforces this — transforms -/// must call `replace_all_uses_with` or explicitly handle debug uses. -/// -/// The `variable` attribute identifies the source variable, and the `expression` -/// attribute describes how to recover the source-level value from the IR value -/// (e.g., "dereference this pointer" if the value was promoted to an alloca). -/// -/// # Example -/// -/// ```text -/// debuginfo.value %0 #[variable = di.local_variable(name = x, ...)] -/// #[expression = di.expression(DW_OP_WASM_local 0)] -/// ``` -#[operation(dialect = DebugInfoDialect)] -pub struct DebugValue { - #[operand] - value: AnyType, - #[attr] - variable: DILocalVariableAttr, - #[attr] - expression: DIExpressionAttr, -} - -/// Records the storage location (address) of a source-level variable. -/// -/// Unlike `DebugValue` which tracks values, `DebugDeclare` tracks the address -/// where a variable is stored. This is useful for variables that live in memory -/// (e.g., stack allocations) where the address itself doesn't change, but the -/// value at that address may be updated through stores. -/// -/// Like `DebugValue`, this creates a real SSA use of the address value, -/// preventing silent drops during transforms. 
-#[operation(dialect = DebugInfoDialect)] -pub struct DebugDeclare { - #[operand] - address: AnyType, - #[attr] - variable: DILocalVariableAttr, -} - -/// Marks a source-level variable as dead at this program point. -/// -/// This provides explicit lifetime boundaries for variables, giving the debugger -/// precise information about when a variable is no longer valid. Without this, -/// debuggers must rely on scope-based heuristics which can be inaccurate after -/// optimizations. -/// -/// After a `debuginfo.kill`, the debugger should report the variable as -/// "optimized out" or "not available" until the next `debuginfo.value` or -/// `debuginfo.declare` for the same variable. -/// -/// # Example -/// -/// ```text -/// debuginfo.kill #[variable = di.local_variable(name = x, ...)] -/// ``` -#[operation(dialect = DebugInfoDialect)] -pub struct DebugKill { - #[attr] - variable: DILocalVariableAttr, -} - -#[cfg(test)] -mod tests { - use alloc::rc::Rc; - - use midenc_hir::{Builder, Context, DILocalVariable, SourceSpan, Type, interner::Symbol}; - - use crate::{DebugInfoDialect, DebugInfoOpBuilder}; - - fn make_variable() -> DILocalVariable { - let mut variable = - DILocalVariable::new(Symbol::intern("x"), Symbol::intern("main.rs"), 12, Some(7)); - variable.arg_index = Some(0); - variable.ty = Some(Type::I32); - variable - } - - #[test] - fn debug_value_carries_metadata() { - let context = Rc::new(Context::default()); - context.get_or_register_dialect::(); - - let block = context.create_block_with_params([Type::I32]); - let arg = block.borrow().arguments()[0]; - let value = arg.borrow().as_value_ref(); - - let mut builder = context.clone().builder(); - builder.set_insertion_point_to_end(block); - - let variable = make_variable(); - let debug_value = builder - .debug_value(value, variable.clone(), SourceSpan::UNKNOWN) - .expect("failed to create debuginfo.value op"); - - assert_eq!(debug_value.borrow().variable().as_value(), &variable); - 
assert_eq!(block.borrow().back(), Some(debug_value.as_operation_ref())); - - let op = debug_value.as_operation_ref(); - let printed = alloc::string::ToString::to_string(&*op.borrow()); - assert!(printed.contains("di.local_variable")); - } - - #[test] - fn debug_kill_carries_variable() { - let context = Rc::new(Context::default()); - context.get_or_register_dialect::(); - - let block = context.create_block_with_params([Type::I32]); - - let mut builder = context.clone().builder(); - builder.set_insertion_point_to_end(block); - - let variable = make_variable(); - let debug_kill = builder - .debug_kill(variable.clone(), SourceSpan::UNKNOWN) - .expect("failed to create debuginfo.kill op"); - - assert_eq!(debug_kill.borrow().variable().as_value(), &variable); - } -} diff --git a/dialects/scf/Cargo.toml b/dialects/scf/Cargo.toml index 8e3c13ee4..e96f131e0 100644 --- a/dialects/scf/Cargo.toml +++ b/dialects/scf/Cargo.toml @@ -28,5 +28,4 @@ bitvec.workspace = true # NOTE: Use local paths for dev-only dependency to avoid relying on crates.io during packaging midenc-expect-test = { path = "../../tools/expect-test" } midenc-hir = { path = "../../hir", features = ["logging"] } -midenc-dialect-debuginfo = { path = "../debuginfo" } env_logger.workspace = true diff --git a/dialects/scf/src/canonicalization/if_remove_unused_results.rs b/dialects/scf/src/canonicalization/if_remove_unused_results.rs index eaec0cc43..a1a802182 100644 --- a/dialects/scf/src/canonicalization/if_remove_unused_results.rs +++ b/dialects/scf/src/canonicalization/if_remove_unused_results.rs @@ -74,7 +74,7 @@ impl RewritePattern for IfRemoveUnusedResults { .results() .iter() .copied() - .filter(|result| result.borrow().is_used()) + .filter(|result| result.borrow().has_real_uses()) .collect::>(); // Replace the operation if only a subset of its results have uses. 
diff --git a/dialects/scf/src/canonicalization/while_remove_unused_args.rs b/dialects/scf/src/canonicalization/while_remove_unused_args.rs index 52da21c80..cb6766c02 100644 --- a/dialects/scf/src/canonicalization/while_remove_unused_args.rs +++ b/dialects/scf/src/canonicalization/while_remove_unused_args.rs @@ -46,7 +46,13 @@ impl RewritePattern for WhileRemoveUnusedArgs { return Ok(false); }; - if while_op.before().entry().arguments().iter().all(|arg| arg.borrow().is_used()) { + if while_op + .before() + .entry() + .arguments() + .iter() + .all(|arg| arg.borrow().has_real_uses()) + { // All the arguments are used (nothing to remove) return Ok(false); } @@ -67,7 +73,7 @@ impl RewritePattern for WhileRemoveUnusedArgs { let before_arg = before_arg.borrow(); let yield_value = yield_op.yielded()[i]; let init_value = while_op.inits()[i]; - if before_arg.is_used() { + if before_arg.has_real_uses() { args_to_erase.push(false); new_yields.push(yield_value.borrow().as_value_ref()); new_inits.push(init_value.borrow().as_value_ref()); diff --git a/dialects/scf/src/canonicalization/while_unused_result.rs b/dialects/scf/src/canonicalization/while_unused_result.rs index 4892bc334..f4eee480e 100644 --- a/dialects/scf/src/canonicalization/while_unused_result.rs +++ b/dialects/scf/src/canonicalization/while_unused_result.rs @@ -107,7 +107,7 @@ impl RewritePattern for WhileUnusedResult { let after_arg = after_args[i]; let term_arg = forwarded[i]; - if !result.is_used() && !after_arg.borrow().is_used() { + if !result.has_real_uses() && !after_arg.borrow().has_real_uses() { need_update = true; } else { new_results_indices.push(i); diff --git a/dialects/scf/src/transforms/cfg_to_scf.rs b/dialects/scf/src/transforms/cfg_to_scf.rs index 26b4180e9..db1a32629 100644 --- a/dialects/scf/src/transforms/cfg_to_scf.rs +++ b/dialects/scf/src/transforms/cfg_to_scf.rs @@ -844,8 +844,10 @@ mod tests { /// automatically updates the SSA operands of debug value ops. 
#[test] fn cfg_to_scf_debug_value_preservation() -> Result<(), Report> { - use midenc_dialect_debuginfo::{DebugInfoDialect, DebugInfoOpBuilder}; - use midenc_hir::{DILocalVariable, interner::Symbol}; + use midenc_hir::{ + dialects::debuginfo::{DIBuilder, DebugInfoDialect, attributes::Variable}, + interner::Symbol, + }; let mut test = Test::new("cfg_to_scf_debug_value_preservation", &[Type::U32], &[Type::U32]); test.context().get_or_register_dialect::(); @@ -862,9 +864,9 @@ mod tests { let input = block.borrow().arguments()[0].upcast(); let input_var = - DILocalVariable::new(Symbol::intern("input"), Symbol::intern("test.rs"), 1, Some(1)); + Variable::new(Symbol::intern("input"), Symbol::intern("test.rs"), 1, Some(1)); let result_var = - DILocalVariable::new(Symbol::intern("result"), Symbol::intern("test.rs"), 2, Some(1)); + Variable::new(Symbol::intern("result"), Symbol::intern("test.rs"), 2, Some(1)); let zero = builder.u32(0, span); let is_zero = builder.eq(input, zero, span)?; diff --git a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir index 78268a0b0..be95193fb 100644 --- a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir +++ b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_after.hir @@ -1,16 +1,16 @@ builtin.function public extern("C") @cfg_to_scf_debug_value_preservation(%0: u32) -> u32 { %2 = arith.constant 0 : u32; %3 = arith.eq %0, %2; - "debuginfo.debug_value"(%0) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + di.debug_value %0 <{ variable = #di.variable<{ name = "input", file = "test.rs", line = 1, column = 1 }>, expression = #di.expression<[]> }> : (u32); %8 = scf.if %3 then { %4 = arith.incr %0; - "debuginfo.debug_value"(%4) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + 
di.debug_value %4 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); scf.yield %4 : (u32); } else { %5 = arith.mul %0, %0 <{ overflow = #builtin.overflow }>; - "debuginfo.debug_value"(%5) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + di.debug_value %5 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); scf.yield %5 : (u32); } : (i1) -> (u32); - "debuginfo.debug_value"(%8) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + di.debug_value %8 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); builtin.ret %8 : (u32); }; \ No newline at end of file diff --git a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir index 8b5c2c435..fd85982ed 100644 --- a/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir +++ b/dialects/scf/src/transforms/expected/cfg_to_scf_debug_value_preservation_before.hir @@ -1,17 +1,17 @@ builtin.function public extern("C") @cfg_to_scf_debug_value_preservation(%0: u32) -> u32 { %2 = arith.constant 0 : u32; %3 = arith.eq %0, %2; - "debuginfo.debug_value"(%0) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + di.debug_value %0 <{ variable = #di.variable<{ name = "input", file = "test.rs", line = 1, column = 1 }>, expression = #di.expression<[]> }> : (u32); cf.cond_br %3 ^block1, ^block2 : (i1); ^block1: %4 = arith.incr %0; - "debuginfo.debug_value"(%4) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + di.debug_value %4 <{ variable = #di.variable<{ name = "result", file = "test.rs", 
line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); cf.br ^block3:(%4); ^block2: %5 = arith.mul %0, %0 <{ overflow = #builtin.overflow }>; - "debuginfo.debug_value"(%5) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + di.debug_value %5 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); cf.br ^block3:(%5); ^block3(%1: u32): - "debuginfo.debug_value"(%1) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : u32 -> (); + di.debug_value %1 <{ variable = #di.variable<{ name = "result", file = "test.rs", line = 2, column = 1 }>, expression = #di.expression<[]> }> : (u32); builtin.ret %1 : (u32); }; \ No newline at end of file diff --git a/eval/Cargo.toml b/eval/Cargo.toml index c65b69cb6..9f3b7b231 100644 --- a/eval/Cargo.toml +++ b/eval/Cargo.toml @@ -22,7 +22,6 @@ log.workspace = true miden-core.workspace = true midenc-dialect-arith.workspace = true midenc-dialect-cf.workspace = true -midenc-dialect-debuginfo.workspace = true midenc-dialect-scf.workspace = true midenc-dialect-hir.workspace = true midenc-dialect-ub.workspace = true diff --git a/eval/src/eval.rs b/eval/src/eval.rs index 38aceaaa8..22fe4cb26 100644 --- a/eval/src/eval.rs +++ b/eval/src/eval.rs @@ -5,7 +5,6 @@ use alloc::{ use midenc_dialect_arith as arith; use midenc_dialect_cf as cf; -use midenc_dialect_debuginfo as debuginfo; use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; @@ -13,7 +12,8 @@ use midenc_dialect_wasm::{self as wasm}; use midenc_hir::{ AttributeRef, Felt, Immediate, ImmediateAttr, Op, OperationRef, Overflow, RegionBranchPoint, RegionBranchTerminatorOpInterface, Report, SmallVec, SourceSpan, Spanned, SuccessorInfo, Type, - Value as _, ValueRange, dialects::builtin, + Value as _, ValueRange, + dialects::{builtin, debuginfo}, }; use midenc_session::diagnostics::Severity; diff 
--git a/eval/src/lib.rs b/eval/src/lib.rs index 6f242de64..4950eeb8b 100644 --- a/eval/src/lib.rs +++ b/eval/src/lib.rs @@ -15,12 +15,14 @@ mod value; use midenc_dialect_arith as arith; use midenc_dialect_cf as cf; -use midenc_dialect_debuginfo as debuginfo; use midenc_dialect_hir as hir; use midenc_dialect_scf as scf; use midenc_dialect_ub as ub; use midenc_dialect_wasm as wasm; -use midenc_hir::{dialects::builtin, inventory}; +use midenc_hir::{ + dialects::{builtin, debuginfo}, + inventory, +}; pub use self::{ eval::{ControlFlowEffect, Eval, Initialize}, diff --git a/examples/counter-contract/counter_contract.masm b/examples/counter-contract/counter_contract.masm deleted file mode 100644 index fe56c288d..000000000 --- a/examples/counter-contract/counter_contract.masm +++ /dev/null @@ -1,1698 +0,0 @@ -# mod miden:counter-contract/counter-contract@0.1.0 - -@callconv("canon-lift") -pub proc get-count( - -) -> felt - exec.::miden:counter-contract/counter-contract@0.1.0::init - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden:counter-contract/counter-contract@0.1.0#get-count - trace.252 - nop - exec.::std::sys::truncate_stack -end - -@callconv("canon-lift") -pub proc increment-count( - -) -> felt - exec.::miden:counter-contract/counter-contract@0.1.0::init - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden:counter-contract/counter-contract@0.1.0#increment-count - trace.252 - nop - exec.::std::sys::truncate_stack -end - -proc init - push.1179648 - trace.240 - exec.::intrinsics::mem::heap_init - trace.252 - push.[7028007876379170725,18060021366771303825,13412364500725888848,14178532912296021363] - adv.push_mapval - push.262144 - push.1 - trace.240 - exec.::std::mem::pipe_preimage_to_memory - trace.252 - drop - push.1048576 - u32assert - mem_store.278536 - push.0 - u32assert - mem_store.278537 -end - -# mod miden:counter-contract/counter-contract@0.1.0::counter_contract - 
-@callconv("C") -proc __wasm_call_ctors( - -) - nop -end - -@callconv("C") -proc _RNvNtCs2bNbiPwbrt9_16counter_contract8bindings40___link_custom_section_describing_imports( - -) - nop -end - -@callconv("C") -proc _RNvXs2_Cs2bNbiPwbrt9_16counter_contractNtB5_15CounterContractNtNtNtNtNtNtB5_8bindings7exports5miden16counter_contract16counter_contract5Guest15increment_count( - -) -> felt - push.1114144 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_sw - trace.252 - nop - push.160 - u32wrapping_sub - push.1114144 - dup.1 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_sw - trace.252 - nop - push.92 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.1 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.88 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.84 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.80 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.80 - dup.1 - u32wrapping_add - dup.1 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB4_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromANtNtB6_4felt4Feltj4_E4from - trace.252 - nop - dup.0 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs6_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert5AsRefBW_E6as_ref - trace.252 
- nop - push.0 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4feltNtB5_4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromhE4from - trace.252 - nop - push.12 - dup.2 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.8 - dup.3 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.4 - dup.4 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - movup.4 - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.32 - dup.6 - u32wrapping_add - movup.2 - swap.3 - movdn.2 - swap.1 - swap.4 - swap.1 - swap.5 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden::active_account::get_map_item - trace.252 - nop - push.40 - dup.1 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.88 - dup.3 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.32 - dup.1 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.80 - dup.3 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - 
trace.252 - nop - push.80 - dup.1 - u32wrapping_add - push.144 - dup.2 - u32wrapping_add - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse - trace.252 - nop - push.144 - dup.1 - u32wrapping_add - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs3_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtNtB7_4felt4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromNtB5_4WordE4from - trace.252 - nop - push.1 - add - push.12 - dup.2 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.8 - dup.3 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.4 - dup.4 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - dup.4 - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.64 - dup.6 - u32wrapping_add - dup.5 - swap.1 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromNtNtB7_4felt4FeltE4from - trace.252 - nop - push.0 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4feltNtB5_4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromhE4from - trace.252 - nop - push.76 - dup.7 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - 
trace.252 - nop - push.72 - dup.8 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.68 - dup.9 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.64 - dup.10 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.80 - dup.11 - u32wrapping_add - swap.9 - swap.1 - swap.8 - swap.2 - swap.7 - swap.3 - swap.6 - swap.4 - swap.5 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden::native_account::set_map_item - trace.252 - nop - push.88 - dup.2 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.120 - dup.4 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.80 - dup.2 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.112 - dup.4 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.24 - push.80 - dup.3 - u32wrapping_add - u32wrapping_add - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.136 - dup.4 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - 
movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.96 - dup.2 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.128 - dup.4 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.112 - dup.2 - u32wrapping_add - push.32 - dup.3 - u32wrapping_add - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse - trace.252 - nop - push.128 - dup.2 - u32wrapping_add - push.144 - dup.3 - u32wrapping_add - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse - trace.252 - nop - push.152 - dup.2 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.24 - push.32 - dup.5 - u32wrapping_add - u32wrapping_add - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - dup.2 - dup.2 - movup.2 - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.144 - dup.4 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.48 - dup.6 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - dup.2 - dup.2 - movup.2 - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.16 - dup.6 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - 
u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.24 - dup.4 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.16 - dup.2 - u32wrapping_add - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs3_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtNtB7_4felt4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromNtB5_4WordE4from - trace.252 - nop - drop - push.160 - movup.2 - u32wrapping_add - push.1114144 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_sw - trace.252 - nop -end - -@callconv("C") -proc _RNvXs2_Cs2bNbiPwbrt9_16counter_contractNtB5_15CounterContractNtNtNtNtNtNtB5_8bindings7exports5miden16counter_contract16counter_contract5Guest9get_count( - -) -> felt - push.1114144 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_sw - trace.252 - nop - push.64 - u32wrapping_sub - push.1114144 - dup.1 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_sw - trace.252 - nop - push.60 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.1 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.56 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.52 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.48 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - 
exec.::intrinsics::mem::store_felt - trace.252 - nop - push.48 - dup.1 - u32wrapping_add - dup.1 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB4_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromANtNtB6_4felt4Feltj4_E4from - trace.252 - nop - dup.0 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs6_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert5AsRefBW_E6as_ref - trace.252 - nop - push.0 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4feltNtB5_4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromhE4from - trace.252 - nop - push.12 - dup.2 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.8 - dup.3 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.4 - dup.4 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - movup.4 - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop - push.32 - dup.6 - u32wrapping_add - movup.2 - swap.3 - movdn.2 - swap.1 - swap.4 - swap.1 - swap.5 - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::miden::active_account::get_map_item - trace.252 - nop - push.40 - dup.1 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.56 - dup.3 
- add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.32 - dup.1 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.48 - dup.3 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.48 - dup.1 - u32wrapping_add - push.16 - dup.2 - u32wrapping_add - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse - trace.252 - nop - push.16 - dup.1 - u32wrapping_add - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs3_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtNtB7_4felt4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromNtB5_4WordE4from - trace.252 - nop - push.64 - movup.2 - u32wrapping_add - push.1114144 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_sw - trace.252 - nop -end - -@callconv("C") -proc miden:counter-contract/counter-contract@0.1.0#get-count( - -) -> felt - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvNtCsjM1oOs60QyD_11wit_bindgen2rt14run_ctors_once - trace.252 - nop - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_Cs2bNbiPwbrt9_16counter_contractNtB5_15CounterContractNtNtNtNtNtNtB5_8bindings7exports5miden16counter_contract16counter_contract5Guest9get_count - trace.252 - nop -end - -@callconv("C") -proc miden:counter-contract/counter-contract@0.1.0#increment-count( - -) -> felt - trace.240 - nop - 
exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvNtCsjM1oOs60QyD_11wit_bindgen2rt14run_ctors_once - trace.252 - nop - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::_RNvXs2_Cs2bNbiPwbrt9_16counter_contractNtB5_15CounterContractNtNtNtNtNtNtB5_8bindings7exports5miden16counter_contract16counter_contract5Guest15increment_count - trace.252 - nop -end - -@callconv("C") -proc _RNvNtCsjM1oOs60QyD_11wit_bindgen2rt14run_ctors_once( - -) - push.1114148 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_sw - trace.252 - nop - push.1048584 - u32wrapping_add - u32divmod.4 - swap.1 - swap.1 - dup.1 - mem_load - swap.1 - push.8 - u32wrapping_mul - u32shr - swap.1 - drop - push.255 - u32and - push.0 - swap.1 - neq - if.true - nop - else - push.1114148 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_sw - trace.252 - nop - trace.240 - nop - exec.::miden:counter-contract/counter-contract@0.1.0::counter_contract::__wasm_call_ctors - trace.252 - nop - push.1 - push.1048584 - movup.2 - u32wrapping_add - u32divmod.4 - swap.1 - dup.0 - mem_load - dup.2 - push.8 - u32wrapping_mul - push.255 - swap.1 - u32shl - u32not - swap.1 - u32and - movup.3 - movup.3 - push.8 - u32wrapping_mul - u32shl - u32or - swap.1 - mem_store - end -end - -@callconv("C") -proc _RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4feltNtB5_4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromhE4from( - i32 -) -> felt - push.255 - u32and -end - -@callconv("C") -proc _RNvXs3_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtNtB7_4felt4FeltINtNtCs3n4EX6Qeqp9_4core7convert4FromNtB5_4WordE4from( - i32 -) -> felt - push.12 - swap.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_felt - trace.252 - nop -end - -@callconv("C") -proc _RNvMNtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB2_4Word7reverse( - i32, - 
i32 -) - dup.1 - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.8 - dup.3 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - swap.2 - movup.2 - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - push.8 - movup.2 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - movup.2 - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - swap.2 - movup.2 - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop -end - -@callconv("C") -proc _RNvXs2_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromNtNtB7_4felt4FeltE4from( - i32, - felt -) - push.12 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.8 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.4 - dup.1 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - push.0 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop -end - -@callconv("C") -proc _RNvXs6_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB5_4WordINtNtCs3n4EX6Qeqp9_4core7convert5AsRefBW_E6as_ref( - i32 -) -> i32 - nop -end - -@callconv("C") -proc 
_RNvXs_NtNtCsl8LGlN1ny35_16miden_stdlib_sys10intrinsics4wordNtB4_4WordINtNtCs3n4EX6Qeqp9_4core7convert4FromANtNtB6_4felt4Feltj4_E4from( - i32, - i32 -) - push.8 - dup.2 - add - u32assert - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - push.8 - dup.3 - add - u32assert - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop - swap.1 - push.4 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::load_dw - trace.252 - nop - swap.1 - movup.2 - push.8 - dup.1 - swap.1 - u32mod - u32assert - assertz - u32divmod.4 - swap.1 - movup.2 - movdn.3 - trace.240 - nop - exec.::intrinsics::mem::store_dw - trace.252 - nop -end - -@callconv("C") -proc miden::active_account::get_map_item(felt, felt, felt, felt, felt, i32) - trace.240 - nop - exec.::miden::active_account::get_map_item - trace.252 - nop - movup.4 - dup.0 - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.4 - dup.1 - add - u32assert - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.8 - dup.1 - add - u32assert - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.12 - add - u32assert - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop -end - -@callconv("C") -proc miden::native_account::set_map_item( - felt, - felt, - felt, - felt, - felt, - felt, - felt, - felt, - felt, - i32 -) - trace.240 - nop - exec.::miden::native_account::set_map_item - trace.252 - nop - movup.8 - dup.0 - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop 
- push.4 - dup.1 - add - u32assert - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.8 - dup.1 - add - u32assert - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.12 - dup.1 - add - u32assert - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.16 - dup.1 - add - u32assert - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.20 - dup.1 - add - u32assert - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.24 - dup.1 - add - u32assert - movup.2 - swap.1 - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop - push.28 - add - u32assert - u32divmod.4 - swap.1 - trace.240 - nop - exec.::intrinsics::mem::store_felt - trace.252 - nop -end - diff --git a/frontend/wasm/Cargo.toml b/frontend/wasm/Cargo.toml index 7102ceaae..6dc71310d 100644 --- a/frontend/wasm/Cargo.toml +++ b/frontend/wasm/Cargo.toml @@ -24,7 +24,6 @@ indexmap = "2.7" log.workspace = true miden-core.workspace = true midenc-hir.workspace = true -midenc-dialect-debuginfo.workspace = true midenc-dialect-hir.workspace = true midenc-dialect-cf.workspace = true midenc-dialect-ub.workspace = true diff --git a/frontend/wasm/src/component/lift_exports.rs b/frontend/wasm/src/component/lift_exports.rs index 26f8e4b01..2ddb961fc 100644 --- a/frontend/wasm/src/component/lift_exports.rs +++ b/frontend/wasm/src/component/lift_exports.rs @@ -5,11 +5,14 @@ use midenc_dialect_cf::ControlFlowOpBuilder; use midenc_dialect_hir::HirOpBuilder; use midenc_frontend_wasm_metadata::ProtocolExportKind; use midenc_hir::{ - DICompileUnit, DISubprogram, FunctionType, Ident, Op, OpExt, SmallVec, SourceSpan, SymbolPath, - ValueRange, ValueRef, 
Visibility, - dialects::builtin::{ - BuiltinOpBuilder, ComponentBuilder, ModuleBuilder, - attributes::{Signature, UnitAttr}, + FunctionType, Ident, Op, OpExt, SmallVec, SourceSpan, SymbolPath, ValueRange, ValueRef, + Visibility, + dialects::{ + builtin::{ + BuiltinOpBuilder, ComponentBuilder, ModuleBuilder, + attributes::{Signature, UnitAttr}, + }, + debuginfo::attributes::{CompileUnit, CompileUnitAttr, Subprogram, SubprogramAttr}, }, }; use midenc_session::{DiagnosticsHandler, diagnostics::Severity}; @@ -384,14 +387,14 @@ fn annotate_component_export_debug_signature( }; let file = midenc_hir::interner::Symbol::intern(""); - let mut compile_unit = DICompileUnit::new(midenc_hir::interner::Symbol::intern("wit"), file); + let mut compile_unit = CompileUnit::new(midenc_hir::interner::Symbol::intern("wit"), file); compile_unit.producer = Some(midenc_hir::interner::Symbol::intern("midenc-frontend-wasm")); let param_names = export_param_names .iter() .map(|name| midenc_hir::interner::Symbol::intern(name.as_str())); let subprogram = - DISubprogram::new(midenc_hir::interner::Symbol::intern(export_func_name), file, 1, Some(1)) + Subprogram::new(midenc_hir::interner::Symbol::intern(export_func_name), file, 1, Some(1)) .with_function_type(FunctionType { abi: export_func_ty.abi, params: export_func_ty.params.clone(), @@ -399,12 +402,8 @@ fn annotate_component_export_debug_signature( }) .with_param_names(param_names); - let cu_attr = context - .create_attribute::(compile_unit) - .as_attribute_ref(); - let sp_attr = context - .create_attribute::(subprogram) - .as_attribute_ref(); + let cu_attr = context.create_attribute::(compile_unit).as_attribute_ref(); + let sp_attr = context.create_attribute::(subprogram).as_attribute_ref(); let mut export_func = export_func_ref.borrow_mut(); let op = export_func.as_operation_mut(); diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index 1d9c0e43e..5a924ca9d 100644 --- 
a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -7,8 +7,11 @@ use cranelift_entity::EntityRef; use gimli::{self, AttributeValue, read::Operation}; use log::debug; use midenc_hir::{ - DICompileUnit, DIExpression, DIExpressionOp, DILocalVariable, DISubprogram, FxHashMap, - SourceSpan, encode_frame_base_local_index, interner::Symbol, + FxHashMap, SourceSpan, + dialects::debuginfo::attributes::{ + CompileUnit, Expression, ExpressionOp, Subprogram, Variable, encode_frame_base_local_index, + }, + interner::Symbol, }; use midenc_session::diagnostics::{DiagnosticsHandler, IntoDiagnostic}; @@ -25,7 +28,7 @@ pub struct LocationDescriptor { pub start: u64, /// Exclusive end offset. `None` indicates the location is valid until the end of the function. pub end: Option, - pub storage: VariableStorage, + pub storage: Expression, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -54,21 +57,21 @@ impl VariableStorage { } } - pub fn to_expression_op(&self) -> DIExpressionOp { + pub fn to_expression_op(&self) -> ExpressionOp { match self { - VariableStorage::Local(idx) => DIExpressionOp::WasmLocal(*idx), - VariableStorage::Global(idx) => DIExpressionOp::WasmGlobal(*idx), - VariableStorage::Stack(idx) => DIExpressionOp::WasmStack(*idx), - VariableStorage::ConstU64(val) => DIExpressionOp::ConstU64(*val), + VariableStorage::Local(idx) => ExpressionOp::WasmLocal(*idx), + VariableStorage::Global(idx) => ExpressionOp::WasmGlobal(*idx), + VariableStorage::Stack(idx) => ExpressionOp::WasmStack(*idx), + VariableStorage::ConstU64(val) => ExpressionOp::ConstU64(*val), VariableStorage::FrameBase { global_index, byte_offset, - } => DIExpressionOp::FrameBase { + } => ExpressionOp::FrameBase { global_index: *global_index, byte_offset: *byte_offset, }, VariableStorage::Unsupported => { - DIExpressionOp::Unsupported(Symbol::intern("unsupported")) + ExpressionOp::Unsupported(Symbol::intern("unsupported")) } } } @@ -76,15 +79,15 @@ impl VariableStorage { 
#[derive(Clone)] pub struct LocalDebugInfo { - pub attr: DILocalVariable, + pub attr: Variable, pub locations: Vec, - pub expression: Option, + pub expression: Option, } #[derive(Clone)] pub struct FunctionDebugInfo { - pub compile_unit: DICompileUnit, - pub subprogram: DISubprogram, + pub compile_unit: CompileUnit, + pub subprogram: Subprogram, pub locals: Vec>, pub function_span: Option, pub location_schedule: Vec, @@ -103,11 +106,11 @@ struct DwarfLocalData { pub struct LocationScheduleEntry { pub offset: u64, pub var_index: usize, - pub storage: VariableStorage, + pub storage: Expression, } impl FunctionDebugInfo { - pub fn local_attr(&self, index: usize) -> Option<&DILocalVariable> { + pub fn local_attr(&self, index: usize) -> Option<&Variable> { self.locals.get(index).and_then(|info| info.as_ref().map(|data| &data.attr)) } } @@ -174,11 +177,11 @@ fn build_function_debug_info( let (file_symbol, directory_symbol) = determine_file_symbols(parsed_module, addr2line, body); let (line, column) = determine_location(addr2line, body.body_offset); - let mut compile_unit = DICompileUnit::new(Symbol::intern("wasm"), file_symbol); + let mut compile_unit = CompileUnit::new(Symbol::intern("wasm"), file_symbol); compile_unit.directory = directory_symbol; compile_unit.producer = Some(Symbol::intern("midenc-frontend-wasm")); - let mut subprogram = DISubprogram::new(func_name, compile_unit.file, line, column); + let mut subprogram = Subprogram::new(func_name, compile_unit.file, line, column); subprogram.is_definition = true; let wasm_signature = module_types[module.functions[func_index].signature].clone(); @@ -245,7 +248,7 @@ fn build_local_debug_info( func_index: FuncIndex, wasm_signature: &WasmFuncType, body: &FunctionBodyData, - subprogram: &DISubprogram, + subprogram: &Subprogram, diagnostics: &DiagnosticsHandler, dwarf_locals: Option<&FxHashMap>, frame_base_vars: Option<&Vec>, @@ -279,7 +282,7 @@ fn build_local_debug_info( name_symbol = symbol; } let mut attr = - 
DILocalVariable::new(name_symbol, subprogram.file, subprogram.line, subprogram.column); + Variable::new(name_symbol, subprogram.file, subprogram.line, subprogram.column); attr.arg_index = Some(param_idx as u32); if let Ok(ty) = ir_type(*wasm_ty, diagnostics) { attr.ty = Some(ty); @@ -299,8 +302,7 @@ fn build_local_debug_info( // Create expression from the first location if available let expression = if !locations.is_empty() { - let ops = vec![locations[0].storage.to_expression_op()]; - Some(DIExpression::with_ops(ops)) + Some(locations[0].storage.clone()) } else { None }; @@ -330,12 +332,8 @@ fn build_local_debug_info( { name_symbol = symbol; } - let mut attr = DILocalVariable::new( - name_symbol, - subprogram.file, - subprogram.line, - subprogram.column, - ); + let mut attr = + Variable::new(name_symbol, subprogram.file, subprogram.line, subprogram.column); let wasm_ty = convert_valtype(ty); if let Ok(ir_ty) = ir_type(wasm_ty, diagnostics) { attr.ty = Some(ir_ty); @@ -356,8 +354,7 @@ fn build_local_debug_info( // Create expression from the first location if available let expression = if !locations.is_empty() { - let ops = vec![locations[0].storage.to_expression_op()]; - Some(DIExpression::with_ops(ops)) + Some(locations[0].storage.clone()) } else { None }; @@ -377,14 +374,13 @@ fn build_local_debug_info( if let Some(fb_vars) = frame_base_vars { for fb_var in fb_vars { let name = fb_var.name.unwrap_or_else(|| Symbol::intern("?")); - let mut attr = - DILocalVariable::new(name, subprogram.file, subprogram.line, subprogram.column); + let mut attr = Variable::new(name, subprogram.file, subprogram.line, subprogram.column); if let Some(line) = fb_var.decl_line.filter(|l| *l != 0) { attr.line = line; } attr.column = fb_var.decl_column; let expression = if !fb_var.locations.is_empty() { - Some(DIExpression::with_ops(vec![fb_var.locations[0].storage.to_expression_op()])) + Some(fb_var.locations[0].storage.clone()) } else { None }; @@ -406,8 +402,11 @@ fn 
build_location_schedule(locals: &[Option]) -> Vec>( // For WasmLocal storage, use the index directly. // For FrameBase (DW_OP_fbreg), use the parameter order as // fallback since formal params map to WASM locals 0..N. - let local_index = storage.as_local().or(fallback_index); + let local_index = match storage.operations.as_slice() { + [ExpressionOp::WasmLocal(index)] => Some(*index), + _ => fallback_index, + }; if let Some(local_index) = local_index { locations.push(LocationDescriptor { start: low_pc, @@ -790,7 +792,8 @@ fn decode_variable_entry>( decl_column, }; return Ok(Some((local_index, data))); - } else if matches!(&storage, VariableStorage::FrameBase { .. }) { + } else if matches!(storage.operations.as_slice(), [ExpressionOp::FrameBase { .. }]) + { // FrameBase-only variable (no WASM local index, e.g. local `sum` // in debug builds). Collect separately instead of dropping. locations.push(LocationDescriptor { @@ -823,10 +826,12 @@ fn decode_variable_entry>( let storage_expr = entry.data; if let Some(storage) = decode_storage_from_expression(&storage_expr, unit, frame_base_global)? - && (storage.as_local().is_some() - || matches!(&storage, VariableStorage::FrameBase { .. })) + && matches!( + storage.operations.as_slice(), + [ExpressionOp::WasmLocal(_) | ExpressionOp::FrameBase { .. }] + ) { - if matches!(&storage, VariableStorage::FrameBase { .. }) { + if matches!(storage.operations.as_slice(), [ExpressionOp::FrameBase { .. 
}]) { has_frame_base = true; } locations.push(LocationDescriptor { @@ -840,7 +845,12 @@ fn decode_variable_entry>( return Ok(None); } // Try to find a WASM local index from any location descriptor - if let Some(local_index) = locations.iter().find_map(|desc| desc.storage.as_local()) { + if let Some(local_index) = + locations.iter().find_map(|desc| match desc.storage.operations.as_slice() { + [ExpressionOp::WasmLocal(index)] => Some(*index), + _ => None, + }) + { let data = DwarfLocalData { name: name_symbol, locations, @@ -871,34 +881,67 @@ fn decode_storage_from_expression>( expr: &gimli::Expression, unit: &gimli::Unit, frame_base_global: Option, -) -> gimli::Result> { +) -> gimli::Result> { let mut operations = expr.clone().operations(unit.encoding()); - let mut storage = None; + let mut storage = vec![]; while let Some(op) = operations.next()? { match op { - Operation::WasmLocal { index } => storage = Some(VariableStorage::Local(index)), - Operation::WasmGlobal { index } => storage = Some(VariableStorage::Global(index)), - Operation::WasmStack { index } => storage = Some(VariableStorage::Stack(index)), + Operation::WasmLocal { index } => storage.push(ExpressionOp::WasmLocal(index)), + Operation::WasmGlobal { index } => storage.push(ExpressionOp::WasmGlobal(index)), + Operation::WasmStack { index } => storage.push(ExpressionOp::WasmStack(index)), Operation::UnsignedConstant { value } => { - storage = Some(VariableStorage::ConstU64(value)) + storage.push(ExpressionOp::ConstU64(value)); + } + Operation::SignedConstant { value } => { + storage.push(ExpressionOp::ConstS64(value)); + } + Operation::PlusConstant { value } => { + storage.push(ExpressionOp::PlusUConst(value)); + } + Operation::StackValue => { + storage.push(ExpressionOp::StackValue); } - Operation::StackValue => {} Operation::FrameOffset { offset } => { - // DW_OP_fbreg(offset): variable is at frame_base + offset in - // WASM linear memory. 
The frame base is a WASM global - // (typically __stack_pointer = global 0). + // DW_OP_fbreg(offset): variable is at frame_base + offset in WASM linear memory. + // The frame base is a WASM global (typically __stack_pointer = global 0). if let Some(global_index) = frame_base_global { - storage = Some(VariableStorage::FrameBase { + storage.push(ExpressionOp::FrameBase { global_index, byte_offset: offset, }); } } - _ => {} + Operation::Address { address } => { + storage.push(ExpressionOp::Address { address }); + } + Operation::Piece { + size_in_bits, + bit_offset, + } => { + storage.push(ExpressionOp::BitPiece { + size: size_in_bits, + offset: bit_offset.unwrap_or_default(), + }); + } + Operation::Register { .. } => { + storage.push(ExpressionOp::Unsupported(Symbol::intern("DW_OP_breg(N)"))); + } + Operation::RegisterOffset { .. } => { + storage.push(ExpressionOp::Unsupported(Symbol::intern("DW_OP_bregx"))); + } + op => { + log::trace!(target: "dwarf", "unhandled expression op {op:?}"); + // Bail if we observe unhandled ops, as we cannot properly represent the expression + return Ok(None); + } } } - Ok(storage) + if storage.is_empty() { + Ok(None) + } else { + Ok(Some(Expression::with_ops(storage))) + } } fn func_local_index(func_index: FuncIndex, module: &Module) -> Option { diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index 4ed2a696a..ff32256a9 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -6,16 +6,21 @@ use cranelift_entity::{EntityRef as _, SecondaryMap}; use log::warn; use midenc_dialect_arith::ArithOpBuilder; use midenc_dialect_cf::ControlFlowOpBuilder; -use midenc_dialect_debuginfo::DebugInfoOpBuilder; use midenc_dialect_hir::HirOpBuilder; use midenc_dialect_ub::UndefinedBehaviorOpBuilder; use midenc_dialect_wasm::WasmOpBuilder; use midenc_hir::{ BlockRef, Builder, Context, EntityRef, FxHashMap, FxHashSet, Ident, 
Listener, ListenerType, Op, OpBuilder, OperationRef, ProgramPoint, RegionRef, SmallVec, SourceSpan, Type, ValueRef, - dialects::builtin::{ - BuiltinOpBuilder, FunctionBuilder, FunctionRef, - attributes::{LocalVariable, Signature}, + dialects::{ + builtin::{ + BuiltinOpBuilder, FunctionBuilder, FunctionRef, + attributes::{LocalVariable, Signature}, + }, + debuginfo::{ + DIBuilder, + attributes::{CompileUnitAttr, Expression, ExpressionOp, SubprogramAttr}, + }, }, interner::Symbol, traits::{BranchOpInterface, Terminator}, @@ -200,12 +205,12 @@ impl FunctionBuilderExt<'_, B> { // If DWARF didn't provide a location expression, synthesize one from the // wasm local index — we know this variable is stored as a wasm local. let expr = expr_opt.or_else(|| { - let ops = vec![midenc_hir::DIExpressionOp::WasmLocal(idx as u32)]; - Some(midenc_hir::DIExpression::with_ops(ops)) + let ops = vec![ExpressionOp::WasmLocal(idx as u32)]; + Some(Expression::with_ops(ops)) }); if let Err(err) = - DebugInfoOpBuilder::builder_mut(self).debug_value_with_expr(value, attr, expr, span) + DIBuilder::builder_mut(self).debug_value_with_expr(value, attr, expr, span) { warn!("failed to emit dbg.value for local {idx}: {err:?}"); } @@ -268,8 +273,6 @@ impl FunctionBuilderExt<'_, B> { } fn emit_scheduled_dbg_value(&mut self, entry: LocationScheduleEntry, span: SourceSpan) { - use crate::module::debug_info::VariableStorage; - // Skip variables already emitted as parameters to avoid duplicates. if self.param_dbg_emitted && self.param_values.iter().any(|(v, _)| v.index() == entry.var_index) @@ -280,7 +283,8 @@ impl FunctionBuilderExt<'_, B> { // Only emit debug values for variables that have already been defined. // Calling try_use_var on an undefined variable would insert block // parameters (phis) as a side effect, corrupting the CFG. - let is_frame_base = matches!(&entry.storage, VariableStorage::FrameBase { .. 
}); + let is_frame_base = + matches!(entry.storage.operations.as_slice(), [ExpressionOp::FrameBase { .. }]); if !is_frame_base && !self.defined_vars.contains(&(entry.var_index as u32)) { return; } @@ -291,10 +295,10 @@ impl FunctionBuilderExt<'_, B> { Err(_) => { if is_frame_base { // FrameBase-only variables have no WASM local, so no SSA value - // exists for them. The debuginfo.value op requires an SSA operand, - // so we attach an existing parameter value as an anchor. The MASM - // lowering ignores this operand when the DIExpression contains - // FrameBase — the location is fully described by the expression. + // exists for them. The di.value op requires an SSA operand, so we attach an + // existing parameter value as an anchor. The MASM lowering ignores this operand + // when the DIExpression contains FrameBase — the location is fully described by + // the expression. if let Some((_, v)) = self.param_values.first() { let anchor = *v; self.def_var(var, anchor); @@ -314,9 +318,10 @@ impl FunctionBuilderExt<'_, B> { }; // Create expression from the scheduled location - let expression = { - let ops = vec![entry.storage.to_expression_op()]; - Some(midenc_hir::DIExpression::with_ops(ops)) + let expression = if entry.storage.is_empty() { + None + } else { + Some(entry.storage) }; let Some(info) = self.debug_info.as_ref() else { @@ -331,8 +336,8 @@ impl FunctionBuilderExt<'_, B> { return; }; - if let Err(err) = DebugInfoOpBuilder::builder_mut(self) - .debug_value_with_expr(value, attr, expression, span) + if let Err(err) = + DIBuilder::builder_mut(self).debug_value_with_expr(value, attr, expression, span) { warn!("failed to emit scheduled dbg.value for local {idx}: {err:?}"); } @@ -659,10 +664,10 @@ impl FunctionBuilderExt<'_, B> { let info = info.borrow(); let context = self.inner.builder().context_rc(); let cu_attr = context - .create_attribute::(info.compile_unit.clone()) + .create_attribute::(info.compile_unit.clone()) .as_attribute_ref(); let sp_attr = context - 
.create_attribute::(info.subprogram.clone()) + .create_attribute::(info.subprogram.clone()) .as_attribute_ref(); let mut func = self.inner.func.borrow_mut(); let op = func.as_operation_mut(); @@ -766,7 +771,7 @@ impl<'f, B: ?Sized + Builder> BuiltinOpBuilder<'f, B> for FunctionBuilderExt<'f, } } -impl<'f, B: ?Sized + Builder> DebugInfoOpBuilder<'f, B> for FunctionBuilderExt<'f, B> { +impl<'f, B: ?Sized + Builder> DIBuilder<'f, B> for FunctionBuilderExt<'f, B> { #[inline(always)] fn builder(&self) -> &B { self.inner.builder() diff --git a/hir-analysis/src/analyses/liveness.rs b/hir-analysis/src/analyses/liveness.rs index c328ed314..24828c7f0 100644 --- a/hir-analysis/src/analyses/liveness.rs +++ b/hir-analysis/src/analyses/liveness.rs @@ -8,6 +8,7 @@ use midenc_hir::{ RegionBranchOpInterface, RegionBranchPoint, RegionRef, Report, Spanned, SymbolTable, ValueRef, dominance::DominanceInfo, pass::{Analysis, AnalysisManager, PreservedAnalyses}, + traits::Transparent, }; pub use self::next_use_set::NextUseSet; @@ -22,14 +23,6 @@ use crate::{ /// The distance penalty applied to an edge which exits a loop pub const LOOP_EXIT_DISTANCE: u32 = 100_000; -/// Returns `true` if the operation belongs to the debuginfo dialect. -/// -/// Debug info ops (debuginfo.debug_value, etc.) are purely observational — their -/// operands are not real uses and must not keep values alive. -fn is_debug_info_op(op: &Operation) -> bool { - op.name().dialect().as_str() == "debuginfo" -} - /// This analysis computes what values are live, and the distance to next use, for all program /// points in the given operation. It computes both live-in and live-out sets, in order to answer /// liveness questions about the state of the program at an operation, as well as questions about @@ -369,11 +362,10 @@ impl DenseBackwardDataFlowAnalysis for Liveness { } // Set the next-use distance of any operands to 0. 
- // Skip debug info ops: their operands are observational metadata and must - // not keep values alive, otherwise scf.if branches can end up with - // mismatched operand-stack sizes when one branch has a real use and the - // other only a debug use. - if !is_debug_info_op(op) { + // + // Ignore transparent operations, as such operations are purely informational, and are not + // considered to keep their operands live. + if !op.implements::() { for operand in op.operands().all().iter() { temp_live_in.insert(operand.borrow().as_value_ref(), 0); } diff --git a/hir-macros/src/operation.rs b/hir-macros/src/operation.rs index ae9ba14f5..d7876b2ff 100644 --- a/hir-macros/src/operation.rs +++ b/hir-macros/src/operation.rs @@ -1937,6 +1937,7 @@ impl quote::ToTokens for OpBuilderImpl { { #op_builder_new_doc #[inline(always)] + #[allow(unused)] pub fn new(builder: &'a mut B, span: ::midenc_hir::diagnostics::SourceSpan) -> Self { Self { builder, @@ -2237,6 +2238,7 @@ impl quote::ToTokens for OpVerifierImpl { _derived: ::core::marker::PhantomData<(#(&'a dyn #derived_traits,)* #(&'a dyn #implemented_traits),*)>, } impl<'a, T> OpVerifierImpl<'a, T> { + #[allow(unused)] const fn new(op: &'a ::midenc_hir::Operation) -> Self { Self { op, diff --git a/hir-transform/src/sink.rs b/hir-transform/src/sink.rs index d8b6782bf..716c0130a 100644 --- a/hir-transform/src/sink.rs +++ b/hir-transform/src/sink.rs @@ -2,38 +2,23 @@ use alloc::vec::Vec; use midenc_hir::{ Backward, Builder, EntityMut, Forward, FxHashSet, OpBuilder, Operation, OperationName, - OperationRef, ProgramPoint, RawWalk, Region, RegionBranchOpInterface, - RegionBranchTerminatorOpInterface, RegionRef, Report, SmallVec, Usable, Value, ValueRef, + OperationRef, ProgramPoint, RawWalk, Region, RegionBranchOpInterface, RegionRef, Report, + SmallVec, Usable, Value, ValueRef, adt::SmallDenseMap, dominance::DominanceInfo, matchers::{self, Matcher}, pass::{Pass, PassExecutionState, PostPassStatus}, - traits::{ConstantLike, 
Terminator}, + traits::{ConstantLike, Transparent}, }; -/// Returns `true` if the given operation belongs to the debuginfo dialect. +/// Check whether `operation` is the sole _non-transparent_ user of `value`. /// -/// Debug info operations (debuginfo.value, debuginfo.kill, etc.) are purely -/// observational and should not prevent optimizations such as sinking or DCE. -#[inline] -fn is_debug_info_op(op: &Operation) -> bool { - op.name().dialect().as_str() == "debuginfo" -} - -/// Check whether `operation` is the sole *non-debug-info* user of `value`. -/// -/// Debug uses are excluded because they are observational and should never -/// prevent value-producing operations from being moved or eliminated. -fn is_sole_non_debug_user(value: &dyn Value, operation: OperationRef) -> bool { +/// Ops that implement `Transparent` are excluded, because they are purely informational and their +/// uses are not considered for purposes of computing liveness. +fn is_sole_non_transparent_user(value: &dyn Value, operation: OperationRef) -> bool { value .iter_uses() - .all(|user| user.owner == operation || is_debug_info_op(&user.owner.borrow())) -} - -/// Returns `true` if the only remaining uses of the given value are debug info uses -/// (or the value is entirely unused). -fn has_only_debug_uses(value: &dyn Value) -> bool { - value.iter_uses().all(|user| is_debug_info_op(&user.owner.borrow())) + .all(|user| user.owner == operation || user.owner.borrow().implements::<dyn Transparent>()) } /// Erase all debug info operations that reference the given value. @@ -41,12 +26,17 @@ fn has_only_debug_uses(value: &dyn Value) -> bool { /// This is used before erasing a defining op whose result is only kept alive by /// debug uses. The debug ops are simply removed; the codegen emitter is also /// hardened to skip orphaned debug ops, so this is a best-effort cleanup. 
-fn erase_debug_users(value: ValueRef) { - let debug_ops: Vec = { +fn erase_transparent_users(value: ValueRef) { + let debug_ops: SmallVec<[OperationRef; 2]> = { let v = value.borrow(); v.iter_uses() - .filter(|user| is_debug_info_op(&user.owner.borrow())) - .map(|user| user.owner) + .filter_map(|user| { + if user.owner.borrow().implements::() { + Some(user.owner) + } else { + None + } + }) .collect() }; for mut op in debug_ops { @@ -264,9 +254,9 @@ impl Pass for SinkOperandDefs { for operand in op.operands().iter().rev() { let value = operand.borrow(); let value = value.value(); - // Exclude debug info uses when determining whether this is the sole - // user — debug ops are observational and should not prevent sinking. - let is_sole_user = is_sole_non_debug_user(&*value, operation); + // Exclude debug info uses when determining whether this is the sole user — + // transparent ops are observational and should not prevent sinking. + let is_sole_user = is_sole_non_transparent_user(&*value, operation); let Some(defining_op) = value.get_defining_op() else { // Skip block arguments, nothing to move in that situation @@ -321,23 +311,23 @@ impl Pass for SinkOperandDefs { let mut operation = sink_state.operation; let op = operation.borrow(); - // If this operation is unused (or only has debug info uses), remove it - // now if it has no side effects. - let is_memory_effect_free = - op.is_memory_effect_free() || op.implements::(); - let only_debug_uses = - !op.is_used() || op.results().iter().all(|r| has_only_debug_uses(&*r.borrow())); - if only_debug_uses - && is_memory_effect_free - && !op.implements::() - && !op.implements::() - && erased.insert(operation) - { + // Ignore transparent ops - we do not sink them directly + if op.implements::() { + continue; + } + + // If this operation is unused (or only has debug info uses), remove it now if it has no + // side effects. 
+ // + // NOTE: We explicitly DO NOT remove transparent ops here, unless we're removing the + // defining op of the transparent operand + let has_real_uses = op.results().iter().any(|result| result.borrow().has_real_uses()); + if !has_real_uses && op.would_be_trivially_dead() && erased.insert(operation) { log::debug!(target: Self::NAME, "erasing unused, effect-free, non-terminator op {op}"); drop(op); // Erase any remaining debug uses before erasing the defining op for result in operation.borrow().results().iter() { - erase_debug_users(result.borrow().as_value_ref()); + erase_transparent_users(result.borrow().as_value_ref()); } operation.borrow_mut().erase(); continue; @@ -374,9 +364,9 @@ impl Pass for SinkOperandDefs { changed = PostPassStatus::Changed; // If no other non-debug uses of this value remain, then remove // the original operation, as it is now dead. - if has_only_debug_uses(&*operand_value.borrow()) { + if !operand_value.borrow().has_real_uses() { log::trace!(target: Self::NAME, " {operand_value} is no longer used, erasing definition"); - erase_debug_users(operand_value); + erase_transparent_users(operand_value); // Replacements are only ever for op results let mut defining_op = operand_value.borrow().get_defining_op().unwrap(); defining_op.borrow_mut().erase(); @@ -387,7 +377,7 @@ impl Pass for SinkOperandDefs { let value = operand_value.borrow(); // Exclude debug info uses when determining sole-user status. 
- let is_sole_user = is_sole_non_debug_user(&*value, operation); + let is_sole_user = is_sole_non_transparent_user(&*value, operation); let Some(mut defining_op) = value.get_defining_op() else { // Skip block arguments, nothing to move in that situation @@ -426,8 +416,12 @@ impl Pass for SinkOperandDefs { // The original op can be moved drop(def); drop(value); - defining_op.borrow_mut().move_to(*builder.insertion_point()); + let mut def_op = defining_op.borrow_mut(); + def_op.move_to(*builder.insertion_point()); sink_state.replacements.insert(operand_value, operand_value); + + // Move any transparent users of `defining_op` after it + move_transparent_users_to(&def_op, &[operation]); } } else if !is_sole_user || def.num_results() != 1 || !def.is_memory_effect_free() { // Skip this operand if the defining op cannot be safely moved @@ -450,12 +444,16 @@ impl Pass for SinkOperandDefs { drop(def); drop(value); log::trace!(target: Self::NAME, " defining op can be moved and has no other uses, moving into place"); - defining_op.borrow_mut().move_to(*builder.insertion_point()); + let mut def_op = defining_op.borrow_mut(); + def_op.move_to(*builder.insertion_point()); sink_state.replacements.insert(operand_value, operand_value); + // Move any transparent users of `defining_op` after it + move_transparent_users_to(&def_op, &[operation]); + drop(def_op); + // Enqueue the defining op to be visited before continuing with this op's operands log::trace!(target: Self::NAME, " enqueing defining op for immediate processing"); - //sink_state.ip = *builder.insertion_point(); sink_state.ip = ProgramPoint::before(operation); worklist.push_front(sink_state); worklist.push_front(OpOperandSink::new(defining_op)); @@ -469,6 +467,26 @@ impl Pass for SinkOperandDefs { } } +fn move_transparent_users_to(op: &Operation, exclude: &[OperationRef]) { + use midenc_hir::adt::SmallSet; + + let ip = ProgramPoint::after(op.as_operation_ref()); + let mut visited = SmallSet::<_, 
4>::from_iter(exclude.iter().copied()); + for result in op.results().iter() { + let result = result.borrow(); + for user in result.iter_uses() { + if !visited.insert(user.owner) { + continue; + } + let mut user = user.owner; + let mut user = user.borrow_mut(); + if user.implements::() { + user.move_to(ip); + } + } + } +} + struct OpOperandSink { operation: OperationRef, ip: ProgramPoint, @@ -530,12 +548,12 @@ where self.num_sunk } - /// Given a region and an op which dominates the region, returns true if all - /// *non-debug-info* users of the given op are dominated by the entry block - /// of the region, and thus the operation can be sunk into the region. + /// Given a region and an op which dominates the region, returns true if all _non-transparent_ + /// users of the given op are dominated by the entry block of the region, and thus the operation + /// can be sunk into the region. /// - /// Debug info uses are excluded because they are observational and should - /// not prevent control-flow sinking. + /// Transparent uses are excluded because they are observational and should not prevent + /// control-flow sinking. fn all_users_dominated_by(&self, op: &Operation, region: &Region) -> bool { assert!( region.find_ancestor_op(op.as_operation_ref()).is_none(), @@ -547,7 +565,7 @@ where result.iter_uses().all(|user| { // Skip debug info users — they are observational and should not // prevent sinking. - if is_debug_info_op(&user.owner.borrow()) { + if user.owner.borrow().implements::() { return true; } // The user is dominated by the region if its containing block is dominated @@ -590,15 +608,11 @@ where (all_users_dominated_by, should_move_into_region) }; if all_users_dominated_by && should_move_into_region { - // Before moving, erase any debug info ops outside the target region - // that reference results of this op — they would violate dominance - // after the move. 
- for result in op.borrow().results().iter() { - erase_debug_users(result.borrow().as_value_ref()); - } - (self.move_into_region)(op, region); + // Move all transparent users of `op` into the region after it + move_transparent_users_to(&op.borrow(), &[user.as_operation_ref()]); + self.num_sunk += 1; // Add the op to the work queue diff --git a/hir/Cargo.toml b/hir/Cargo.toml index 49b27171e..727e1a80e 100644 --- a/hir/Cargo.toml +++ b/hir/Cargo.toml @@ -46,5 +46,6 @@ thiserror.workspace = true [dev-dependencies] # NOTE: Use local paths for dev-only dependency to avoid relying on crates.io during packaging -pretty_assertions = "1.0" +midenc-expect-test = { path = "../tools/expect-test" } midenc-log = { path = "../midenc-log" } +pretty_assertions = "1.0" diff --git a/hir/src/attributes.rs b/hir/src/attributes.rs index 52924d175..91fb82bdf 100644 --- a/hir/src/attributes.rs +++ b/hir/src/attributes.rs @@ -1,6 +1,5 @@ mod attribute; -pub mod debug; mod named_attribute; mod traits; -pub use self::{attribute::*, debug::*, named_attribute::*, traits::*}; +pub use self::{attribute::*, named_attribute::*, traits::*}; diff --git a/hir/src/attributes/debug.rs b/hir/src/attributes/debug.rs deleted file mode 100644 index 44493f8f1..000000000 --- a/hir/src/attributes/debug.rs +++ /dev/null @@ -1,383 +0,0 @@ -use alloc::{format, sync::Arc, vec::Vec}; - -use crate::{ - AttrPrinter, Type, - derive::DialectAttribute, - dialects::builtin::BuiltinDialect, - formatter::{Document, PrettyPrint, const_text, text}, - interner::Symbol, - print::AsmPrinter, -}; - -/// Represents the compilation unit associated with debug information. -/// -/// The fields in this struct are intentionally aligned with the subset of -/// DWARF metadata we currently care about when tracking variable locations. 
-#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] -#[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] -pub struct DICompileUnit { - pub language: Symbol, - pub file: Symbol, - pub directory: Option, - pub producer: Option, - pub optimized: bool, -} - -impl Default for DICompileUnit { - fn default() -> Self { - Self { - language: crate::interner::symbols::Empty, - file: crate::interner::symbols::Empty, - directory: None, - producer: None, - optimized: false, - } - } -} - -impl DICompileUnit { - pub fn new(language: Symbol, file: Symbol) -> Self { - Self { - language, - file, - directory: None, - producer: None, - optimized: false, - } - } -} - -impl AttrPrinter for DICompileUnitAttr { - fn print(&self, printer: &mut AsmPrinter<'_>) { - *printer += self.value.render(); - } -} - -impl PrettyPrint for DICompileUnit { - fn render(&self) -> Document { - let mut doc = const_text("di.compile_unit(") - + text(format!("language = {}", self.language.as_str())) - + const_text(", file = ") - + text(self.file.as_str()); - - if let Some(directory) = self.directory { - doc = doc + const_text(", directory = ") + text(directory.as_str()); - } - if let Some(producer) = self.producer { - doc = doc + const_text(", producer = ") + text(producer.as_str()); - } - if self.optimized { - doc += const_text(", optimized"); - } - - doc + const_text(")") - } -} - -/// Represents a subprogram (function) scope for debug information. -/// The compile unit is not embedded but typically stored separately on the module. 
-#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] -#[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] -pub struct DISubprogram { - pub name: Symbol, - pub linkage_name: Option, - pub file: Symbol, - pub line: u32, - pub column: Option, - pub is_definition: bool, - pub is_local: bool, - pub ty: Option, - pub param_names: Vec, -} - -impl Default for DISubprogram { - fn default() -> Self { - Self { - name: crate::interner::symbols::Empty, - linkage_name: None, - file: crate::interner::symbols::Empty, - line: 0, - column: None, - is_definition: false, - is_local: false, - ty: None, - param_names: Vec::new(), - } - } -} - -impl DISubprogram { - pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { - Self { - name, - linkage_name: None, - file, - line, - column, - is_definition: true, - is_local: false, - ty: None, - param_names: Vec::new(), - } - } - - pub fn with_function_type(mut self, ty: crate::FunctionType) -> Self { - self.ty = Some(Type::Function(Arc::new(ty))); - self - } - - pub fn with_param_names(mut self, names: I) -> Self - where - I: IntoIterator, - { - self.param_names = names.into_iter().collect(); - self - } -} - -impl AttrPrinter for DISubprogramAttr { - fn print(&self, printer: &mut AsmPrinter<'_>) { - *printer += self.value.render(); - } -} - -impl PrettyPrint for DISubprogram { - fn render(&self) -> Document { - let mut doc = const_text("di.subprogram(") - + text(format!("name = {}", self.name.as_str())) - + const_text(", file = ") - + text(self.file.as_str()) - + const_text(", line = ") - + text(format!("{}", self.line)); - - if let Some(column) = self.column { - doc = doc + const_text(", column = ") + text(format!("{}", column)); - } - if let Some(linkage) = self.linkage_name { - doc = doc + const_text(", linkage = ") + text(linkage.as_str()); - } - if let Some(ty) = &self.ty { - doc = doc + const_text(", ty = ") + ty.render(); - } - if !self.param_names.is_empty() { - let names = - 
self.param_names.iter().map(|name| name.as_str()).collect::>().join(", "); - doc = doc + const_text(", params = [") + text(names) + const_text("]"); - } - if self.is_definition { - doc += const_text(", definition"); - } - if self.is_local { - doc += const_text(", local"); - } - - doc + const_text(")") - } -} - -/// Represents a local variable debug record. -/// The scope (DISubprogram) is not embedded but instead stored on the containing function. -#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] -#[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] -pub struct DILocalVariable { - pub name: Symbol, - pub arg_index: Option, - pub file: Symbol, - pub line: u32, - pub column: Option, - pub ty: Option, -} - -impl Default for DILocalVariable { - fn default() -> Self { - Self { - name: crate::interner::symbols::Empty, - arg_index: None, - file: crate::interner::symbols::Empty, - line: 0, - column: None, - ty: None, - } - } -} - -impl DILocalVariable { - pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { - Self { - name, - arg_index: None, - file, - line, - column, - ty: None, - } - } -} - -impl AttrPrinter for DILocalVariableAttr { - fn print(&self, printer: &mut AsmPrinter<'_>) { - *printer += self.value.render(); - } -} - -impl PrettyPrint for DILocalVariable { - fn render(&self) -> Document { - let mut doc = const_text("di.local_variable(") - + text(format!("name = {}", self.name.as_str())) - + const_text(", file = ") - + text(self.file.as_str()) - + const_text(", line = ") - + text(format!("{}", self.line)); - - if let Some(column) = self.column { - doc = doc + const_text(", column = ") + text(format!("{}", column)); - } - if let Some(arg_index) = self.arg_index { - doc = doc + const_text(", arg = ") + text(format!("{}", arg_index)); - } - if let Some(ty) = &self.ty { - doc = doc + const_text(", ty = ") + ty.render(); - } - - doc + const_text(")") - } -} - -/// Represents DWARF expression operations for describing 
variable locations -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum DIExpressionOp { - /// DW_OP_WASM_location 0x00 - Variable is in a WebAssembly local - WasmLocal(u32), - /// DW_OP_WASM_location 0x01 - Variable is in a WebAssembly global - WasmGlobal(u32), - /// DW_OP_WASM_location 0x02 - Variable is on the WebAssembly operand stack - WasmStack(u32), - /// DW_OP_constu - Unsigned constant value - ConstU64(u64), - /// DW_OP_consts - Signed constant value - ConstS64(i64), - /// DW_OP_plus_uconst - Add unsigned constant to top of stack - PlusUConst(u64), - /// DW_OP_minus - Subtract top two stack values - Minus, - /// DW_OP_plus - Add top two stack values - Plus, - /// DW_OP_deref - Dereference the address at top of stack - Deref, - /// DW_OP_stack_value - The value on the stack is the value of the variable - StackValue, - /// DW_OP_piece - Describes a piece of a variable - Piece(u64), - /// DW_OP_bit_piece - Describes a piece of a variable in bits - BitPiece { size: u64, offset: u64 }, - /// DW_OP_fbreg - Frame base register + offset. - /// The variable is in WASM linear memory at `value_of(global[global_index]) + byte_offset`. - FrameBase { global_index: u32, byte_offset: i64 }, - /// Placeholder for unsupported operations - Unsupported(Symbol), -} - -/// High-bit marker used to carry a Wasm-local frame base through the existing -/// `FrameBase { global_index, byte_offset }` debug-location shape without -/// changing the VM-facing `DebugVarLocation` ABI. -/// -/// Before MASM lowering completes, the low bits hold a raw Wasm local index. -/// After local patching, the low 16 bits hold the signed FMP-relative offset of -/// the Miden local containing the frame-base byte address. 
-pub const FRAME_BASE_LOCAL_MARKER: u32 = 1 << 31; - -pub fn encode_frame_base_local_index(local_index: u32) -> Option { - if local_index < FRAME_BASE_LOCAL_MARKER { - Some(FRAME_BASE_LOCAL_MARKER | local_index) - } else { - None - } -} - -pub fn decode_frame_base_local_index(encoded: u32) -> Option { - (encoded & FRAME_BASE_LOCAL_MARKER != 0).then_some(encoded & !FRAME_BASE_LOCAL_MARKER) -} - -pub fn encode_frame_base_local_offset(local_offset: i16) -> u32 { - FRAME_BASE_LOCAL_MARKER | u16::from_le_bytes(local_offset.to_le_bytes()) as u32 -} - -pub fn decode_frame_base_local_offset(encoded: u32) -> Option { - if encoded & FRAME_BASE_LOCAL_MARKER == 0 { - return None; - } - let low_bits = (encoded & 0xffff) as u16; - Some(i16::from_le_bytes(low_bits.to_le_bytes())) -} - -/// Represents a DWARF expression that describes how to compute or locate a variable's value -#[derive(DialectAttribute, Clone, Debug, Default, PartialEq, Eq, Hash)] -#[attribute(dialect = BuiltinDialect, implements(AttrPrinter))] -pub struct DIExpression { - pub operations: Vec, -} - -impl DIExpression { - pub fn new() -> Self { - Self { - operations: Vec::new(), - } - } - - pub fn with_ops(operations: Vec) -> Self { - Self { operations } - } - - pub fn is_empty(&self) -> bool { - self.operations.is_empty() - } -} - -impl AttrPrinter for DIExpressionAttr { - fn print(&self, printer: &mut AsmPrinter<'_>) { - *printer += self.value.render(); - } -} - -impl PrettyPrint for DIExpression { - fn render(&self) -> Document { - if self.operations.is_empty() { - return const_text("di.expression()"); - } - - let mut doc = const_text("di.expression("); - for (i, op) in self.operations.iter().enumerate() { - if i > 0 { - doc += const_text(", "); - } - doc += match op { - DIExpressionOp::WasmLocal(idx) => text(format!("DW_OP_WASM_local {}", idx)), - DIExpressionOp::WasmGlobal(idx) => text(format!("DW_OP_WASM_global {}", idx)), - DIExpressionOp::WasmStack(idx) => text(format!("DW_OP_WASM_stack {}", idx)), - 
DIExpressionOp::ConstU64(val) => text(format!("DW_OP_constu {}", val)), - DIExpressionOp::ConstS64(val) => text(format!("DW_OP_consts {}", val)), - DIExpressionOp::PlusUConst(val) => text(format!("DW_OP_plus_uconst {}", val)), - DIExpressionOp::Minus => const_text("DW_OP_minus"), - DIExpressionOp::Plus => const_text("DW_OP_plus"), - DIExpressionOp::Deref => const_text("DW_OP_deref"), - DIExpressionOp::StackValue => const_text("DW_OP_stack_value"), - DIExpressionOp::Piece(size) => text(format!("DW_OP_piece {}", size)), - DIExpressionOp::BitPiece { size, offset } => { - text(format!("DW_OP_bit_piece {} {}", size, offset)) - } - DIExpressionOp::FrameBase { - global_index, - byte_offset, - } => { - if let Some(local_index) = decode_frame_base_local_index(*global_index) { - text(format!("DW_OP_fbreg local[{}]{:+}", local_index, byte_offset)) - } else { - text(format!("DW_OP_fbreg global[{}]{:+}", global_index, byte_offset)) - } - } - DIExpressionOp::Unsupported(name) => text(name.as_str()), - }; - } - doc + const_text(")") - } -} diff --git a/hir/src/dialects.rs b/hir/src/dialects.rs index 225bb47dd..abeae970c 100644 --- a/hir/src/dialects.rs +++ b/hir/src/dialects.rs @@ -1,2 +1,3 @@ pub mod builtin; +pub mod debuginfo; pub mod test; diff --git a/hir/src/dialects/builtin/builders.rs b/hir/src/dialects/builtin/builders.rs index 44c2072f9..b9fbb36fd 100644 --- a/hir/src/dialects/builtin/builders.rs +++ b/hir/src/dialects/builtin/builders.rs @@ -86,9 +86,6 @@ pub trait BuiltinOpBuilder<'f, B: ?Sized + Builder> { op_builder(arg) } - // Note: dbg_value / dbg_value_with_expr have moved to DebugInfoOpBuilder - // in the midenc-dialect-debuginfo crate. Use debug_value / debug_value_with_expr there. - fn builder(&self) -> &B; fn builder_mut(&mut self) -> &mut B; } diff --git a/hir/src/dialects/debuginfo.rs b/hir/src/dialects/debuginfo.rs new file mode 100644 index 000000000..5c993a472 --- /dev/null +++ b/hir/src/dialects/debuginfo.rs @@ -0,0 +1,71 @@ +//! 
This module defines a first-class dialect for tracking source-level debug information through +//! compiler transformations. +//! +//! Inspired by [Mojo's DebugInfo dialect], this dialect makes debug variable tracking a first-class +//! citizen of the IR, using SSA use-def chains to enforce correctness. +//! +//! ## Motivation +//! +//! Traditional approaches to debug info in MLIR-like compilers (e.g. Flang/FIR) treat debug +//! information as metadata or attributes — second-class citizens that transforms are free to +//! silently drop. The consequences: +//! +//! - Transforms can silently lose debug info with no verifier catching it +//! - No mechanism forces transform authors to update debug info +//! - Debug info quality degrades as the optimizer gets more aggressive +//! +//! ## Approach: SSA-Based Debug Info +//! +//! This dialect defines debug operations as real IR operations with SSA operands: +//! +//! - **`di.value`** — Records the current value of a source variable. Uses an SSA value operand, +//! so deleting the value without updating debug uses is a hard error. +//! +//! - **`di.declare`** — Records the storage address of a source variable. Similarly uses an SSA +//! operand for the address. +//! +//! - **`di.kill`** — Marks a variable as dead, giving the debugger precise lifetime boundaries +//! instead of scope-based heuristics. +//! +//! ## Transform Hooks +//! +//! The [`transform`] module provides utilities that make it easy for transform authors to maintain +//! debug info: +//! +//! - **Simple replacements** are handled automatically via `replace_all_uses_with` +//! - **Complex transforms** use [`salvage_debug_info`](transform::salvage_debug_info) where the +//! transform author only describes the *inverse* of their transformation +//! - **Value deletion** without a replacement emits `di.kill` automatically +//! +//! ## Design Pillars (as inherited from Mojo) +//! +//! 1. 
**SSA use-def chains** — debug values participate in standard use-def tracking +//! 2. **Expression trees** — `ExpressionAttr` describes how to recover source values from +//! transformed IR values (encode the inverse transformation) +//! 3. **Explicit lifetimes** — `di.kill` for precise variable death points +//! +//! For historical context, you may be interested in the slides from Mojo's debugging talk, where +//! they discuss its debug info dialect. [You can find that here](https://llvm.org/devmtg/2024-04/slides/TechnicalTalks/MojoDebugging.pdf). +pub mod attributes; +mod builders; +mod ops; +pub mod transform; + +pub use self::{builders::DIBuilder, ops::*}; +use crate::{ + DialectInfo, + derive::{Dialect, DialectRegistration}, +}; + +/// The DebugInfo dialect — first-class debug variable tracking. +/// +/// This dialect provides operations for tracking source-level variables through +/// compiler transformations using SSA semantics. Unlike metadata-based approaches, +/// debug info here participates in standard use-def chains, making it impossible +/// for transforms to silently drop debug information. 
+#[derive(Debug, Dialect, DialectRegistration)] +#[dialect(name = "di")] +pub struct DebugInfoDialect { + #[dialect(info)] + info: DialectInfo, +} diff --git a/hir/src/dialects/debuginfo/attributes.rs b/hir/src/dialects/debuginfo/attributes.rs new file mode 100644 index 000000000..ded85bb15 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes.rs @@ -0,0 +1,15 @@ +mod compile_unit; +mod expression; +mod subprogram; +mod variable; + +pub use self::{ + compile_unit::{CompileUnit, CompileUnitAttr}, + expression::{ + Expression, ExpressionAttr, ExpressionOp, FRAME_BASE_LOCAL_MARKER, + decode_frame_base_local_index, decode_frame_base_local_offset, + encode_frame_base_local_index, encode_frame_base_local_offset, + }, + subprogram::{Subprogram, SubprogramAttr}, + variable::{Variable, VariableAttr}, +}; diff --git a/hir/src/dialects/debuginfo/attributes/compile_unit.rs b/hir/src/dialects/debuginfo/attributes/compile_unit.rs new file mode 100644 index 000000000..fb3621011 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes/compile_unit.rs @@ -0,0 +1,125 @@ +use crate::{ + AttrPrinter, attributes::AttrParser, derive::DialectAttribute, + dialects::debuginfo::DebugInfoDialect, interner::Symbol, print::AsmPrinter, +}; + +/// Represents the compilation unit associated with debug information. +/// +/// The fields in this struct are intentionally aligned with the subset of +/// DWARF metadata we currently care about when tracking variable locations. 
+#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] +#[attribute(dialect = DebugInfoDialect, implements(AttrPrinter))] +pub struct CompileUnit { + pub language: Symbol, + pub file: Symbol, + pub directory: Option, + pub producer: Option, + pub optimized: bool, +} + +impl Default for CompileUnit { + fn default() -> Self { + Self { + language: crate::interner::symbols::Empty, + file: crate::interner::symbols::Empty, + directory: None, + producer: None, + optimized: false, + } + } +} + +impl CompileUnit { + pub fn new(language: Symbol, file: Symbol) -> Self { + Self { + language, + file, + directory: None, + producer: None, + optimized: false, + } + } +} + +impl AttrPrinter for CompileUnitAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + use crate::formatter::*; + + *printer += const_text("{ "); + + *printer += const_text("language") + const_text(" = "); + printer.print_string(self.language.as_str()); + *printer += const_text(", "); + + *printer += const_text("file") + const_text(" = "); + printer.print_string(self.file.as_str()); + + if let Some(directory) = self.directory { + *printer += const_text(", "); + *printer += const_text("directory") + const_text(" = "); + printer.print_string(directory.as_str()); + } + + if let Some(producer) = self.producer { + *printer += const_text(", "); + *printer += const_text("producer") + const_text(" = "); + printer.print_string(producer.as_str()); + } + + *printer += const_text(", "); + *printer += const_text("optimized") + const_text(" = "); + printer.print_bool(self.optimized); + + *printer += const_text(" }"); + } +} + +impl AttrParser for CompileUnitAttr { + fn parse( + parser: &mut dyn crate::parse::Parser<'_>, + ) -> crate::parse::ParseResult { + use crate::parse::Token; + + parser.parse_lbrace()?; + + parser.parse_custom_keyword("language")?; + parser.parse_equal()?; + let language = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + parser.parse_custom_keyword("file")?; + 
parser.parse_equal()?; + let file = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + let mut unit = CompileUnit::new(language.into(), file.into()); + + if parser.parse_optional_custom_keyword("directory")?.is_some() { + parser.parse_equal()?; + unit.directory = Some(parser.parse_string()?.into_inner().into()); + parser.parse_comma()?; + } + if parser.parse_optional_custom_keyword("producer")?.is_some() { + parser.parse_equal()?; + unit.producer = Some(parser.parse_string()?.into_inner().into()); + parser.parse_comma()?; + } + if parser.parse_optional_custom_keyword("optimized")?.is_some() { + parser.parse_equal()?; + unit.optimized = parser + .token_stream_mut() + .expect_map("boolean", |tok| match tok { + Token::True => Some(true), + Token::False => Some(false), + _ => None, + })? + .into_inner(); + } + + parser.parse_rbrace()?; + + let attr = parser.context_rc().create_attribute::(unit); + + Ok(attr.as_attribute_ref()) + } +} diff --git a/hir/src/dialects/debuginfo/attributes/expression.rs b/hir/src/dialects/debuginfo/attributes/expression.rs new file mode 100644 index 000000000..64ca59599 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes/expression.rs @@ -0,0 +1,471 @@ +use alloc::{format, string::ToString, vec::Vec}; + +use crate::{ + AttrPrinter, attributes::AttrParser, derive::DialectAttribute, + dialects::debuginfo::DebugInfoDialect, interner::Symbol, parse::ParserExt, print::AsmPrinter, +}; + +/// Represents DWARF expression operations for describing variable locations +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum ExpressionOp { + /// DW_OP_WASM_location 0x00 - Variable is in a WebAssembly local + WasmLocal(u32) = 0, + /// DW_OP_WASM_location 0x01 - Variable is in a WebAssembly global + WasmGlobal(u32) = 1, + /// DW_OP_WASM_location 0x02 - Variable is on the WebAssembly operand stack + WasmStack(u32) = 2, + /// DW_OP_constu - Unsigned constant value + ConstU64(u64) = 3, + /// DW_OP_consts - Signed constant 
value + ConstS64(i64) = 4, + /// DW_OP_plus_uconst - Add unsigned constant to top of stack + PlusUConst(u64) = 5, + /// DW_OP_minus - Subtract top two stack values + Minus = 6, + /// DW_OP_plus - Add top two stack values + Plus = 7, + /// DW_OP_deref - Dereference the address at top of stack + Deref = 8, + /// DW_OP_stack_value - The value on the stack is the value of the variable + StackValue = 9, + /// DW_OP_piece - Describes a piece of a variable + Piece(u64) = 10, + /// DW_OP_bit_piece - Describes a piece of a variable in bits + BitPiece { size: u64, offset: u64 } = 11, + /// DW_OP_fbreg - Frame base register + offset. + /// The variable is in WASM linear memory at `value_of(global[global_index]) + byte_offset`. + FrameBase { global_index: u32, byte_offset: i64 } = 12, + /// DW_OP_addr - pushes memory address `address` on the expression operand stack + Address { address: u64 } = 13, + /// Placeholder for unsupported operations + Unsupported(Symbol) = u8::MAX, +} + +impl ExpressionOp { + const fn tag(&self) -> u8 { + // SAFETY: This is safe because we have given this enum a + // primitive representation with #[repr(u8)], with the first + // field of the underlying union-of-structs the discriminant + // + // See the section on "accessing the numeric value of the discriminant" + // here: https://doc.rust-lang.org/std/mem/fn.discriminant.html + unsafe { *(self as *const Self).cast::() } + } +} + +impl miden_core::serde::Serializable for ExpressionOp { + fn write_into(&self, target: &mut W) { + target.write_u8(self.tag()); + match self { + Self::WasmLocal(idx) | Self::WasmGlobal(idx) | Self::WasmStack(idx) => { + target.write_u32(*idx); + } + Self::ConstU64(val) | Self::PlusUConst(val) | Self::Piece(val) => { + target.write_u64(*val); + } + Self::ConstS64(val) => { + target.write_u64(*val as u64); + } + Self::Minus | Self::Plus | Self::Deref | Self::StackValue => (), + Self::BitPiece { size, offset } => { + target.write_u64(*size); + target.write_u64(*offset); + } + 
Self::FrameBase { + global_index, + byte_offset, + } => { + target.write_u32(*global_index); + target.write_u64(*byte_offset as u64); + } + Self::Address { address } => { + target.write_u64(*address); + } + Self::Unsupported(name) => { + target.write_usize(name.as_str().len()); + target.write_bytes(name.as_str().as_bytes()); + } + } + } +} + +impl miden_core::serde::Deserializable for ExpressionOp { + fn read_from( + source: &mut R, + ) -> Result { + use miden_core::serde::DeserializationError; + + Ok(match source.read_u8()? { + 0 => Self::WasmLocal(u32::read_from(source)?), + 1 => Self::WasmGlobal(u32::read_from(source)?), + 2 => Self::WasmStack(u32::read_from(source)?), + 3 => Self::ConstU64(u64::read_from(source)?), + 4 => Self::ConstS64(u64::read_from(source)? as i64), + 5 => Self::PlusUConst(u64::read_from(source)?), + 6 => Self::Minus, + 7 => Self::Plus, + 8 => Self::Deref, + 9 => Self::StackValue, + 10 => Self::Piece(u64::read_from(source)?), + 11 => { + let size = u64::read_from(source)?; + let offset = u64::read_from(source)?; + Self::BitPiece { size, offset } + } + 12 => { + let global_index = u32::read_from(source)?; + let byte_offset = u64::read_from(source)? 
as i64; + Self::FrameBase { + global_index, + byte_offset, + } + } + 13 => { + let address = u64::read_from(source)?; + Self::Address { address } + } + u8::MAX => { + let len = usize::read_from(source)?; + let bytes = source.read_slice(len)?; + let s = core::str::from_utf8(bytes) + .map_err(|err| DeserializationError::InvalidValue(err.to_string()))?; + Self::Unsupported(Symbol::intern(s)) + } + invalid => { + return Err(DeserializationError::InvalidValue(format!( + "unknown DIExpressionOp tag '{invalid}'" + ))); + } + }) + } +} + +impl crate::formatter::PrettyPrint for ExpressionOp { + fn render(&self) -> crate::formatter::Document { + use crate::formatter::*; + match self { + Self::WasmLocal(idx) => { + const_text("DW_OP_WASM_local") + const_text("(") + display(idx) + const_text(")") + } + Self::WasmGlobal(idx) => { + const_text("DW_OP_WASM_global") + const_text("(") + display(idx) + const_text(")") + } + Self::WasmStack(idx) => { + const_text("DW_OP_WASM_stack") + const_text("(") + display(idx) + const_text(")") + } + Self::ConstU64(val) => { + const_text("DW_OP_constu") + const_text("(") + display(val) + const_text(")") + } + Self::ConstS64(val) => { + const_text("DW_OP_consts") + const_text("(") + display(val) + const_text(")") + } + Self::PlusUConst(val) => { + const_text("DW_OP_plus_uconst") + const_text("(") + display(val) + const_text(")") + } + Self::Minus => const_text("DW_OP_minus"), + Self::Plus => const_text("DW_OP_plus"), + Self::Deref => const_text("DW_OP_deref"), + Self::StackValue => const_text("DW_OP_stack_value"), + Self::Piece(size) => { + const_text("DW_OP_piece") + const_text("(") + display(*size) + const_text(")") + } + Self::BitPiece { size, offset } => { + const_text("DW_OP_bit_piece") + + const_text("(") + + display(*size) + + const_text(",") + + display(*offset) + + const_text(")") + } + Self::FrameBase { + global_index, + byte_offset, + } => { + if let Some(local_index) = decode_frame_base_local_index(*global_index) { + 
const_text("DW_OP_fbreg(local, ") + + text(format!("{local_index}{byte_offset:+}")) + + const_text(")") + } else { + const_text("DW_OP_fbreg(global, ") + + text(format!("{global_index}{byte_offset:+}")) + + const_text(")") + } + } + Self::Address { address } => { + const_text("DW_OP_addr") + const_text("(") + display(*address) + const_text(")") + } + Self::Unsupported(name) => const_text(name.as_str()), + } + } +} + +impl ExpressionOp { + fn parse(parser: &mut dyn crate::parse::Parser<'_>) -> crate::parse::ParseResult { + use crate::parse::Token; + + let mut op = parser + .token_stream_mut() + .expect_map("DIExpression operator", |tok| match tok { + Token::BareIdent(id) => match id { + "DW_OP_WASM_local" => Some(ExpressionOp::WasmLocal(0)), + "DW_OP_WASM_global" => Some(ExpressionOp::WasmGlobal(0)), + "DW_OP_WASM_stack" => Some(ExpressionOp::WasmStack(0)), + "DW_OP_constu" => Some(ExpressionOp::ConstU64(0)), + "DW_OP_consts" => Some(ExpressionOp::ConstS64(0)), + "DW_OP_plus_uconst" => Some(ExpressionOp::PlusUConst(0)), + "DW_OP_minus" => Some(ExpressionOp::Minus), + "DW_OP_plus" => Some(ExpressionOp::Plus), + "DW_OP_deref" => Some(ExpressionOp::Deref), + "DW_OP_stack_value" => Some(ExpressionOp::StackValue), + "DW_OP_piece" => Some(ExpressionOp::Piece(0)), + "DW_OP_bit_piece" => Some(ExpressionOp::BitPiece { size: 0, offset: 0 }), + "DW_OP_fbreg" => Some(ExpressionOp::FrameBase { + global_index: 0, + byte_offset: 0, + }), + "DW_OP_addr" => Some(ExpressionOp::Address { address: 0 }), + other => Some(ExpressionOp::Unsupported(Symbol::intern(other))), + }, + _ => None, + })? 
+            .into_inner();
+        // Each placeholder operator created above now has its operands filled in from the
+        // parenthesised argument list that follows the operator name (if it takes any).
+        match &mut op {
+            ExpressionOp::WasmLocal(idx)
+            | ExpressionOp::WasmGlobal(idx)
+            | ExpressionOp::WasmStack(idx) => {
+                parser.parse_lparen()?;
+                *idx = parser.parse_decimal_integer::()?.into_inner();
+                parser.parse_rparen()?;
+            }
+            ExpressionOp::ConstU64(val)
+            | ExpressionOp::PlusUConst(val)
+            | ExpressionOp::Piece(val)
+            | ExpressionOp::Address { address: val } => {
+                parser.parse_lparen()?;
+                *val = parser.parse_decimal_integer::()?.into_inner();
+                parser.parse_rparen()?;
+            }
+            ExpressionOp::ConstS64(val) => {
+                parser.parse_lparen()?;
+                *val = parser.parse_decimal_integer::()?.into_inner();
+                parser.parse_rparen()?;
+            }
+            ExpressionOp::Minus
+            | ExpressionOp::Plus
+            | ExpressionOp::Deref
+            | ExpressionOp::StackValue
+            | ExpressionOp::Unsupported(_) => (),
+            ExpressionOp::BitPiece { size, offset } => {
+                parser.parse_lparen()?;
+                *size = parser.parse_decimal_integer::()?.into_inner();
+                parser.parse_comma()?;
+                *offset = parser.parse_decimal_integer::()?.into_inner();
+                parser.parse_rparen()?;
+            }
+            ExpressionOp::FrameBase {
+                global_index,
+                byte_offset,
+            } => {
+                parser.parse_lparen()?;
+                // Keep the modifier: only a `local` frame base carries the
+                // FRAME_BASE_LOCAL_MARKER bit, a `global` one stores the raw global index.
+                let is_local = parser
+                    .token_stream_mut()
+                    .expect_map("'local' or 'global' modifier", |tok| match tok {
+                        Token::BareIdent("local") => Some(true),
+                        Token::BareIdent("global") => Some(false),
+                        _ => None,
+                    })?
+                    .into_inner();
+                parser.parse_comma()?;
+                let index = parser.parse_decimal_integer::()?.into_inner();
+                parser.parse_comma()?;
+                *byte_offset = parser.parse_decimal_integer::()?.into_inner();
+                *global_index = if is_local {
+                    // Falls back to the raw index when it is too large to encode.
+                    encode_frame_base_local_index(index).unwrap_or(index)
+                } else {
+                    index
+                };
+                parser.parse_rparen()?;
+            }
+        }
+
+        Ok(op)
+    }
+}
+
+/// Represents a DWARF expression that describes how to compute or locate a variable's value
+#[derive(DialectAttribute, Clone, Debug, Default, PartialEq, Eq, Hash)]
+#[attribute(dialect = DebugInfoDialect, implements(AttrPrinter))]
+pub struct Expression {
+    pub operations: Vec,
+}
+
+impl Expression {
+    pub fn new() -> Self {
+        Self {
+            operations: Vec::new(),
+        }
+    }
+
+    pub fn with_ops(operations: Vec) -> Self {
+        Self { operations }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.operations.is_empty()
+    }
+}
+
+impl miden_core::serde::Serializable for Expression {
+    fn write_into(&self, target: &mut W) {
+        target.write_usize(self.operations.len());
+        for op in self.operations.iter() {
+            target.write(op);
+        }
+    }
+}
+
+impl miden_core::serde::Deserializable for Expression {
+    fn read_from(
+        source: &mut R,
+    ) -> Result {
+        let len = usize::read_from(source)?;
+        let mut expr = Self::with_ops(Vec::with_capacity(len));
+        for _ in 0..len {
+            expr.operations.push(ExpressionOp::read_from(source)?);
+        }
+        Ok(expr)
+    }
+}
+
+impl AttrPrinter for ExpressionAttr {
+    fn print(&self, printer: &mut AsmPrinter<'_>) {
+        use crate::formatter::*;
+
+        if self.operations.is_empty() {
+            *printer += const_text("[]");
+            return;
+        }
+
+        *printer += const_text("[");
+        for (i, op) in self.operations.iter().enumerate() {
+            if i > 0 {
+                *printer += const_text(", ");
+            }
+            match op {
+                ExpressionOp::WasmLocal(idx) => {
+                    *printer += const_text("DW_OP_WASM_local");
+                    *printer += const_text("(") + display(*idx) + const_text(")");
+                }
+                ExpressionOp::WasmGlobal(idx) => {
+                    *printer += const_text("DW_OP_WASM_global");
+                    *printer += const_text("(") + display(*idx) + 
const_text(")"); + } + ExpressionOp::WasmStack(idx) => { + *printer += const_text("DW_OP_WASM_stack"); + *printer += const_text("(") + display(*idx) + const_text(")"); + } + ExpressionOp::ConstU64(val) => { + *printer += const_text("DW_OP_constu"); + *printer += const_text("(") + display(*val) + const_text(")"); + } + ExpressionOp::ConstS64(val) => { + *printer += const_text("DW_OP_consts"); + *printer += const_text("(") + display(*val) + const_text(")"); + } + ExpressionOp::PlusUConst(val) => { + *printer += const_text("DW_OP_plus_uconst"); + *printer += const_text("(") + display(*val) + const_text(")"); + } + ExpressionOp::Minus => *printer += const_text("DW_OP_minus"), + ExpressionOp::Plus => *printer += const_text("DW_OP_plus"), + ExpressionOp::Deref => *printer += const_text("DW_OP_deref"), + ExpressionOp::StackValue => *printer += const_text("DW_OP_stack_value"), + ExpressionOp::Piece(size) => { + *printer += const_text("DW_OP_piece"); + *printer += const_text("(") + display(*size) + const_text(")"); + } + ExpressionOp::BitPiece { size, offset } => { + *printer += const_text("DW_OP_bit_piece"); + *printer += const_text("(") + + display(*size) + + const_text(",") + + display(*offset) + + const_text(")"); + } + ExpressionOp::FrameBase { + global_index, + byte_offset, + } => { + if let Some(local_index) = decode_frame_base_local_index(*global_index) { + *printer += const_text("DW_OP_fbreg(local, "); + *printer += text(format!("{}{:+}", local_index, byte_offset)); + *printer += const_text(")"); + } else { + *printer += const_text("DW_OP_fbreg(global, "); + *printer += text(format!("{}{:+}", global_index, byte_offset)); + *printer += const_text(")"); + } + } + ExpressionOp::Address { address } => { + *printer += const_text("DW_OP_addr"); + *printer += const_text("(") + display(*address) + const_text(")"); + } + ExpressionOp::Unsupported(name) => *printer += const_text(name.as_str()), + } + } + *printer += const_text("]"); + } +} + +impl AttrParser for 
ExpressionAttr { + fn parse( + parser: &mut dyn crate::parse::Parser<'_>, + ) -> crate::parse::ParseResult { + use crate::parse::Delimiter; + + let mut ops = Vec::default(); + parser.parse_comma_separated_list( + Delimiter::OptionalBracket, + Some("DIExpression"), + |parser| { + ops.push(ExpressionOp::parse(parser)?); + + Ok(true) + }, + )?; + + let attr = parser + .context_rc() + .create_attribute::(Expression::with_ops(ops)); + + Ok(attr.as_attribute_ref()) + } +} + +/// High-bit marker used to carry a Wasm-local frame base through the existing +/// `FrameBase { global_index, byte_offset }` debug-location shape without +/// changing the VM-facing `DebugVarLocation` ABI. +/// +/// Before MASM lowering completes, the low bits hold a raw Wasm local index. +/// After local patching, the low 16 bits hold the signed FMP-relative offset of +/// the Miden local containing the frame-base byte address. +pub const FRAME_BASE_LOCAL_MARKER: u32 = 1 << 31; + +pub fn encode_frame_base_local_index(local_index: u32) -> Option { + if local_index < FRAME_BASE_LOCAL_MARKER { + Some(FRAME_BASE_LOCAL_MARKER | local_index) + } else { + None + } +} + +pub fn decode_frame_base_local_index(encoded: u32) -> Option { + (encoded & FRAME_BASE_LOCAL_MARKER != 0).then_some(encoded & !FRAME_BASE_LOCAL_MARKER) +} + +pub fn encode_frame_base_local_offset(local_offset: i16) -> u32 { + FRAME_BASE_LOCAL_MARKER | u16::from_le_bytes(local_offset.to_le_bytes()) as u32 +} + +pub fn decode_frame_base_local_offset(encoded: u32) -> Option { + if encoded & FRAME_BASE_LOCAL_MARKER == 0 { + return None; + } + let low_bits = (encoded & 0xffff) as u16; + Some(i16::from_le_bytes(low_bits.to_le_bytes())) +} diff --git a/hir/src/dialects/debuginfo/attributes/subprogram.rs b/hir/src/dialects/debuginfo/attributes/subprogram.rs new file mode 100644 index 000000000..078a47f20 --- /dev/null +++ b/hir/src/dialects/debuginfo/attributes/subprogram.rs @@ -0,0 +1,223 @@ +use alloc::{format, sync::Arc, vec::Vec}; + +use 
crate::{ + AttrPrinter, Type, attributes::AttrParser, derive::DialectAttribute, + dialects::debuginfo::DebugInfoDialect, interner::Symbol, parse::ParserExt, print::AsmPrinter, +}; + +/// Represents a subprogram (function) scope for debug information. +/// The compile unit is not embedded but typically stored separately on the module. +#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)] +#[attribute(dialect = DebugInfoDialect, implements(AttrPrinter))] +pub struct Subprogram { + pub name: Symbol, + pub linkage_name: Option, + pub file: Symbol, + pub line: u32, + pub column: Option, + pub is_definition: bool, + pub is_local: bool, + pub ty: Option, + pub param_names: Vec, +} + +impl Default for Subprogram { + fn default() -> Self { + Self { + name: crate::interner::symbols::Empty, + linkage_name: None, + file: crate::interner::symbols::Empty, + line: 0, + column: None, + is_definition: false, + is_local: false, + ty: None, + param_names: Vec::new(), + } + } +} + +impl Subprogram { + pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self { + Self { + name, + linkage_name: None, + file, + line, + column, + is_definition: true, + is_local: false, + ty: None, + param_names: Vec::new(), + } + } + + pub fn with_function_type(mut self, ty: crate::FunctionType) -> Self { + self.ty = Some(Type::Function(Arc::new(ty))); + self + } + + pub fn with_param_names(mut self, names: I) -> Self + where + I: IntoIterator, + { + self.param_names = names.into_iter().collect(); + self + } +} + +impl AttrPrinter for SubprogramAttr { + fn print(&self, printer: &mut AsmPrinter<'_>) { + use crate::formatter::*; + + *printer += const_text("{ "); + + *printer += const_text("name") + const_text(" = "); + printer.print_string(self.name.as_str()); + *printer += const_text(", "); + + *printer += const_text("file") + const_text(" = "); + printer.print_string(self.file.as_str()); + *printer += const_text(", "); + + *printer += const_text("line") + const_text(" = "); + 
printer.print_decimal_integer(self.line);
+
+        // Optional properties are emitted only when present, each introduced by ", ".
+        if let Some(column) = self.column {
+            *printer += const_text(", ");
+            *printer += const_text("column") + const_text(" = ");
+            printer.print_decimal_integer(column);
+        }
+
+        if let Some(linkage) = self.linkage_name {
+            *printer += const_text(", ");
+            *printer += const_text("linkage") + const_text(" = ");
+            printer.print_string(linkage.as_str());
+        }
+
+        if let Some(ty) = &self.ty {
+            *printer += const_text(", ");
+            *printer += const_text("ty") + const_text(" = ");
+            printer.print_type(ty);
+        }
+
+        if !self.param_names.is_empty() {
+            // Each name is written with `print_string`, like `name`/`file`/`linkage`, because
+            // the parser reads every entry of this list with `parse_string`.
+            *printer += const_text(", ");
+            *printer += const_text("params") + const_text(" = ") + const_text("[");
+            for (i, name) in self.param_names.iter().enumerate() {
+                if i > 0 {
+                    *printer += const_text(", ");
+                }
+                printer.print_string(name.as_str());
+            }
+            *printer += const_text("]");
+        }
+
+        // `definition` and `local` are always printed.
+        *printer += const_text(", ");
+        *printer += const_text("definition") + const_text(" = ");
+        printer.print_bool(self.is_definition);
+
+        *printer += const_text(", ");
+        *printer += const_text("local") + const_text(" = ");
+        printer.print_bool(self.is_local);
+
+        *printer += const_text(" }");
+    }
+}
+
+impl AttrParser for SubprogramAttr {
+    fn parse(
+        parser: &mut dyn crate::parse::Parser<'_>,
+    ) -> crate::parse::ParseResult {
+        use crate::parse::Token;
+
+        parser.parse_lbrace()?;
+
+        parser.parse_custom_keyword("name")?;
+        parser.parse_equal()?;
+        let name = parser.parse_string()?.into_inner();
+        parser.parse_comma()?;
+
+        parser.parse_custom_keyword("file")?;
+        parser.parse_equal()?;
+        let file = parser.parse_string()?.into_inner();
+        parser.parse_comma()?;
+
+        parser.parse_custom_keyword("line")?;
+        parser.parse_equal()?;
+        let line = parser.parse_decimal_integer::()?.into_inner();
+
+        let mut subprogram = Subprogram::new(name.into(), file.into(), line, None);
+
+        while parser.parse_optional_comma()? 
{ + let (span, prop) = parser + .token_stream_mut() + .expect_map("Subprogram property", |tok| match tok { + Token::BareIdent( + prop @ ("column" | "linkage" | "ty" | "params" | "definition" | "local"), + ) => Some(prop), + _ => None, + })? + .into_parts(); + match prop { + "column" if subprogram.column.is_none() => { + parser.parse_equal()?; + subprogram.column = Some(parser.parse_decimal_integer::()?.into_inner()); + } + "linkage" if subprogram.linkage_name.is_none() => { + parser.parse_equal()?; + subprogram.linkage_name = Some(parser.parse_string()?.into_inner().into()); + } + "ty" if subprogram.ty.is_none() => { + parser.parse_equal()?; + subprogram.ty = Some(parser.parse_type()?.into_inner()); + } + "params" if subprogram.param_names.is_empty() => { + parser.parse_equal()?; + parser.parse_comma_separated_list( + crate::parse::Delimiter::OptionalBracket, + Some("parameter names"), + |parser| { + subprogram.param_names.push(parser.parse_string()?.into_inner().into()); + Ok(true) + }, + )?; + } + "definition" => { + parser.parse_equal()?; + subprogram.is_definition = parser + .token_stream_mut() + .expect_map("boolean", |tok| match tok { + Token::BareIdent("true") => Some(true), + Token::BareIdent("false") => Some(false), + _ => None, + })? + .into_inner(); + } + "local" => { + parser.parse_equal()?; + subprogram.is_local = parser + .token_stream_mut() + .expect_map("boolean", |tok| match tok { + Token::BareIdent("true") => Some(true), + Token::BareIdent("false") => Some(false), + _ => None, + })? 
+                        .into_inner();
+                }
+                // Reached when the guard of a once-only property (`column`, `linkage`, `ty`,
+                // `params`) fails, i.e. that property has already been set.
+                prop => {
+                    return Err(crate::parse::ParserError::InvalidAttributeValue {
+                        span,
+                        reason: format!("duplicate Subprogram property '{prop}'"),
+                    });
+                }
+            }
+        }
+
+        parser.parse_rbrace()?;
+
+        let attr = parser.context_rc().create_attribute::(subprogram);
+
+        Ok(attr.as_attribute_ref())
+    }
+}
diff --git a/hir/src/dialects/debuginfo/attributes/variable.rs b/hir/src/dialects/debuginfo/attributes/variable.rs
new file mode 100644
index 000000000..61347ff12
--- /dev/null
+++ b/hir/src/dialects/debuginfo/attributes/variable.rs
@@ -0,0 +1,146 @@
+use alloc::format;
+
+use crate::{
+    AttrPrinter, Type, attributes::AttrParser, derive::DialectAttribute,
+    dialects::debuginfo::DebugInfoDialect, interner::Symbol, parse::ParserExt, print::AsmPrinter,
+};
+
+/// Represents a local variable debug record.
+/// The scope (Subprogram) is not embedded but instead stored on the containing function.
+#[derive(DialectAttribute, Clone, Debug, PartialEq, Eq, Hash)]
+#[attribute(dialect = DebugInfoDialect, implements(AttrPrinter))]
+pub struct Variable {
+    pub name: Symbol,
+    pub arg_index: Option,
+    pub file: Symbol,
+    pub line: u32,
+    pub column: Option,
+    pub ty: Option,
+}
+
+impl Default for Variable {
+    fn default() -> Self {
+        Self {
+            name: crate::interner::symbols::Empty,
+            arg_index: None,
+            file: crate::interner::symbols::Empty,
+            line: 0,
+            column: None,
+            ty: None,
+        }
+    }
+}
+
+impl Variable {
+    pub fn new(name: Symbol, file: Symbol, line: u32, column: Option) -> Self {
+        Self {
+            name,
+            arg_index: None,
+            file,
+            line,
+            column,
+            ty: None,
+        }
+    }
+}
+
+impl AttrPrinter for VariableAttr {
+    fn print(&self, printer: &mut AsmPrinter<'_>) {
+        use crate::formatter::*;
+
+        *printer += const_text("{ ");
+
+        *printer += const_text("name") + const_text(" = ");
+        printer.print_string(self.name.as_str());
+        *printer += const_text(", ");
+
+        *printer += const_text("file") + const_text(" = ");
+        printer.print_string(self.file.as_str());
+        *printer += 
const_text(", "); + + *printer += const_text("line") + const_text(" = "); + printer.print_decimal_integer(self.line); + + if let Some(column) = self.column { + *printer += const_text(", "); + *printer += const_text("column") + const_text(" = "); + printer.print_decimal_integer(column); + } + + if let Some(arg_index) = self.arg_index { + *printer += const_text(", "); + *printer += const_text("arg") + const_text(" = "); + printer.print_decimal_integer(arg_index); + } + + if let Some(ty) = &self.ty { + *printer += const_text(", "); + *printer += const_text("ty") + const_text(" = "); + printer.print_type(ty); + } + + *printer += const_text(" }"); + } +} + +impl AttrParser for VariableAttr { + fn parse( + parser: &mut dyn crate::parse::Parser<'_>, + ) -> crate::parse::ParseResult { + use crate::parse::Token; + + parser.parse_lbrace()?; + + parser.parse_custom_keyword("name")?; + parser.parse_equal()?; + let name = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + parser.parse_custom_keyword("file")?; + parser.parse_equal()?; + let file = parser.parse_string()?.into_inner(); + parser.parse_comma()?; + + parser.parse_custom_keyword("line")?; + parser.parse_equal()?; + let line = parser.parse_decimal_integer::()?.into_inner(); + + let mut var = Variable::new(name.into(), file.into(), line, None); + + while parser.parse_optional_comma()? { + let (span, prop) = parser + .token_stream_mut() + .expect_map("DILocalVariable property", |tok| match tok { + Token::BareIdent(prop @ ("column" | "arg" | "ty")) => Some(prop), + _ => None, + })? 
+                .into_parts();
+            // Every optional property may appear at most once; the guards reject a repeat by
+            // falling through to the final arm.
+            match prop {
+                "column" if var.column.is_none() => {
+                    parser.parse_equal()?;
+                    var.column = Some(parser.parse_decimal_integer::()?.into_inner());
+                }
+                "arg" if var.arg_index.is_none() => {
+                    parser.parse_equal()?;
+                    // `arg` fills `arg_index` (the field the printer emits as `arg = N`),
+                    // not `column`.
+                    var.arg_index = Some(parser.parse_decimal_integer::()?.into_inner());
+                }
+                "ty" if var.ty.is_none() => {
+                    parser.parse_equal()?;
+                    var.ty = Some(parser.parse_type()?.into_inner());
+                }
+                prop => {
+                    return Err(crate::parse::ParserError::InvalidAttributeValue {
+                        span,
+                        reason: format!("duplicate DILocalVariableAttr property '{prop}'"),
+                    });
+                }
+            }
+        }
+
+        parser.parse_rbrace()?;
+
+        let attr = parser.context_rc().create_attribute::(var);
+
+        Ok(attr.as_attribute_ref())
+    }
+}
diff --git a/dialects/debuginfo/src/builders.rs b/hir/src/dialects/debuginfo/builders.rs
similarity index 55%
rename from dialects/debuginfo/src/builders.rs
rename to hir/src/dialects/debuginfo/builders.rs
index 298493efc..165edb87d 100644
--- a/dialects/debuginfo/src/builders.rs
+++ b/hir/src/dialects/debuginfo/builders.rs
@@ -1,14 +1,14 @@
 use midenc_hir::{
-    Builder, BuilderExt, DIExpression, DILocalVariable, Report, SourceSpan, ValueRef,
+    Builder, BuilderExt, Report, SourceSpan, ValueRef,
+    dialects::debuginfo::attributes::{Expression, Variable},
 };
 
 use super::ops::*;
 
 /// Builder trait for creating debug info operations.
 ///
-/// This trait follows the same pattern as other dialect builders
-/// (`ArithOpBuilder`, `HirOpBuilder`, etc.) and can be implemented
-/// for any type that wraps a `Builder`.
+/// This trait follows the same pattern as other dialect builders (`ArithOpBuilder`, `HirOpBuilder`,
+/// etc.) and can be implemented for any type that wraps a [Builder].
/// /// # Usage /// @@ -22,33 +22,31 @@ use super::ops::*; /// // Mark a variable as dead: /// builder.debug_kill(variable_attr, span)?; /// ``` -pub trait DebugInfoOpBuilder<'f, B: ?Sized + Builder> { - /// Emit a `debuginfo.value` operation that records the current value of a - /// source-level variable. +pub trait DIBuilder<'f, B: ?Sized + Builder> { + /// Emit a `di.value` operation that records the current value of a source-level variable. /// - /// This creates an SSA use of `value`, ensuring that transforms cannot - /// silently drop the value without updating the debug info. + /// This creates an SSA use of `value`, ensuring that transforms cannot silently drop the value + /// without updating the debug info. fn debug_value( &mut self, value: ValueRef, - variable: DILocalVariable, + variable: Variable, span: SourceSpan, ) -> Result { self.debug_value_with_expr(value, variable, None, span) } - /// Emit a `debuginfo.value` operation with an optional expression that - /// describes how to recover the source-level value from the IR value. + /// Emit a `di.value` operation with an optional expression that describes how to recover the + /// source-level value from the IR value. /// - /// The expression encodes the *inverse* of whatever transformation was - /// applied to the value. For example, if a value was promoted to a stack - /// allocation (pointer), the expression would contain a `deref` operation - /// to recover the original value. + /// The expression encodes the *inverse* of whatever transformation was applied to the value. + /// For example, if a value was promoted to a stack allocation (pointer), the expression would + /// contain a `deref` operation to recover the original value. 
fn debug_value_with_expr( &mut self, value: ValueRef, - variable: DILocalVariable, - expression: Option, + variable: Variable, + expression: Option, span: SourceSpan, ) -> Result { let expr = expression.unwrap_or_default(); @@ -56,27 +54,22 @@ pub trait DebugInfoOpBuilder<'f, B: ?Sized + Builder> { op_builder(value, variable, expr) } - /// Emit a `debuginfo.declare` operation that records the storage address - /// of a source-level variable. + /// Emit a `di.declare` operation that records the storage address of a source-level variable. fn debug_declare( &mut self, address: ValueRef, - variable: DILocalVariable, + variable: Variable, span: SourceSpan, ) -> Result { let op_builder = self.builder_mut().create::(span); op_builder(address, variable) } - /// Emit a `debuginfo.kill` operation that marks a variable as dead. + /// Emit a `di.kill` operation that marks a variable as dead. /// - /// After this point, the debugger should report the variable as unavailable - /// until the next `debug_value` or `debug_declare` for the same variable. - fn debug_kill( - &mut self, - variable: DILocalVariable, - span: SourceSpan, - ) -> Result { + /// After this point, the debugger should report the variable as unavailable until the next + /// `debug_value` or `debug_declare` for the same variable. + fn debug_kill(&mut self, variable: Variable, span: SourceSpan) -> Result { let op_builder = self.builder_mut().create::(span); op_builder(variable) } @@ -85,8 +78,7 @@ pub trait DebugInfoOpBuilder<'f, B: ?Sized + Builder> { fn builder_mut(&mut self) -> &mut B; } -/// Blanket implementation: any `Builder` can use `DebugInfoOpBuilder` directly. 
-impl DebugInfoOpBuilder<'_, B> for B { +impl DIBuilder<'_, B> for B { #[inline(always)] fn builder(&self) -> &B { self diff --git a/hir/src/dialects/debuginfo/ops.rs b/hir/src/dialects/debuginfo/ops.rs new file mode 100644 index 000000000..908fd6a6a --- /dev/null +++ b/hir/src/dialects/debuginfo/ops.rs @@ -0,0 +1,121 @@ +use midenc_hir::{ + OpPrinter, UnsafeIntrusiveEntityRef, + derive::{EffectOpInterface, OpParser, OpPrinter, operation}, + dialects::debuginfo::attributes::{ExpressionAttr, VariableAttr}, + effects::{ + DebugEffect, DebugEffectOpInterface, EffectOpInterface, MemoryEffect, + MemoryEffectOpInterface, + }, + smallvec, + traits::{AnyType, Transparent}, +}; + +use super::DebugInfoDialect; + +pub type DebugValueRef = UnsafeIntrusiveEntityRef; +pub type DebugDeclareRef = UnsafeIntrusiveEntityRef; +pub type DebugKillRef = UnsafeIntrusiveEntityRef; + +/// Records the current value of a source-level variable. +/// +/// This is the core operation of the debuginfo dialect. It creates a first-class SSA use of the +/// value, which means: +/// +/// - If a transform deletes the value without updating its debug uses, that's a hard error (not a +/// silent drop like with metadata-based approaches). +/// - Standard MLIR-style use-def tracking automatically enforces this — transforms must call +/// `replace_all_uses_with` or explicitly handle debug uses. +/// +/// The `variable` attribute identifies the source variable, and the `expression` attribute +/// describes how to recover the source-level value from the IR value (e.g., "dereference this +/// pointer" if the value was promoted to an alloca). 
+/// +/// # Example +/// +/// ```text +/// di.value %0 #[variable = di.local_variable(name = x, ...)] +/// #[expression = di.expression(DW_OP_WASM_local 0)] +/// ``` +#[derive(EffectOpInterface, OpParser, OpPrinter)] +#[operation( + dialect = DebugInfoDialect, + traits(Transparent), + implements(DebugEffectOpInterface, MemoryEffectOpInterface, OpPrinter) +)] +pub struct DebugValue { + #[operand] + #[effects(DebugEffect(DebugEffect::Read, DebugEffect::Write))] + value: AnyType, + #[attr] + #[effects(DebugEffect(DebugEffect::Write))] + variable: VariableAttr, + #[attr] + expression: ExpressionAttr, +} + +impl EffectOpInterface for DebugValue { + fn effects(&self) -> midenc_hir::effects::EffectIterator { + midenc_hir::effects::EffectIterator::from_smallvec(smallvec![]) + } +} + +/// Records the storage location (address) of a source-level variable. +/// +/// Unlike [DebugValue] which tracks values, [DebugDeclare] tracks the address where a variable is +/// stored. This is useful for variables that live in memory (e.g., stack allocations) where the +/// address itself doesn't change, but the value at that address may be updated through stores. +/// +/// Like `DebugValue`, this creates a real SSA use of the address value, preventing silent drops +/// during transforms. +#[derive(EffectOpInterface, OpParser, OpPrinter)] +#[operation( + dialect = DebugInfoDialect, + traits(Transparent), + implements(DebugEffectOpInterface, MemoryEffectOpInterface, OpPrinter) +)] +pub struct DebugDeclare { + #[operand] + #[effects(DebugEffect(DebugEffect::Read))] + address: AnyType, + #[attr] + #[effects(DebugEffect(DebugEffect::Allocate))] + variable: VariableAttr, +} + +impl EffectOpInterface for DebugDeclare { + fn effects(&self) -> midenc_hir::effects::EffectIterator { + midenc_hir::effects::EffectIterator::from_smallvec(smallvec![]) + } +} + +/// Marks a source-level variable as dead at this program point. 
+/// +/// This provides explicit lifetime boundaries for variables, giving the debugger precise +/// information about when a variable is no longer valid. Without this, debuggers must rely on +/// scope-based heuristics which can be inaccurate after optimizations. +/// +/// After a `di.kill`, the debugger should report the variable as "optimized out" or "not +/// available" until the next `di.value` or `di.declare` for the same variable. +/// +/// # Example +/// +/// ```text +/// di.kill #[variable = di.local_variable(name = x, ...)] +/// ``` +#[derive(EffectOpInterface, OpParser, OpPrinter)] +#[operation( + dialect = DebugInfoDialect, + traits(Transparent), + implements(DebugEffectOpInterface, MemoryEffectOpInterface, OpPrinter) +)] +pub struct DebugKill { + #[attr] + #[effects(DebugEffect(DebugEffect::Free))] + variable: VariableAttr, +} + +impl EffectOpInterface for DebugKill { + fn effects(&self) -> midenc_hir::effects::EffectIterator { + midenc_hir::effects::EffectIterator::from_smallvec(smallvec![]) + } +} diff --git a/dialects/debuginfo/src/transform.rs b/hir/src/dialects/debuginfo/transform.rs similarity index 60% rename from dialects/debuginfo/src/transform.rs rename to hir/src/dialects/debuginfo/transform.rs index b172bee09..330f31e01 100644 --- a/dialects/debuginfo/src/transform.rs +++ b/hir/src/dialects/debuginfo/transform.rs @@ -1,65 +1,66 @@ //! Transform utilities for maintaining debug info across IR transformations. //! -//! This module provides the "transformation hooks" that make the debuginfo dialect -//! practical. Following Mojo's approach, these utilities make it easy for transform -//! authors to keep debug info valid — they only need to describe the *inverse* of -//! their transformation. +//! This module provides the "transformation hooks" that make the debuginfo dialect practical. +//! Following Mojo's approach, these utilities make it easy for transform authors to keep debug info +//!
valid — they only need to describe the *inverse* of their transformation. //! //! # Design Philosophy //! -//! The debuginfo dialect uses SSA use-def chains for debug values, which means -//! transforms *cannot* silently drop debug info. When a transform replaces or -//! deletes a value, any `debuginfo.value` operations using that value must be -//! updated. The standard `replace_all_uses_with` already handles this correctly -//! for simple value replacements. +//! The `di` dialect uses SSA use-def chains for debug values, which means transforms *cannot* +//! silently drop debug info. When a transform replaces or deletes a value, any `di.value` +//! operations using that value must be updated. The standard `replace_all_uses_with` already +//! handles this correctly for simple value replacements. //! -//! For more complex transforms (e.g., promoting a value to memory, splitting a -//! value into pieces), the transform author uses `salvage_debug_info` to describe -//! how the debug expression should be updated to recover the source-level value -//! from the new representation. +//! For more complex transforms (e.g., promoting a value to memory, splitting a value into pieces), +//! the transform author uses `salvage_debug_info` to describe how the debug expression should be +//! updated to recover the source-level value from the new representation. //! //! # Examples //! //! ## Simple value replacement (handled automatically) //! //! When CSE replaces `%1 = add %a, %b` with an existing `%0 = add %a, %b`: -//! ```text -//! // Before: debuginfo.value %1 #[variable = x] -//! // rewriter.replace_all_uses_with(%1, %0) -//! // After: debuginfo.value %0 #[variable = x] -- automatic! +//! +//! ```text,ignore +//! // Before: di.value %1 #[variable = x] +//! rewriter.replace_all_uses_with(%1, %0) +//! // After: di.value %0 #[variable = x] -- automatic! //! ``` //! -//! ## Value promoted to memory (use salvage_debug_info) +//! 
## Value promoted to memory (using `salvage_debug_info`) //! //! When a transform promotes a value to a stack allocation: +//! //! ```text -//! // Before: debuginfo.value %val #[variable = x] +//! // Before: di.value %val #[variable = x] //! // Transform creates: %ptr = alloca T //! // store %val, %ptr //! // Call: salvage_debug_info(%val, SalvageAction::Deref { new_value: %ptr }) -//! // After: debuginfo.value %ptr #[variable = x, expression = di.expression(DW_OP_deref)] +//! // After: di.value %ptr #[variable = x, expression = di.expression(DW_OP_deref)] //! ``` - use alloc::vec::Vec; -use midenc_hir::{Builder, DIExpressionOp, Op, OperationRef, Spanned, ValueRef}; +use midenc_hir::{ + Builder, DialectRegistration, Operation, OperationRef, SmallVec, Spanned, ValueRef, + dialects::debuginfo::attributes::ExpressionOp, +}; -use crate::{DebugInfoOpBuilder, ops::DebugValue}; +use super::{DIBuilder, ops::DebugValue}; /// Describes how to recover the original source-level value after a transformation. /// -/// When a transform changes a value's representation, it creates a `SalvageAction` -/// describing the inverse operation. The debuginfo framework then updates the -/// `DIExpressionAttr` accordingly so the debugger can still find the variable's value. +/// When a transform changes a value's representation, it creates a [SalvageAction] describing the +/// inverse operation. The debuginfo framework then updates the `ExpressionAttr` accordingly so +/// the debugger can still find the variable's value. /// -/// Transform authors only need to pick the right variant — the framework handles -/// updating all affected `debuginfo.value` operations. +/// Transform authors only need to pick the right variant — the framework handles updating all +/// affected `di.value` operations. #[derive(Clone, Debug)] pub enum SalvageAction { /// The value is now behind a pointer; dereference to recover the original. /// - /// Use this when a value is promoted to a stack allocation.
- /// The expression will have `DW_OP_deref` prepended. + /// Use this when a value is promoted to a stack allocation. The expression will have + /// `DW_OP_deref` prepended. Deref { /// The new pointer value that replaces the original. new_value: ValueRef, @@ -67,8 +68,8 @@ pub enum SalvageAction { /// A constant offset was added to the value. /// - /// Use this when a value is relocated by a fixed amount (e.g., frame - /// pointer adjustments). The expression will encode the inverse subtraction. + /// Use this when a value is relocated by a fixed amount (e.g., frame pointer adjustments). The + /// expression will encode the inverse subtraction. OffsetBy { /// The new value (original + offset). new_value: ValueRef, @@ -78,14 +79,14 @@ pub enum SalvageAction { /// The value was replaced by a new value with an arbitrary expression. /// - /// Use this for complex transformations where the simple patterns don't apply. - /// The caller provides the full expression describing how to recover the - /// source-level value from the new IR value. + /// Use this for complex transformations where the simple patterns don't apply. The caller + /// provides the full expression describing how to recover the source-level value from the new + /// IR value. WithExpression { /// The new value replacing the original. new_value: ValueRef, /// Expression operations describing the inverse transform. - ops: Vec, + ops: Vec, }, /// The value is now a constant. @@ -98,23 +99,23 @@ pub enum SalvageAction { /// The value was completely removed with no recovery possible. /// - /// Use this as a last resort when the value cannot be recovered. - /// This will emit a `debuginfo.kill` for the affected variable. + /// Use this as a last resort when the value cannot be recovered. This will emit a `di.kill` for + /// the affected variable. Undef, } -/// Salvage debug info for all `debuginfo.value` operations that use `old_value`. 
+/// Salvage debug info for all `di.value` operations that use `old_value`. /// -/// When a transform is about to delete or replace a value, call this function -/// to update all debug uses. The `action` describes how the debugger can recover -/// the original source-level value from the new representation. +/// When a transform is about to delete or replace a value, call this function to update all debug +/// uses. The `action` describes how the debugger can recover the original source-level value from +/// the new representation. /// -/// This is the main entry point for transform authors who need to update debug -/// info beyond simple `replace_all_uses_with` scenarios. +/// This is the main entry point for transform authors who need to update debug info beyond simple +/// `replace_all_uses_with` scenarios. /// /// # Example /// -/// ```ignore +/// ```rust,ignore /// // Value was promoted to memory: /// let ptr = builder.alloca(ty, span)?; /// builder.store(old_val, ptr, span)?; @@ -130,9 +131,7 @@ pub fn salvage_debug_info( builder: &mut B, ) { // Collect all debug value ops that use the old value - let debug_ops: Vec = debug_value_users(old_value); - - for mut debug_op in debug_ops { + for mut debug_op in debug_value_users(old_value) { apply_salvage_action(&mut debug_op, action, builder); } } @@ -153,7 +152,7 @@ fn apply_salvage_action( let dv = op.downcast_ref::().unwrap(); (dv.variable().as_value().clone(), dv.expression().as_value().clone()) }; - expr.operations.insert(0, DIExpressionOp::Deref); + expr.operations.insert(0, ExpressionOp::Deref); // Erase old op and create new one with updated value and expression debug_op.borrow_mut().erase(); @@ -167,8 +166,8 @@ fn apply_salvage_action( (dv.variable().as_value().clone(), dv.expression().as_value().clone()) }; // To recover: subtract the offset that was added - expr.operations.push(DIExpressionOp::ConstU64(*offset)); - expr.operations.push(DIExpressionOp::Minus); + 
expr.operations.push(ExpressionOp::ConstU64(*offset)); + expr.operations.push(ExpressionOp::Minus); debug_op.borrow_mut().erase(); let _ = builder.debug_value_with_expr(*new_value, variable, Some(expr), span); @@ -194,11 +193,11 @@ fn apply_salvage_action( }; debug_op.borrow_mut().erase(); - // Emit a kill since we can't create a debuginfo.value without a live SSA operand - // for constants — the constant value is encoded in the expression + // Emit a kill since we can't create a di.value without a live SSA operand for constants + // — the constant value is encoded in the expression let _ = builder.debug_kill(variable, span); - // TODO: in the future, could emit a debuginfo.value with a materialized constant - // and a ConstU64/StackValue expression pair + // TODO: in the future, could emit a di.value with a materialized constant and a + // ConstU64/StackValue expression pair let _ = value; } @@ -217,25 +216,21 @@ fn apply_salvage_action( /// Check if an operation is a debug info operation. /// -/// This is useful for transforms that need to skip or handle debug ops -/// differently (e.g., DCE should not consider debug uses as "real" uses -/// that keep a value alive). -pub fn is_debug_info_op(op: &dyn Op) -> bool { - op.as_operation().is::() - || op.as_operation().is::() - || op.as_operation().is::() +/// This is useful for transforms that need to skip or handle debug ops differently (e.g., DCE +/// should not consider debug uses as "real" uses that keep a value alive). +pub fn is_debug_info_op(op: &Operation) -> bool { + op.dialect().name() == super::DebugInfoDialect::NAMESPACE } -/// Collect all `debuginfo.value` operations that reference the given value. +/// Collect all `di.value` operations that reference the given value. /// -/// Useful for transforms that need to inspect or update debug info for a -/// specific value. 
-pub fn debug_value_users(value: &ValueRef) -> Vec { +/// Useful for transforms that need to inspect or update debug info for a specific value. +pub fn debug_value_users(value: &ValueRef) -> SmallVec<[OperationRef; 2]> { let value = value.borrow(); - let mut ops = Vec::new(); - for use_ in value.iter_uses() { - if use_.owner.borrow().is::() { - ops.push(use_.owner); + let mut ops = SmallVec::new_const(); + for user in value.iter_uses() { + if user.owner.borrow().is::() { + ops.push(user.owner); } } ops @@ -249,20 +244,11 @@ pub fn collect_debug_ops(op: &OperationRef) -> Vec { } fn collect_debug_ops_recursive(op: &OperationRef, debug_ops: &mut Vec) { - let op = op.borrow(); - - if op.is::() - || op.is::() - || op.is::() - { - debug_ops.push(op.as_operation_ref()); - } + use midenc_hir::{Forward, RawWalk}; - for region in op.regions() { - for block in region.body() { - for inner_op in block.body() { - collect_debug_ops_recursive(&inner_op.as_operation_ref(), debug_ops); - } + op.raw_prewalk_all::(|op: OperationRef| { + if is_debug_info_op(&op.borrow()) { + debug_ops.push(op); } - } + }); } diff --git a/hir/src/ir/effects.rs b/hir/src/ir/effects.rs index 2a492ea92..fe5b0d262 100644 --- a/hir/src/ir/effects.rs +++ b/hir/src/ir/effects.rs @@ -1,3 +1,4 @@ +mod debug; mod instance; mod interface; mod memory; @@ -5,7 +6,7 @@ mod speculation; use core::fmt; -pub use self::{instance::EffectInstance, interface::*, memory::*, speculation::*}; +pub use self::{debug::*, instance::EffectInstance, interface::*, memory::*, speculation::*}; use crate::{DynPartialEq, any::AsAny, eq::PartialEqable}; pub trait Effect: AsAny + fmt::Debug {} diff --git a/hir/src/ir/effects/debug.rs b/hir/src/ir/effects/debug.rs new file mode 100644 index 000000000..6eb2d28f7 --- /dev/null +++ b/hir/src/ir/effects/debug.rs @@ -0,0 +1,53 @@ +use super::*; + +/// Debug effects are similar to memory effects in that they reflect how a debugger may observe the +/// effect during execution/debugging. 
+/// +/// Similarly, optimizations must avoid reordering operations around debug effects in the same way +/// they must not reorder around memory effects (i.e. an op with a `write` memory effect on some +/// resource must not be reordered before an op with a `read` debug effect on that same resource). +/// In practice, debug operations may declare both memory effects and debug effects, to ensure that +/// transformations which are unaware of debug effects still do the right thing with respect to +/// those operations - but this should be considered a last resort. +/// +/// Uses of a value by operations that only have debug effects are ignored when considering the +/// liveness of that value. This allows debug metadata to be recorded in the use-def graph, +/// without interfering with dead-code elimination and other similar optimizations. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum DebugEffect { + /// The following effect indicates that the operation reads from some resource. + /// + /// A 'read' effect implies that a debugger may attempt to dereference the resource. + Read, + /// The following effect indicates that the operation writes to some resource. + /// + /// A 'write' effect implies that a debugger will modify its internal state with respect to + /// some resource (e.g. the storage type or location of a value). This effect only describes + /// mutation of the state, not any visible dereference or read. + Write, + /// The following effect indicates that the operation allocates some resource. + /// + /// An 'allocate' effect implies only allocation of the resource, and not any visible mutation or + /// dereference. In the case of a debugger, this might correspond to allocating a new call frame + /// or starting to track the state of a local variable. + Allocate, + /// The following effect indicates that the operation frees some resource that has been + /// allocated.
+ /// + /// A 'free' effect implies only de-allocation of the resource, and not any visible + /// allocation, mutation or dereference. In a debugging context, this might correspond to + /// popping a frame from the call stack, or marking the end of the live range of some local + /// variable. + Free, +} + +impl PartialEq for &DebugEffect { + #[inline] + fn eq(&self, other: &DebugEffect) -> bool { + (**self).eq(other) + } +} + +impl Effect for DebugEffect {} + +pub trait DebugEffectOpInterface = EffectOpInterface; diff --git a/hir/src/ir/region/transforms/dce.rs b/hir/src/ir/region/transforms/dce.rs index 9ca6ec6e7..2d76d1f6a 100644 --- a/hir/src/ir/region/transforms/dce.rs +++ b/hir/src/ir/region/transforms/dce.rs @@ -7,7 +7,7 @@ use crate::{ OpOperandImpl, OpResult, Operation, OperationRef, PostOrderBlockIter, Region, RegionRef, Rewriter, SuccessorOperands, ValueRef, adt::SmallSet, - traits::{BranchOpInterface, Terminator}, + traits::{BranchOpInterface, Terminator, Transparent}, }; /// Data structure used to track which values have already been proved live. @@ -67,7 +67,12 @@ impl LiveMap { pub fn is_use_specially_known_dead(&self, user: &OpOperandImpl) -> bool { // DCE generally treats all uses of an op as live if the op itself is considered live. - // However, for successor operands to terminators we need a finer-grained notion where we + // + // However, there are two special cases: + // + // ## Successor Operands + // + // For successor operands to terminators we need a finer-grained notion where we // deduce liveness for operands individually. The reason for this is easiest to think about // in terms of a classical phi node based SSA IR, where each successor operand is really an // operand to a _separate_ phi node, rather than all operands to the branch itself as with @@ -76,15 +81,25 @@ impl LiveMap { // And similarly, because each successor operand is really an operand to a phi node, rather // than to the terminator op itself, a terminator op can't e.g. 
"print" the value of a // successor operand. - let owner = &user.owner; - if owner.borrow().implements::() - && let Some(branch_interface) = owner.borrow().as_trait::() + // + // ## Debug Info + // + // The debug info dialect introduces operations that "use" SSA values, but if the use would + // otherwise be dead if the op didn't exist, then we want to treat both the debug op and + // the value use as dead, so that debug info ops do not interfere with dead-code + // elimination. + let owner_ref = &user.owner; + let owner = owner_ref.borrow(); + if owner.implements::() + && let Some(branch_interface) = owner.as_trait::() && let Some(arg) = branch_interface.get_successor_block_argument(user.index as usize) { return !self.was_proven_live(&arg.upcast()); } - false + // If the owning op is transparent, then its value uses are not considered when determining + // liveness + owner.implements::() } pub fn propagate_region_liveness(&mut self, region: &Region) { @@ -100,15 +115,6 @@ impl LiveMap { self.propagate_liveness(&op); } - // We currently do not remove entry block arguments, so there is no need to track their - // liveness. - // - // TODO(pauls): We could track these and enable removing dead operands/arguments from - // region control flow operations in the future. - if block.is_entry_block() { - continue; - } - for arg in block.arguments().iter().copied() { let arg = arg as ValueRef; if !self.was_proven_live(&arg) { @@ -135,7 +141,36 @@ impl LiveMap { } // Process this op - if !op.would_be_trivially_dead() { + if op.implements::() { + // If this op is Transparent, it has zero or one operands and no results. + // + // We consider such ops live IFF it either: + // + // 1. Has no operands + // 2. 
Has an operand which has at least one real use + if op.has_operands() { + for operand in op.operands().iter() { + let operand = operand.borrow(); + if let Some(defining_op) = operand.value().get_defining_op() + && self.was_op_proven_live(&defining_op) + { + self.set_op_proved_live(op.as_operation_ref()); + return; + } else if self.was_proven_live(&operand.as_value_ref()) { + self.set_op_proved_live(op.as_operation_ref()); + return; + } + } + } else { + // Transparent ops with no SSA operands are always treated as live here, as we can + // not otherwise determine whether it is valid to remove it or not + // + // TODO(pauls): We may need to reject such ops, as it would otherwise not be + // generally possible to determine how to handle them during transformations other + // than DCE + self.set_op_proved_live(op.as_operation_ref()); + } + } else if !op.would_be_trivially_dead() { self.set_op_proved_live(op.as_operation_ref()); } @@ -191,6 +226,11 @@ impl Region { rewriter: &mut dyn Rewriter, ) -> Result<(), RegionTransformFailed> { log::debug!(target: "region-simplify", "starting region dead code elimination"); + let live_map = Self::compute_liveness(regions); + Self::cleanup_dead_code(regions, rewriter, &live_map) + } + + fn compute_liveness(regions: &[RegionRef]) -> LiveMap { let mut live_map = LiveMap::default(); loop { live_map.mark_unchanged(); @@ -206,8 +246,7 @@ impl Region { break; } } - - Self::cleanup_dead_code(regions, rewriter, &live_map) + live_map } /// Erase the unreachable blocks within the regions in `regions`. 
@@ -396,3 +435,124 @@ impl Region { } } } + +#[cfg(test)] +mod tests { + use alloc::format; + + use midenc_expect_test::expect_file; + use midenc_session::diagnostics::SourceSpan; + + use super::*; + use crate::{ + Builder, BuilderExt, Op, Type, + derive::{EffectOpInterface, operation}, + dialects::{ + builtin::BuiltinOpBuilder, + test::{TestDialect, TestOpBuilder}, + }, + effects::MemoryEffectOpInterface, + patterns::{NoopRewriterListener, RewriterImpl}, + testing::Test, + traits::{AnyType, Transparent}, + }; + + #[operation( + dialect = TestDialect, + traits(Transparent), + implements(MemoryEffectOpInterface) + )] + #[derive(EffectOpInterface)] + pub struct DebugValue { + #[operand] + #[effects(MemoryEffect())] + value: AnyType, + } + + #[test] + fn transparent_ops_are_not_considered_dead_unless_their_referent_value_is_dead() { + let mut test = + Test::new("transparent_ops_inherit_liveness_of_referent", &[Type::U32], &[Type::U32]); + + let op = test.function(); + let mut builder = test.function_builder(); + let entry = builder.entry_block(); + + let builder = builder.builder_mut(); + builder.set_insertion_point_to_end(entry); + + let input = entry.borrow().arguments()[0] as ValueRef; + + let unused_output = builder.add(input, input, SourceSpan::UNKNOWN).unwrap(); + let dead_debug_var = builder.create::(SourceSpan::UNKNOWN); + let dead_debug_var_op = dead_debug_var(unused_output).unwrap(); + + let output = builder.add(input, input, SourceSpan::UNKNOWN).unwrap(); + let live_debug_var = builder.create::(SourceSpan::UNKNOWN); + let live_debug_var_op = live_debug_var(output).unwrap(); + let ret_op = builder.ret([output], SourceSpan::UNKNOWN).unwrap(); + + let region = op.borrow().body().as_region_ref(); + let live_map = Region::compute_liveness(&[region]); + + // A ret op is always live in region dce + assert!(live_map.was_op_proven_live(&ret_op.as_operation_ref())); + // The `output` value must be live because it is an operand of the ret + 
assert!(live_map.was_proven_live(&output)); + // `live_debug_var_op` is live because `output` is live + assert!(live_map.was_op_proven_live(&live_debug_var_op.as_operation_ref())); + // `input` is live because it is used by the live `add` + assert!(live_map.was_proven_live(&input)); + // `unused_output` must be dead because it has no non-transparent users + assert!(!live_map.was_proven_live(&unused_output)); + // `dead_debug_var_op` must be dead because `unused_output` is dead + assert!(!live_map.was_op_proven_live(&dead_debug_var_op.as_operation_ref())); + } + + #[test] + fn transparent_ops_do_not_interfere_with_dead_code_elimination() { + let mut test = Test::new("transparent_ops_no_dce_interference", &[Type::U32], &[Type::U32]); + + let op = test.function(); + { + let mut builder = test.function_builder(); + let entry = builder.entry_block(); + + let builder = builder.builder_mut(); + builder.set_insertion_point_to_end(entry); + + let input = entry.borrow().arguments()[0] as ValueRef; + + let unused_output = builder.add(input, input, SourceSpan::UNKNOWN).unwrap(); + let dead_debug_var = builder.create::(SourceSpan::UNKNOWN); + let _dead_debug_var_op = dead_debug_var(unused_output).unwrap(); + + let output = builder.add(input, input, SourceSpan::UNKNOWN).unwrap(); + let live_debug_var = builder.create::(SourceSpan::UNKNOWN); + let _live_debug_var_op = live_debug_var(output).unwrap(); + builder.ret([output], SourceSpan::UNKNOWN).unwrap(); + } + + let before = format!("{}", op.borrow().as_operation()); + expect_file!["expected/transparent_ops_do_not_interfere_with_dce_before.hir"] + .assert_eq(&before); + + let region = op.borrow().body().as_region_ref(); + + { + let mut rewriter = RewriterImpl::::new(test.context_rc()); + Region::dead_code_elimination(&[region], &mut rewriter) + .expect("dead code elimination failed unexpectedly"); + } + + let after = format!("{}", op.borrow().as_operation()); + 
expect_file!["expected/transparent_ops_do_not_interfere_with_dce_after.hir"] + .assert_eq(&after); + + assert_ne!(&before, &after); + assert_eq!(before.matches("test.debug_value").count(), 2); + assert_eq!(before.matches("test.add").count(), 2); + assert_eq!(after.matches("test.debug_value").count(), 1); + assert_eq!(after.matches("test.add").count(), 1); + } +} diff --git a/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_after.hir b/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_after.hir new file mode 100644 index 000000000..3084583bc --- /dev/null +++ b/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_after.hir @@ -0,0 +1,5 @@ +builtin.function public extern("C") @transparent_ops_no_dce_interference(%0: u32) -> u32 { + %2 = test.add %0, %0 <{ overflow = #builtin.overflow }>; + "test.debug_value"(%2) : u32 -> (); + builtin.ret %2 : (u32); +}; \ No newline at end of file diff --git a/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_before.hir b/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_before.hir new file mode 100644 index 000000000..9c44fb38d --- /dev/null +++ b/hir/src/ir/region/transforms/expected/transparent_ops_do_not_interfere_with_dce_before.hir @@ -0,0 +1,7 @@ +builtin.function public extern("C") @transparent_ops_no_dce_interference(%0: u32) -> u32 { + %1 = test.add %0, %0 <{ overflow = #builtin.overflow }>; + "test.debug_value"(%1) : u32 -> (); + %2 = test.add %0, %0 <{ overflow = #builtin.overflow }>; + "test.debug_value"(%2) : u32 -> (); + builtin.ret %2 : (u32); +}; \ No newline at end of file diff --git a/hir/src/ir/traits.rs b/hir/src/ir/traits.rs index 081342127..c28310b53 100644 --- a/hir/src/ir/traits.rs +++ b/hir/src/ir/traits.rs @@ -319,3 +319,55 @@ pub trait SingleRegion { // pub trait HasParent {} // pub trait ParentOneOf<(T,...)> {} + +/// Marker trait for ops which: +/// +/// * 
Represent the attachment of metadata to values in the IR +/// * Should not be considered as a "real" user for purposes of determining liveness of its operands +/// * Should not be considered dead unless all of its operands are also dead +/// * Does not result in any code being emitted during codegen +/// +/// The goal of such operations is to attach important metadata, such as debug information, to +/// values in the IR, ensuring that the metadata is preserved through transformations, while not +/// interfering with optimizations that may make the original value dead except for the uses by +/// transparent ops. +#[operation_trait] +pub trait Transparent { + #[verifier] + fn has_no_results(op: &Operation, context: &Context) -> Result<(), Report> { + if op.results().is_empty() { + Ok(()) + } else { + Err(context + .diagnostics() + .diagnostic(Severity::Error) + .with_message(::alloc::format!("invalid operation {}", op.name())) + .with_primary_label(op.span(), "expected operation to have no results") + .with_help( + "this operator implements 'Transparent', which requires it to have no results", + ) + .into_report()) + } + } + + #[verifier] + fn has_no_more_than_one_operand(op: &Operation, context: &Context) -> Result<(), Report> { + if op.num_operands() > 1 { + Err(context + .diagnostics() + .diagnostic(Severity::Error) + .with_message(::alloc::format!("invalid operation {}", op.name())) + .with_primary_label( + op.span(), + "expected operation to have no more than one operand", + ) + .with_help( + "this operator implements 'Transparent', which requires it to have an arity < \ + 2", + ) + .into_report()) + } else { + Ok(()) + } + } +} diff --git a/hir/src/ir/value.rs b/hir/src/ir/value.rs index d09352e2d..49dd6431a 100644 --- a/hir/src/ir/value.rs +++ b/hir/src/ir/value.rs @@ -10,7 +10,7 @@ pub use self::{ stack::StackOperand, }; use super::*; -use crate::{DynHash, DynPartialEq, PartialEqable, any::AsAny, interner}; +use crate::{DynHash, DynPartialEq, PartialEqable, 
any::AsAny, interner, traits::Transparent}; /// A unique identifier for a [Value] in the IR #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -175,6 +175,13 @@ pub trait Value: self.iter_uses() .any(|user| user.owner.parent().is_some_and(|blk| !BlockRef::ptr_eq(&blk, block))) } + /// Returns true if this value has at least one user that is not a debug info op + fn has_real_uses(&self) -> bool { + // The value is used so long as at least one using op is not Transparent + self.uses() + .iter() + .any(|user| !user.owner.borrow().implements::()) + } /// Replace all uses of `self` with `replacement` fn replace_all_uses_with(&mut self, mut replacement: ValueRef) { let mut cursor = self.uses_mut().front_mut(); diff --git a/hir/src/lib.rs b/hir/src/lib.rs index d9d8d2789..1aee97a7c 100644 --- a/hir/src/lib.rs +++ b/hir/src/lib.rs @@ -26,6 +26,7 @@ #![feature(extend_one)] #![feature(extend_one_unchecked)] #![feature(iter_advance_by)] +#![feature(iter_intersperse)] #![feature(iter_next_chunk)] #![feature(iter_collect_into)] #![feature(trusted_len)] @@ -83,10 +84,7 @@ pub use midenc_session::diagnostics; pub use self::{ attributes::{ Attribute, AttributeName, AttributeRef, AttributeRegistration, AttributeValue, - DICompileUnit, DICompileUnitAttr, DIExpression, DIExpressionAttr, DIExpressionOp, - DILocalVariable, DILocalVariableAttr, DISubprogram, DISubprogramAttr, NamedAttribute, - NamedAttributeList, decode_frame_base_local_index, decode_frame_base_local_offset, - encode_frame_base_local_index, encode_frame_base_local_offset, + NamedAttribute, NamedAttributeList, }, dialects::builtin::attributes::{Location, Overflow, Visibility, version}, direction::{Backward, Direction, Forward}, diff --git a/hir/src/patterns/rewriter.rs b/hir/src/patterns/rewriter.rs index 2738890bb..925509494 100644 --- a/hir/src/patterns/rewriter.rs +++ b/hir/src/patterns/rewriter.rs @@ -6,10 +6,11 @@ use smallvec::SmallVec; use crate::{ BlockRef, Builder, Context, InsertionGuard, Listener, 
ListenerType, OpBuilder, OpOperandImpl, - OperationRef, PostOrderBlockIter, ProgramPoint, RegionRef, Report, SourceSpan, Usable, + OperationRef, PostOrderBlockIter, ProgramPoint, RegionRef, Report, SourceSpan, Usable, Value, ValueRef, formatter::{DisplayOptional, DisplayValues}, patterns::Pattern, + traits::Transparent, }; /// A [Rewriter] is a [Builder] extended with additional functionality that is of primary use when @@ -51,7 +52,24 @@ pub trait Rewriter: Builder + RewriterListener { /// This method erases an operation that is known to have no uses. fn erase_op(&mut self, mut op: OperationRef) { - assert!(!op.borrow().is_used(), "expected op to have no uses"); + // Assert `op` has no real uses, and erase any transparent users as they are now dead + { + let op = op.borrow(); + for result in op.results().iter() { + let result = result.borrow(); + for user in result.iter_uses() { + log::info!(target: "erase_op", "{}", user.owner.borrow()); + } + assert!(!result.has_real_uses(), "expected op to have no real uses"); + // If there are remaining uses, they must be transparent, so remove them + for user in result.iter_uses() { + let owner = user.owner; + drop(user); + assert!(owner.borrow().implements::()); + self.erase_op(owner); + } + } + } // If no listener is attached, the op can be dropped all at once. 
if !self.has_listener() { diff --git a/tests/integration-network/src/mockchain/basic_wallet.rs b/tests/integration-network/src/mockchain/basic_wallet.rs index b55b62897..46f10b5cd 100644 --- a/tests/integration-network/src/mockchain/basic_wallet.rs +++ b/tests/integration-network/src/mockchain/basic_wallet.rs @@ -107,7 +107,7 @@ pub fn test_basic_wallet_p2id() { chain.build_tx_context(alice_id, &[p2id_note_mint.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); expect!["3216"].assert_eq(prologue_cycles(&tx_measurements)); - expect!["20094"].assert_eq(note_cycles(&tx_measurements, p2id_note_mint.id())); + expect!["20086"].assert_eq(note_cycles(&tx_measurements, p2id_note_mint.id())); eprintln!("\n=== Checking Alice's account has the minted asset ==="); let alice_account = chain.committed_account(alice_id).unwrap(); @@ -127,12 +127,12 @@ pub fn test_basic_wallet_p2id() { &mut note_rng, ); let tx_measurements = execute_tx(&mut chain, alice_tx_context_builder); - expect!["25009"].assert_eq(tx_script_processing_cycles(&tx_measurements)); + expect!["25007"].assert_eq(tx_script_processing_cycles(&tx_measurements)); eprintln!("\n=== Step 4: Bob consumes p2id note ==="); let consume_tx_context_builder = chain.build_tx_context(bob_id, &[bob_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); - expect!["20094"].assert_eq(note_cycles(&tx_measurements, bob_note.id())); + expect!["20086"].assert_eq(note_cycles(&tx_measurements, bob_note.id())); eprintln!("\n=== Checking Bob's account has the transferred asset ==="); let bob_account = chain.committed_account(bob_id).unwrap(); @@ -257,7 +257,7 @@ pub fn test_basic_wallet_p2ide() { let consume_tx_context_builder = chain.build_tx_context(bob_id, &[p2ide_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); - expect!["20569"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); + 
expect!["20561"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); // Step 5: verify balances let bob_account = chain.committed_account(bob_id).unwrap(); @@ -382,7 +382,7 @@ pub fn test_basic_wallet_p2ide_reclaim() { let reclaim_tx_context_builder = chain.build_tx_context(alice_id, &[p2ide_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, reclaim_tx_context_builder); - expect!["21582"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); + expect!["21574"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); // Step 5: verify Alice has her original amount back let alice_account = chain.committed_account(alice_id).unwrap(); diff --git a/tests/integration/expected/debug_variable_locations.hir b/tests/integration/expected/debug_variable_locations.hir index 9828234f9..1b02a71c4 100644 --- a/tests/integration/expected/debug_variable_locations.hir +++ b/tests/integration/expected/debug_variable_locations.hir @@ -4,10 +4,10 @@ builtin.component private @root_ns:root@1.0.0 { hir.store_local %0 <{ local = #builtin.local_variable<0, i32> }> : (i32); %2 = arith.constant 0 : i32; hir.store_local %2 <{ local = #builtin.local_variable<1, i32> }> : (i32); - "debuginfo.debug_value"(%2) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : i32 -> (); + di.debug_value %2 <{ variable = #di.variable<{ name = "local1", file = "unknown", line = 0, ty = i32 }>, expression = #di.expression<[DW_OP_WASM_local(1)]> }> : (i32); %3 = arith.constant 0 : i32; hir.store_local %3 <{ local = #builtin.local_variable<2, i32> }> : (i32); - "debuginfo.debug_value"(%3) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : i32 -> (); + di.debug_value %3 <{ variable = #di.variable<{ name = "local2", file = "unknown", line = 0, ty = i32 }>, expression = #di.expression<[DW_OP_WASM_local(2)]> }> : (i32); cf.br ^block8; ^block7(%1: i32): @@ -29,12 +29,12 @@ builtin.component private @root_ns:root@1.0.0 { %16 = 
hir.load_local <{ local = #builtin.local_variable<1, i32> }>; %17 = arith.add %15, %16 <{ overflow = #builtin.overflow }>; hir.store_local %17 <{ local = #builtin.local_variable<2, i32> }> : (i32); - "debuginfo.debug_value"(%17) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : i32 -> (); + di.debug_value %17 <{ variable = #di.variable<{ name = "local2", file = "unknown", line = 0, ty = i32 }>, expression = #di.expression<[DW_OP_WASM_local(2)]> }> : (i32); %18 = hir.load_local <{ local = #builtin.local_variable<1, i32> }>; %19 = arith.constant 1 : i32; %20 = arith.add %18, %19 <{ overflow = #builtin.overflow }>; hir.store_local %20 <{ local = #builtin.local_variable<1, i32> }> : (i32); - "debuginfo.debug_value"(%20) <{ variable = #builtin.di_local_variable, expression = #builtin.di_expression }> : i32 -> (); + di.debug_value %20 <{ variable = #di.variable<{ name = "local1", file = "unknown", line = 0, ty = i32 }>, expression = #di.expression<[DW_OP_WASM_local(1)]> }> : (i32); cf.br ^block8; ^block11: %14 = hir.load_local <{ local = #builtin.local_variable<2, i32> }>; diff --git a/tests/integration/src/rust_masm_tests/examples.rs b/tests/integration/src/rust_masm_tests/examples.rs index 4fc975c54..2a95e7b68 100644 --- a/tests/integration/src/rust_masm_tests/examples.rs +++ b/tests/integration/src/rust_masm_tests/examples.rs @@ -329,7 +329,7 @@ fn basic_wallet_and_p2id() { CompilerTest::rust_source_cargo_miden("../../examples/basic-wallet", config.clone(), []); let account_package = account_test.compile_package(); assert!(account_package.is_library(), "expected library"); - expect!["36630"].assert_eq(stripped_mast_size_str(&account_package)); + expect!["35596"].assert_eq(stripped_mast_size_str(&account_package)); let mut tx_script_test = CompilerTest::rust_source_cargo_miden( "../../examples/basic-wallet-tx-script", @@ -338,19 +338,19 @@ fn basic_wallet_and_p2id() { ); let tx_script_package = tx_script_test.compile_package(); 
assert!(tx_script_package.is_program(), "expected program"); - expect!["56999"].assert_eq(stripped_mast_size_str(&tx_script_package)); + expect!["55376"].assert_eq(stripped_mast_size_str(&tx_script_package)); let mut p2id_test = CompilerTest::rust_source_cargo_miden("../../examples/p2id-note", config.clone(), []); let note_package = p2id_test.compile_package(); assert!(note_package.is_library(), "expected library"); - expect!["55262"].assert_eq(stripped_mast_size_str(¬e_package)); + expect!["52965"].assert_eq(stripped_mast_size_str(¬e_package)); let mut p2ide_test = CompilerTest::rust_source_cargo_miden("../../examples/p2ide-note", config, []); let p2ide_package = p2ide_test.compile_package(); assert!(p2ide_package.is_library(), "expected library"); - expect!["61528"].assert_eq(stripped_mast_size_str(&p2ide_package)); + expect!["59086"].assert_eq(stripped_mast_size_str(&p2ide_package)); } #[test] From 90ebb547cc7bfc54de6cb65028524bb4558d0797 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Fri, 1 May 2026 13:08:44 -0400 Subject: [PATCH 29/32] chore: rebase on latest toolchain and dependency versions --- frontend/wasm/src/module/debug_info.rs | 14 ++--- midenc-compile/src/stages/assemble.rs | 53 +------------------ .../src/mockchain/basic_wallet.rs | 10 ++-- .../src/mockchain/counter_contract.rs | 2 +- .../src/mockchain/counter_contract_no_auth.rs | 2 +- .../mockchain/counter_contract_rust_auth.rs | 2 +- .../src/rust_masm_tests/examples.rs | 8 +-- 7 files changed, 18 insertions(+), 73 deletions(-) diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index 5a924ca9d..8bda224ca 100644 --- a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -417,7 +417,7 @@ fn build_location_schedule(locals: &[Option]) -> Vec data, Ok(None) => break, Err(err) => { @@ -481,8 +481,6 @@ fn collect_dwarf_local_data( break; } }; - let (delta, entry) = next; - let _ = delta; // we don't need depth deltas 
explicitly. if entry.tag() == gimli::DW_TAG_subprogram { let Some(info) = @@ -540,8 +538,7 @@ fn resolve_subprogram_target>( let mut high_pc = None; let mut frame_base_global = None; - let mut attrs = entry.attrs(); - while let Ok(Some(attr)) = attrs.next() { + for attr in entry.attrs() { match attr.name() { gimli::DW_AT_name => { if let Ok(raw) = dwarf.attr_string(unit, attr.value()) @@ -724,7 +721,7 @@ fn walk_variable_nodes>( fn decode_variable_entry>( dwarf: &gimli::Dwarf, unit: &gimli::Unit, - entry: &gimli::DebuggingInformationEntry<'_, '_, R>, + entry: &gimli::DebuggingInformationEntry, low_pc: u64, high_pc: Option, frame_base_global: Option, @@ -736,8 +733,7 @@ fn decode_variable_entry>( let mut decl_line = None; let mut decl_column = None; - let mut attrs = entry.attrs(); - while let Some(attr) = attrs.next()? { + for attr in entry.attrs() { match attr.name() { gimli::DW_AT_name => { if let Ok(raw) = dwarf.attr_string(unit, attr.value()) diff --git a/midenc-compile/src/stages/assemble.rs b/midenc-compile/src/stages/assemble.rs index eed1ad7b7..21d499832 100644 --- a/midenc-compile/src/stages/assemble.rs +++ b/midenc-compile/src/stages/assemble.rs @@ -1,9 +1,4 @@ -use alloc::{string::ToString, vec::Vec}; - -use miden_mast_package::{ - Dependency, Package, PackageManifest, Section, SectionId, TargetType, Version, -}; -use midenc_session::Session; +use miden_mast_package::Package; use super::*; @@ -57,49 +52,3 @@ impl Stage for AssembleStage { } } } - -fn build_package( - artifact: midenc_codegen_masm::AssemblyArtifact, - outputs: &CodegenOutput, - session: &Session, -) -> Package { - let name = session.name.clone().into(); - - let mut dependencies = Vec::new(); - for (link_lib, lib) in session.options.link_libraries.iter().zip(outputs.link_libraries.iter()) - { - let dependency = Dependency { - name: link_lib.name.to_string().into(), - kind: TargetType::Library, - // proper version will be implemented in https://github.com/0xMiden/compiler/issues/1069 - 
version: Version::new(0, 0, 0), - digest: *lib.digest(), - }; - dependencies.push(dependency); - } - - let kind = artifact.kind(); - let mast = artifact.into_mast(); - let manifest = PackageManifest::from_library(&mast) - .with_dependencies(dependencies) - .expect("package dependencies should be unique"); - - let account_component_metadata_bytes = outputs.account_component_metadata_bytes.clone(); - - let mut sections = Vec::new(); - - if let Some(bytes) = account_component_metadata_bytes { - sections.push(Section::new(SectionId::ACCOUNT_COMPONENT_METADATA, bytes)); - } - - Package { - name, - // proper version will be implemented in https://github.com/0xMiden/compiler/issues/1068 - version: Version::new(0, 0, 0), - description: None, - kind, - mast: mast.into(), - manifest, - sections, - } -} diff --git a/tests/integration-network/src/mockchain/basic_wallet.rs b/tests/integration-network/src/mockchain/basic_wallet.rs index 46f10b5cd..125dc5144 100644 --- a/tests/integration-network/src/mockchain/basic_wallet.rs +++ b/tests/integration-network/src/mockchain/basic_wallet.rs @@ -107,7 +107,7 @@ pub fn test_basic_wallet_p2id() { chain.build_tx_context(alice_id, &[p2id_note_mint.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); expect!["3216"].assert_eq(prologue_cycles(&tx_measurements)); - expect!["20086"].assert_eq(note_cycles(&tx_measurements, p2id_note_mint.id())); + expect!["20072"].assert_eq(note_cycles(&tx_measurements, p2id_note_mint.id())); eprintln!("\n=== Checking Alice's account has the minted asset ==="); let alice_account = chain.committed_account(alice_id).unwrap(); @@ -127,12 +127,12 @@ pub fn test_basic_wallet_p2id() { &mut note_rng, ); let tx_measurements = execute_tx(&mut chain, alice_tx_context_builder); - expect!["25007"].assert_eq(tx_script_processing_cycles(&tx_measurements)); + expect!["26223"].assert_eq(tx_script_processing_cycles(&tx_measurements)); eprintln!("\n=== Step 4: Bob consumes p2id note 
==="); let consume_tx_context_builder = chain.build_tx_context(bob_id, &[bob_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); - expect!["20086"].assert_eq(note_cycles(&tx_measurements, bob_note.id())); + expect!["20072"].assert_eq(note_cycles(&tx_measurements, bob_note.id())); eprintln!("\n=== Checking Bob's account has the transferred asset ==="); let bob_account = chain.committed_account(bob_id).unwrap(); @@ -257,7 +257,7 @@ pub fn test_basic_wallet_p2ide() { let consume_tx_context_builder = chain.build_tx_context(bob_id, &[p2ide_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, consume_tx_context_builder); - expect!["20561"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); + expect!["21212"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); // Step 5: verify balances let bob_account = chain.committed_account(bob_id).unwrap(); @@ -382,7 +382,7 @@ pub fn test_basic_wallet_p2ide_reclaim() { let reclaim_tx_context_builder = chain.build_tx_context(alice_id, &[p2ide_note.id()], &[]).unwrap(); let tx_measurements = execute_tx(&mut chain, reclaim_tx_context_builder); - expect!["21574"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); + expect!["22872"].assert_eq(note_cycles(&tx_measurements, p2ide_note.id())); // Step 5: verify Alice has her original amount back let alice_account = chain.committed_account(alice_id).unwrap(); diff --git a/tests/integration-network/src/mockchain/counter_contract.rs b/tests/integration-network/src/mockchain/counter_contract.rs index c5bf35447..a92fb08f8 100644 --- a/tests/integration-network/src/mockchain/counter_contract.rs +++ b/tests/integration-network/src/mockchain/counter_contract.rs @@ -68,7 +68,7 @@ pub fn test_counter_contract() { .build_tx_context(counter_account.clone(), &[counter_note.id()], &[]) .unwrap(); let tx_measurements = execute_tx(&mut chain, tx_context_builder); - 
expect!["24294"].assert_eq(note_cycles(&tx_measurements, counter_note.id())); + expect!["49505"].assert_eq(note_cycles(&tx_measurements, counter_note.id())); // The counter contract storage value should be 2 after the note is consumed (incremented by 1). assert_counter_storage( diff --git a/tests/integration-network/src/mockchain/counter_contract_no_auth.rs b/tests/integration-network/src/mockchain/counter_contract_no_auth.rs index 00909db45..ff2066b23 100644 --- a/tests/integration-network/src/mockchain/counter_contract_no_auth.rs +++ b/tests/integration-network/src/mockchain/counter_contract_no_auth.rs @@ -105,7 +105,7 @@ pub fn test_counter_contract_no_auth() { .unwrap(); let tx_measurements = execute_tx(&mut chain, tx_context_builder); expect!["1803"].assert_eq(auth_procedure_cycles(&tx_measurements)); - expect!["24294"].assert_eq(note_cycles(&tx_measurements, counter_note.id())); + expect!["49505"].assert_eq(note_cycles(&tx_measurements, counter_note.id())); // The counter contract storage value should be 2 after the note is consumed assert_counter_storage( diff --git a/tests/integration-network/src/mockchain/counter_contract_rust_auth.rs b/tests/integration-network/src/mockchain/counter_contract_rust_auth.rs index eef14a217..ef2919d9a 100644 --- a/tests/integration-network/src/mockchain/counter_contract_rust_auth.rs +++ b/tests/integration-network/src/mockchain/counter_contract_rust_auth.rs @@ -72,7 +72,7 @@ pub fn test_counter_contract_rust_auth_blocks_unauthorized_note_creation() { let tx_context = tx_context_builder.build().unwrap(); let executed_tx = block_on(tx_context.execute()).expect("authorized client should be able to create a note"); - expect!["82697"].assert_eq(auth_procedure_cycles(executed_tx.measurements())); + expect!["86895"].assert_eq(auth_procedure_cycles(executed_tx.measurements())); assert_eq!(executed_tx.output_notes().num_notes(), 1); assert_eq!(executed_tx.output_notes().get_note(0).id(), own_note.id()); diff --git 
a/tests/integration/src/rust_masm_tests/examples.rs b/tests/integration/src/rust_masm_tests/examples.rs index 2a95e7b68..e1323e357 100644 --- a/tests/integration/src/rust_masm_tests/examples.rs +++ b/tests/integration/src/rust_masm_tests/examples.rs @@ -329,7 +329,7 @@ fn basic_wallet_and_p2id() { CompilerTest::rust_source_cargo_miden("../../examples/basic-wallet", config.clone(), []); let account_package = account_test.compile_package(); assert!(account_package.is_library(), "expected library"); - expect!["35596"].assert_eq(stripped_mast_size_str(&account_package)); + expect!["36014"].assert_eq(stripped_mast_size_str(&account_package)); let mut tx_script_test = CompilerTest::rust_source_cargo_miden( "../../examples/basic-wallet-tx-script", @@ -338,19 +338,19 @@ fn basic_wallet_and_p2id() { ); let tx_script_package = tx_script_test.compile_package(); assert!(tx_script_package.is_program(), "expected program"); - expect!["55376"].assert_eq(stripped_mast_size_str(&tx_script_package)); + expect!["56555"].assert_eq(stripped_mast_size_str(&tx_script_package)); let mut p2id_test = CompilerTest::rust_source_cargo_miden("../../examples/p2id-note", config.clone(), []); let note_package = p2id_test.compile_package(); assert!(note_package.is_library(), "expected library"); - expect!["52965"].assert_eq(stripped_mast_size_str(¬e_package)); + expect!["53190"].assert_eq(stripped_mast_size_str(¬e_package)); let mut p2ide_test = CompilerTest::rust_source_cargo_miden("../../examples/p2ide-note", config, []); let p2ide_package = p2ide_test.compile_package(); assert!(p2ide_package.is_library(), "expected library"); - expect!["59086"].assert_eq(stripped_mast_size_str(&p2ide_package)); + expect!["62781"].assert_eq(stripped_mast_size_str(&p2ide_package)); } #[test] From ce54f0f62a4516e34932231e9e9ff81155b0df11 Mon Sep 17 00:00:00 2001 From: djole Date: Mon, 4 May 2026 12:29:07 +0200 Subject: [PATCH 30/32] fix(hir): preserve debug variable ops through rewrites --- .editorconfig | 3 ++ 
hir-transform/src/sink.rs | 56 ++++++++++++++++------------- hir/src/ir/region/transforms/dce.rs | 11 +++--- hir/src/ir/value.rs | 4 +-- 4 files changed, 42 insertions(+), 32 deletions(-) diff --git a/.editorconfig b/.editorconfig index 8740487fd..0a171c17a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -10,6 +10,9 @@ charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true +[*.rs] +max_line_length = 100 + [*.yml] ident_size = 2 diff --git a/hir-transform/src/sink.rs b/hir-transform/src/sink.rs index 716c0130a..a8a6bd8a4 100644 --- a/hir-transform/src/sink.rs +++ b/hir-transform/src/sink.rs @@ -13,25 +13,33 @@ use midenc_hir::{ /// Check whether `operation` is the sole _non-transparent_ user of `value`. /// -/// Ops that implement `Transparent` are excluded, because they are purely informational and their -/// uses are not considered for purposes of computing liveness. +/// Transparent users that do not carry debug effects are excluded, because they are purely +/// informational and their uses are not considered for purposes of computing liveness. +/// +/// Debug-effect ops are also Transparent, but they must keep their operands alive until codegen +/// turns them into VM debug decorators. fn is_sole_non_transparent_user(value: &dyn Value, operation: OperationRef) -> bool { - value - .iter_uses() - .all(|user| user.owner == operation && !user.owner.borrow().implements::()) + value.iter_uses().all(|user| { + let owner = user.owner.borrow(); + user.owner == operation + || (owner.implements::() + && owner.as_trait::().is_none()) + }) } -/// Erase all debug info operations that reference the given value. +/// Erase all non-debug transparent operations that reference the given value. /// /// This is used before erasing a defining op whose result is only kept alive by -/// debug uses. The debug ops are simply removed; the codegen emitter is also -/// hardened to skip orphaned debug ops, so this is a best-effort cleanup. 
+/// non-debug transparent uses. fn erase_transparent_users(value: ValueRef) { - let debug_ops: SmallVec<[OperationRef; 2]> = { + let transparent_ops: SmallVec<[OperationRef; 2]> = { let v = value.borrow(); v.iter_uses() .filter_map(|user| { - if user.owner.borrow().implements::() { + let owner = user.owner.borrow(); + if owner.implements::() + && owner.as_trait::().is_none() + { Some(user.owner) } else { None @@ -39,7 +47,7 @@ fn erase_transparent_users(value: ValueRef) { }) .collect() }; - for mut op in debug_ops { + for mut op in transparent_ops { op.borrow_mut().erase(); } } @@ -254,8 +262,8 @@ impl Pass for SinkOperandDefs { for operand in op.operands().iter().rev() { let value = operand.borrow(); let value = value.value(); - // Exclude debug info uses when determining whether this is the sole user — - // transparent ops are observational and should not prevent sinking. + // Exclude non-debug transparent uses when determining whether this is the sole + // user. let is_sole_user = is_sole_non_transparent_user(&*value, operation); let Some(defining_op) = value.get_defining_op() else { @@ -316,8 +324,7 @@ impl Pass for SinkOperandDefs { continue; } - // If this operation is unused (or only has debug info uses), remove it now if it has no - // side effects. + // If this operation is unused, remove it now if it has no side effects. // // NOTE: We explicitly DO NOT remove transparent ops here, unless we're removing the // defining op of the transparent operand @@ -325,7 +332,7 @@ impl Pass for SinkOperandDefs { if !has_real_uses && op.would_be_trivially_dead() && erased.insert(operation) { log::debug!(target: Self::NAME, "erasing unused, effect-free, non-terminator op {op}"); drop(op); - // Erase any remaining debug uses before erasing the defining op + // Erase any remaining non-debug transparent uses before erasing the defining op. 
for result in operation.borrow().results().iter() { erase_transparent_users(result.borrow().as_value_ref()); } @@ -362,8 +369,8 @@ impl Pass for SinkOperandDefs { operand.borrow_mut().set(replacement); changed = PostPassStatus::Changed; - // If no other non-debug uses of this value remain, then remove - // the original operation, as it is now dead. + // If no real uses of this value remain, then remove the original + // operation, as it is now dead. if !operand_value.borrow().has_real_uses() { log::trace!(target: Self::NAME, " {operand_value} is no longer used, erasing definition"); erase_transparent_users(operand_value); @@ -376,7 +383,7 @@ impl Pass for SinkOperandDefs { } let value = operand_value.borrow(); - // Exclude debug info uses when determining sole-user status. + // Exclude non-debug transparent uses when determining sole-user status. let is_sole_user = is_sole_non_transparent_user(&*value, operation); let Some(mut defining_op) = value.get_defining_op() else { @@ -552,8 +559,8 @@ where /// users of the given op are dominated by the entry block of the region, and thus the operation /// can be sunk into the region. /// - /// Transparent uses are excluded because they are observational and should not prevent - /// control-flow sinking. + /// Non-debug transparent uses are excluded because they are observational and should not + /// prevent control-flow sinking. fn all_users_dominated_by(&self, op: &Operation, region: &Region) -> bool { assert!( region.find_ancestor_op(op.as_operation_ref()).is_none(), @@ -563,9 +570,10 @@ where op.results().iter().all(|result| { let result = result.borrow(); result.iter_uses().all(|user| { - // Skip debug info users — they are observational and should not - // prevent sinking. 
- if user.owner.borrow().implements::() { + let owner = user.owner.borrow(); + if owner.implements::() + && owner.as_trait::().is_none() + { return true; } // The user is dominated by the region if its containing block is dominated diff --git a/hir/src/ir/region/transforms/dce.rs b/hir/src/ir/region/transforms/dce.rs index 2d76d1f6a..cc232e694 100644 --- a/hir/src/ir/region/transforms/dce.rs +++ b/hir/src/ir/region/transforms/dce.rs @@ -82,12 +82,11 @@ impl LiveMap { // than to the terminator op itself, a terminator op can't e.g. "print" the value of a // successor operand. // - // ## Debug Info + // ## Transparent Metadata // - // The debug info dialect introduces operations that "use" SSA values, but if the use would - // otherwise be dead if the op didn't exist, then we want to treat both the debug op and - // the value use as dead, so that debug info ops do not interfere with dead-code - // elimination. + // Some Transparent operations "use" SSA values only as metadata. If the use would + // otherwise be dead if the op didn't exist, then we treat both the metadata op and the + // value use as dead, so that these ops do not interfere with dead-code elimination. let owner_ref = &user.owner; let owner = owner_ref.borrow(); if owner.implements::() @@ -98,7 +97,7 @@ impl LiveMap { } // If the owning op is transparent, then its value uses are not considered when determining - // liveness + // liveness. 
owner.implements::() } diff --git a/hir/src/ir/value.rs b/hir/src/ir/value.rs index 49dd6431a..ad5bf9378 100644 --- a/hir/src/ir/value.rs +++ b/hir/src/ir/value.rs @@ -175,9 +175,9 @@ pub trait Value: self.iter_uses() .any(|user| user.owner.parent().is_some_and(|blk| !BlockRef::ptr_eq(&blk, block))) } - /// Returns true if this value has at least one user that is not a debug info op + /// Returns true if this value has at least one non-transparent user that should keep it alive fn has_real_uses(&self) -> bool { - // The value is used so long as at least one using op is not Transparent + // The value is used so long as at least one using op is not Transparent. self.uses() .iter() .any(|user| !user.owner.borrow().implements::()) From 71a95b6a7db48a58e8eeaf98d31f28e971a860f4 Mon Sep 17 00:00:00 2001 From: djole Date: Mon, 4 May 2026 13:06:40 +0200 Subject: [PATCH 31/32] fix: emit Rust debug values in integration tests --- frontend/wasm/src/module/debug_info.rs | 34 +++++++++++++++++++ tests/integration/src/compiler_test.rs | 21 ++++++++++++ .../integration/src/rust_masm_tests/debug.rs | 21 +++++++++--- 3 files changed, 71 insertions(+), 5 deletions(-) diff --git a/frontend/wasm/src/module/debug_info.rs b/frontend/wasm/src/module/debug_info.rs index 8bda224ca..541e1a8fd 100644 --- a/frontend/wasm/src/module/debug_info.rs +++ b/frontend/wasm/src/module/debug_info.rs @@ -97,6 +97,7 @@ pub struct FunctionDebugInfo { #[derive(Default, Clone)] struct DwarfLocalData { name: Option, + decl_file: Option, locations: Vec, decl_line: Option, decl_column: Option, @@ -289,6 +290,9 @@ fn build_local_debug_info( } let dwarf_info = dwarf_entry.cloned(); if let Some(info) = dwarf_info.as_ref() { + if let Some(file) = info.decl_file { + attr.file = file; + } if let Some(line) = info.decl_line && line != 0 { @@ -340,6 +344,9 @@ fn build_local_debug_info( } let dwarf_info = dwarf_entry.cloned(); if let Some(info) = dwarf_info.as_ref() { + if let Some(file) = info.decl_file { + attr.file = 
file; + } if let Some(line) = info.decl_line && line != 0 { @@ -375,6 +382,9 @@ fn build_local_debug_info( for fb_var in fb_vars { let name = fb_var.name.unwrap_or_else(|| Symbol::intern("?")); let mut attr = Variable::new(name, subprogram.file, subprogram.line, subprogram.column); + if let Some(file) = fb_var.decl_file { + attr.file = file; + } if let Some(line) = fb_var.decl_line.filter(|l| *l != 0) { attr.line = line; } @@ -686,6 +696,7 @@ fn walk_variable_nodes>( let local_map = results.entry(func_index).or_default(); let entry = local_map.entry(local_index).or_insert_with(DwarfLocalData::default); entry.name = entry.name.or(data.name); + entry.decl_file = entry.decl_file.or(data.decl_file); entry.decl_line = entry.decl_line.or(data.decl_line); entry.decl_column = entry.decl_column.or(data.decl_column); if !data.locations.is_empty() { @@ -729,6 +740,7 @@ fn decode_variable_entry>( frame_base_vars: &mut Vec, ) -> gimli::Result> { let mut name_symbol = None; + let mut decl_file = None; let mut location_attr = None; let mut decl_line = None; let mut decl_column = None; @@ -743,6 +755,11 @@ fn decode_variable_entry>( } } gimli::DW_AT_location => location_attr = Some(attr.value()), + gimli::DW_AT_decl_file => { + if let Some(file_index) = attr.udata_value() { + decl_file = resolve_decl_file(dwarf, unit, file_index); + } + } gimli::DW_AT_decl_line => { if let Some(line) = attr.udata_value() { decl_line = Some(line as u32); @@ -783,6 +800,7 @@ fn decode_variable_entry>( }); let data = DwarfLocalData { name: name_symbol, + decl_file, locations, decl_line, decl_column, @@ -799,6 +817,7 @@ fn decode_variable_entry>( }); let data = DwarfLocalData { name: name_symbol, + decl_file, locations, decl_line, decl_column, @@ -849,6 +868,7 @@ fn decode_variable_entry>( { let data = DwarfLocalData { name: name_symbol, + decl_file, locations, decl_line, decl_column, @@ -858,6 +878,7 @@ fn decode_variable_entry>( // FrameBase-only location list variable let data = DwarfLocalData { 
name: name_symbol, + decl_file, locations, decl_line, decl_column, @@ -873,6 +894,19 @@ fn decode_variable_entry>( Ok(None) } +fn resolve_decl_file>( + dwarf: &gimli::Dwarf, + unit: &gimli::Unit, + file_index: u64, +) -> Option { + let line_program = unit.line_program.as_ref()?; + let header = line_program.header(); + let file = header.file(file_index)?; + let raw = dwarf.attr_string(unit, file.path_name()).ok()?; + let path = raw.to_string_lossy().ok()?; + Some(Symbol::intern(path.as_ref())) +} + fn decode_storage_from_expression>( expr: &gimli::Expression, unit: &gimli::Unit, diff --git a/tests/integration/src/compiler_test.rs b/tests/integration/src/compiler_test.rs index 696198b2d..75ea3cc24 100644 --- a/tests/integration/src/compiler_test.rs +++ b/tests/integration/src/compiler_test.rs @@ -420,6 +420,7 @@ impl CompilerTestBuilder { fs::remove_dir_all(&working_dir).unwrap(); } fs::create_dir_all(&working_dir).unwrap(); + let working_dir = working_dir.canonicalize().unwrap_or(working_dir); // Prepare inputs let basename = working_dir.join(config.name.as_ref()); @@ -429,9 +430,29 @@ impl CompilerTestBuilder { // Output is the same name as the input, just with a different extension let output_file = basename.with_extension("wasm"); + // `RUSTFLAGS` is a Cargo convention; direct `rustc` invocations + // need these flags passed as argv. `panic=immediate-abort` only + // works when rebuilding core, so keep it Cargo-only. 
+ let mut rustc_flags = Vec::with_capacity(self.rustflags.len()); + let mut flags = self.rustflags.iter().map(|flag| flag.as_ref()); + while let Some(flag) = flags.next() { + if flag == "-C" + && let Some(value) = flags.next() + { + if value != "panic=immediate-abort" { + rustc_flags.extend([flag, value]); + } + continue; + } + rustc_flags.push(flag); + } + let output = command + .arg("--remap-path-prefix") + .arg(format!("{}=.", working_dir.display())) .args(["-C", "opt-level=z"]) // optimize for size .args(["-C", "target-feature=+wide-arithmetic"]) + .args(rustc_flags) .arg("--target") .arg(config.target.as_ref()) .arg("-o") diff --git a/tests/integration/src/rust_masm_tests/debug.rs b/tests/integration/src/rust_masm_tests/debug.rs index 0999e420e..683db2912 100644 --- a/tests/integration/src/rust_masm_tests/debug.rs +++ b/tests/integration/src/rust_masm_tests/debug.rs @@ -4,6 +4,17 @@ use midenc_expect_test::expect_file; use crate::{CompilerTestBuilder, testing::setup}; +fn debug_rustflags() -> [Cow<'static, str>; 6] { + [ + Cow::Borrowed("-C"), + Cow::Borrowed("debuginfo=2"), + Cow::Borrowed("-C"), + Cow::Borrowed("opt-level=0"), + Cow::Borrowed("-C"), + Cow::Borrowed("overflow-checks=off"), + ] +} + #[test] fn variable_locations_schedule() { setup::enable_compiler_instrumentation(); @@ -21,7 +32,7 @@ fn variable_locations_schedule() { "#; let mut builder = CompilerTestBuilder::rust_fn_body(source, []); - builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + builder.with_rustflags(debug_rustflags()); let mut test = builder.build(); test.expect_ir_unoptimized(expect_file!["../../expected/debug_variable_locations.hir"]); } @@ -37,7 +48,7 @@ fn debug_simple_params() { "#; let mut builder = CompilerTestBuilder::rust_fn_body(source, []); - builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + builder.with_rustflags(debug_rustflags()); let mut test = builder.build(); 
test.expect_ir_unoptimized(expect_file!["../../expected/debug_simple_params.hir"]); } @@ -54,7 +65,7 @@ fn debug_conditional_assignment() { "#; let mut builder = CompilerTestBuilder::rust_fn_body(source, []); - builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + builder.with_rustflags(debug_rustflags()); let mut test = builder.build(); test.expect_ir_unoptimized(expect_file!["../../expected/debug_conditional_assignment.hir"]); } @@ -73,7 +84,7 @@ fn debug_multiple_locals() { "#; let mut builder = CompilerTestBuilder::rust_fn_body(source, []); - builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + builder.with_rustflags(debug_rustflags()); let mut test = builder.build(); test.expect_ir_unoptimized(expect_file!["../../expected/debug_multiple_locals.hir"]); } @@ -99,7 +110,7 @@ fn debug_nested_loops() { "#; let mut builder = CompilerTestBuilder::rust_fn_body(source, []); - builder.with_rustflags([Cow::Borrowed("-C"), Cow::Borrowed("debuginfo=2")]); + builder.with_rustflags(debug_rustflags()); let mut test = builder.build(); test.expect_ir_unoptimized(expect_file!["../../expected/debug_nested_loops.hir"]); } From 90206ab928ff8a2a80f47c25896e57256ce67a8c Mon Sep 17 00:00:00 2001 From: djole Date: Mon, 4 May 2026 16:52:30 +0200 Subject: [PATCH 32/32] fix: use di.debug_declare for variables without SSA value anchor --- codegen/masm/src/lower/lowering.rs | 175 ++++++++---------- .../wasm/src/module/function_builder_ext.rs | 56 +++--- hir-transform/src/sink.rs | 41 ++-- hir/src/dialects/debuginfo.rs | 4 +- hir/src/dialects/debuginfo/builders.rs | 10 +- hir/src/dialects/debuginfo/ops.rs | 22 +-- 6 files changed, 148 insertions(+), 160 deletions(-) diff --git a/codegen/masm/src/lower/lowering.rs b/codegen/masm/src/lower/lowering.rs index 236706593..8b68477ec 100644 --- a/codegen/masm/src/lower/lowering.rs +++ b/codegen/masm/src/lower/lowering.rs @@ -1262,6 +1262,74 @@ impl HirLowering for arith::Split { } } +fn 
debug_var_location_from_expression( + expr: &midenc_hir::dialects::debuginfo::attributes::Expression, + value: Option, + emitter: &BlockEmitter<'_>, +) -> Option { + use miden_core::{Felt, operations::DebugVarLocation, serde::Serializable}; + use midenc_hir::dialects::debuginfo::attributes::ExpressionOp; + + match expr.operations.as_slice() { + [] => value + .as_ref() + .and_then(|value| emitter.stack.find(value)) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + [first] => match first { + ExpressionOp::WasmStack(offset) => Some(DebugVarLocation::Stack(*offset as u8)), + ExpressionOp::WasmLocal(idx) => { + // WASM locals are always stored in memory via FMP in Miden. + // Store raw WASM local index; the FMP offset will be computed later in + // MasmFunctionBuilder::build() when num_locals is known. + i16::try_from(*idx).ok().map(DebugVarLocation::Local) + } + ExpressionOp::WasmGlobal(_) | ExpressionOp::Deref => value + .as_ref() + .and_then(|value| emitter.stack.find(value)) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + ExpressionOp::ConstU64(val) => Some(DebugVarLocation::Const(Felt::new(*val))), + ExpressionOp::ConstS64(val) => Some(DebugVarLocation::Const(Felt::new(*val as u64))), + ExpressionOp::FrameBase { + global_index, + byte_offset, + } => Some(DebugVarLocation::FrameBase { + global_index: *global_index, + byte_offset: *byte_offset, + }), + _ => value + .as_ref() + .and_then(|value| emitter.stack.find(value)) + .map(|pos| emitter.stack.effective_index(pos) as u8) + .map(DebugVarLocation::Stack), + }, + _ => Some(DebugVarLocation::Expression(expr.to_bytes())), + } +} + +fn apply_debug_var_metadata( + debug_var: &mut miden_core::operations::DebugVarInfo, + var: &midenc_hir::dialects::debuginfo::attributes::Variable, +) { + // Set arg_index if this is a parameter + if let Some(arg_index) = var.arg_index { + debug_var.set_arg_index(arg_index + 1); // Convert to 1-based + } + + 
// Set source location + if let Some(line) = core::num::NonZeroU32::new(var.line) { + use miden_assembly::debuginfo::{ColumnNumber, FileLineCol, LineNumber, Uri}; + let uri = Uri::new(var.file.as_str()); + let file_line_col = FileLineCol::new( + uri, + LineNumber::new(line.get()).unwrap_or_default(), + var.column.and_then(ColumnNumber::new).unwrap_or_default(), + ); + debug_var.set_location(file_line_col); + } +} + impl HirLowering for debuginfo::DebugValue { fn schedule_operands(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { // Debug value operations are purely observational — they do not consume their @@ -1276,10 +1344,7 @@ impl HirLowering for debuginfo::DebugValue { } fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { - use miden_core::{ - Felt, - operations::{DebugVarInfo, DebugVarLocation}, - }; + use miden_core::operations::DebugVarInfo; use midenc_hir::dialects::debuginfo::attributes::ExpressionOp; // Get the variable info @@ -1305,74 +1370,17 @@ impl HirLowering for debuginfo::DebugValue { // Value has been dropped and we have no other location info, skip return Ok(()); } - // Resolve the runtime location. Returns None when the location cannot - // be determined (value dropped and no expression info), in which case - // we skip emitting the decorator entirely rather than emitting a - // placeholder — the debugger would have nothing useful to show. - let value_location = match expr.operations.as_slice() { - [] => emitter - .stack - .find(&value) - .map(|pos| emitter.stack.effective_index(pos) as u8) - .map(DebugVarLocation::Stack), - [first] => match first { - ExpressionOp::WasmStack(offset) => Some(DebugVarLocation::Stack(*offset as u8)), - ExpressionOp::WasmLocal(idx) => { - // WASM locals are always stored in memory via FMP in Miden. - // Store raw WASM local index; the FMP offset will be computed later in - // MasmFunctionBuilder::build() when num_locals is known. 
- i16::try_from(*idx).ok().map(DebugVarLocation::Local) - } - ExpressionOp::WasmGlobal(_) | ExpressionOp::Deref => emitter - .stack - .find(&value) - .map(|pos| emitter.stack.effective_index(pos) as u8) - .map(DebugVarLocation::Stack), - ExpressionOp::ConstU64(val) => Some(DebugVarLocation::Const(Felt::new(*val))), - ExpressionOp::ConstS64(val) => { - Some(DebugVarLocation::Const(Felt::new(*val as u64))) - } - ExpressionOp::FrameBase { - global_index, - byte_offset, - } => Some(DebugVarLocation::FrameBase { - global_index: *global_index, - byte_offset: *byte_offset, - }), - _ => emitter - .stack - .find(&value) - .map(|pos| emitter.stack.effective_index(pos) as u8) - .map(DebugVarLocation::Stack), - }, - _ => { - use miden_core::serde::Serializable; - Some(DebugVarLocation::Expression(expr.as_value().to_bytes())) - } - }; + // Resolve the runtime location. Returns None when the location cannot be determined, in + // which case we skip the decorator rather than emitting a placeholder. + let value_location = + debug_var_location_from_expression(expr.as_value(), Some(value), emitter); let Some(value_location) = value_location else { return Ok(()); }; let mut debug_var = DebugVarInfo::new(var.name.to_string(), value_location); - - // Set arg_index if this is a parameter - if let Some(arg_index) = var.arg_index { - debug_var.set_arg_index(arg_index + 1); // Convert to 1-based - } - - // Set source location - if let Some(line) = core::num::NonZeroU32::new(var.line) { - use miden_assembly::debuginfo::{ColumnNumber, FileLineCol, LineNumber, Uri}; - let uri = Uri::new(var.file.as_str()); - let file_line_col = FileLineCol::new( - uri, - LineNumber::new(line.get()).unwrap_or_default(), - var.column.and_then(ColumnNumber::new).unwrap_or_default(), - ); - debug_var.set_location(file_line_col); - } + apply_debug_var_metadata(&mut debug_var, var.as_value()); // Emit the instruction let inst = masm::Instruction::DebugVar(debug_var); @@ -1384,49 +1392,28 @@ impl HirLowering for 
debuginfo::DebugValue { impl HirLowering for debuginfo::DebugDeclare { fn schedule_operands(&self, _emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { - // Debug value operations are purely observational — they do not consume their - // operand from the stack. Skip operand scheduling entirely; the emit() method - // will look up the value's current stack position (if any) on its own. Ok(()) } fn required_operands(&self) -> ValueRange<'_, 4> { - // No operands need to be scheduled on the stack for debug ops. ValueRange::Empty } fn emit(&self, emitter: &mut BlockEmitter<'_>) -> Result<(), Report> { - use miden_core::operations::{DebugVarInfo, DebugVarLocation}; + use miden_core::operations::DebugVarInfo; let var = self.variable(); - let address = self.address().as_value_ref(); + let expr = self.expression(); - let Some(index) = emitter.stack.find(&address) else { - // Do nothing if the stack value is no longer live at this point + let Some(value_location) = + debug_var_location_from_expression(expr.as_value(), None, emitter) + else { return Ok(()); }; - let mut debug_var = - DebugVarInfo::new(var.name.to_string(), DebugVarLocation::Stack(index as u8)); - - // Set arg_index if this is a parameter - if let Some(arg_index) = var.arg_index { - debug_var.set_arg_index(arg_index + 1); // Convert to 1-based - } - - // Set source location - if let Some(line) = core::num::NonZeroU32::new(var.line) { - use miden_assembly::debuginfo::{ColumnNumber, FileLineCol, LineNumber, Uri}; - let uri = Uri::new(var.file.as_str()); - let file_line_col = FileLineCol::new( - uri, - LineNumber::new(line.get()).unwrap_or_default(), - var.column.and_then(ColumnNumber::new).unwrap_or_default(), - ); - debug_var.set_location(file_line_col); - } + let mut debug_var = DebugVarInfo::new(var.name.to_string(), value_location); + apply_debug_var_metadata(&mut debug_var, var.as_value()); - // Emit the instruction let inst = masm::Instruction::DebugVar(debug_var); 
emitter.emit_op(masm::Op::Inst(Span::new(self.span(), inst))); diff --git a/frontend/wasm/src/module/function_builder_ext.rs b/frontend/wasm/src/module/function_builder_ext.rs index ff32256a9..4887046a9 100644 --- a/frontend/wasm/src/module/function_builder_ext.rs +++ b/frontend/wasm/src/module/function_builder_ext.rs @@ -290,31 +290,21 @@ impl FunctionBuilderExt<'_, B> { } let var = Variable::new(entry.var_index); - let value = match self.try_use_var(var) { - Ok(v) => v, - Err(_) => { - if is_frame_base { - // FrameBase-only variables have no WASM local, so no SSA value - // exists for them. The di.value op requires an SSA operand, so we attach an - // existing parameter value as an anchor. The MASM lowering ignores this operand - // when the DIExpression contains FrameBase — the location is fully described by - // the expression. - if let Some((_, v)) = self.param_values.first() { - let anchor = *v; - self.def_var(var, anchor); - anchor - } else { - warn!( - "cannot track FrameBase variable (index {}): no SSA value available \ - (function has no parameters)", - entry.var_index - ); - return; - } - } else { + let is_defined = self.defined_vars.contains(&(entry.var_index as u32)); + if !is_defined && is_frame_base { + self.emit_scheduled_dbg_declare(entry, span); + return; + } + + let value = if is_defined { + match self.try_use_var(var) { + Ok(v) => v, + Err(_) => { return; } } + } else { + return; }; // Create expression from the scheduled location @@ -343,6 +333,28 @@ impl FunctionBuilderExt<'_, B> { } } + fn emit_scheduled_dbg_declare(&mut self, entry: LocationScheduleEntry, span: SourceSpan) { + if entry.storage.is_empty() { + return; + } + + let Some(info) = self.debug_info.as_ref() else { + return; + }; + let idx = entry.var_index; + let attr_opt = { + let info = info.borrow(); + info.local_attr(idx).cloned() + }; + let Some(attr) = attr_opt else { + return; + }; + + if let Err(err) = DIBuilder::builder_mut(self).debug_declare(attr, entry.storage, span) { + 
warn!("failed to emit scheduled dbg.declare for local {idx}: {err:?}"); + } + } + pub fn name(&self) -> Ident { *self.inner.func.borrow().get_name() } diff --git a/hir-transform/src/sink.rs b/hir-transform/src/sink.rs index a8a6bd8a4..d73b58550 100644 --- a/hir-transform/src/sink.rs +++ b/hir-transform/src/sink.rs @@ -13,33 +13,24 @@ use midenc_hir::{ /// Check whether `operation` is the sole _non-transparent_ user of `value`. /// -/// Transparent users that do not carry debug effects are excluded, because they are purely -/// informational and their uses are not considered for purposes of computing liveness. -/// -/// Debug-effect ops are also Transparent, but they must keep their operands alive until codegen -/// turns them into VM debug decorators. +/// Transparent users are excluded, because they are purely informational and their uses are not +/// considered for purposes of computing liveness. fn is_sole_non_transparent_user(value: &dyn Value, operation: OperationRef) -> bool { - value.iter_uses().all(|user| { - let owner = user.owner.borrow(); - user.owner == operation - || (owner.implements::() - && owner.as_trait::().is_none()) - }) + value + .iter_uses() + .all(|user| user.owner == operation || user.owner.borrow().implements::()) } -/// Erase all non-debug transparent operations that reference the given value. +/// Erase all transparent operations that reference the given value. /// /// This is used before erasing a defining op whose result is only kept alive by -/// non-debug transparent uses. +/// transparent uses. 
fn erase_transparent_users(value: ValueRef) { let transparent_ops: SmallVec<[OperationRef; 2]> = { let v = value.borrow(); v.iter_uses() .filter_map(|user| { - let owner = user.owner.borrow(); - if owner.implements::() - && owner.as_trait::().is_none() - { + if user.owner.borrow().implements::() { Some(user.owner) } else { None @@ -262,8 +253,7 @@ impl Pass for SinkOperandDefs { for operand in op.operands().iter().rev() { let value = operand.borrow(); let value = value.value(); - // Exclude non-debug transparent uses when determining whether this is the sole - // user. + // Exclude transparent uses when determining whether this is the sole user. let is_sole_user = is_sole_non_transparent_user(&*value, operation); let Some(defining_op) = value.get_defining_op() else { @@ -332,7 +322,7 @@ impl Pass for SinkOperandDefs { if !has_real_uses && op.would_be_trivially_dead() && erased.insert(operation) { log::debug!(target: Self::NAME, "erasing unused, effect-free, non-terminator op {op}"); drop(op); - // Erase any remaining non-debug transparent uses before erasing the defining op. + // Erase any remaining transparent uses before erasing the defining op. for result in operation.borrow().results().iter() { erase_transparent_users(result.borrow().as_value_ref()); } @@ -383,7 +373,7 @@ impl Pass for SinkOperandDefs { } let value = operand_value.borrow(); - // Exclude non-debug transparent uses when determining sole-user status. + // Exclude transparent uses when determining sole-user status. let is_sole_user = is_sole_non_transparent_user(&*value, operation); let Some(mut defining_op) = value.get_defining_op() else { @@ -559,8 +549,8 @@ where /// users of the given op are dominated by the entry block of the region, and thus the operation /// can be sunk into the region. /// - /// Non-debug transparent uses are excluded because they are observational and should not - /// prevent control-flow sinking. 
+ /// Transparent uses are excluded because they are observational and should not prevent + /// control-flow sinking. fn all_users_dominated_by(&self, op: &Operation, region: &Region) -> bool { assert!( region.find_ancestor_op(op.as_operation_ref()).is_none(), @@ -570,10 +560,7 @@ where op.results().iter().all(|result| { let result = result.borrow(); result.iter_uses().all(|user| { - let owner = user.owner.borrow(); - if owner.implements::() - && owner.as_trait::().is_none() - { + if user.owner.borrow().implements::() { return true; } // The user is dominated by the region if its containing block is dominated diff --git a/hir/src/dialects/debuginfo.rs b/hir/src/dialects/debuginfo.rs index 5c993a472..8da67672f 100644 --- a/hir/src/dialects/debuginfo.rs +++ b/hir/src/dialects/debuginfo.rs @@ -21,8 +21,8 @@ //! - **`di.value`** — Records the current value of a source variable. Uses an SSA value operand, //! so deleting the value without updating debug uses is a hard error. //! -//! - **`di.declare`** — Records the storage address of a source variable. Similarly uses an SSA -//! operand for the address. +//! - **`di.debug_declare`** — Records the storage location of a source variable using a debug +//! expression. //! //! - **`di.kill`** — Marks a variable as dead, giving the debugger precise lifetime boundaries //! instead of scope-based heuristics. 
diff --git a/hir/src/dialects/debuginfo/builders.rs b/hir/src/dialects/debuginfo/builders.rs index 165edb87d..bf300c12a 100644 --- a/hir/src/dialects/debuginfo/builders.rs +++ b/hir/src/dialects/debuginfo/builders.rs @@ -19,6 +19,9 @@ use super::ops::*; /// // With a custom expression (e.g., value needs a deref): /// builder.debug_value_with_expr(ssa_value, variable_attr, Some(expr), span)?; /// +/// // Emit a debug declaration for storage described by an expression: +/// builder.debug_declare(variable_attr, expr, span)?; +/// /// // Mark a variable as dead: /// builder.debug_kill(variable_attr, span)?; /// ``` @@ -54,15 +57,16 @@ pub trait DIBuilder<'f, B: ?Sized + Builder> { op_builder(value, variable, expr) } - /// Emit a `di.declare` operation that records the storage address of a source-level variable. + /// Emit a `di.debug_declare` operation that records the storage location of a source-level + /// variable. fn debug_declare( &mut self, - address: ValueRef, variable: Variable, + expression: Expression, span: SourceSpan, ) -> Result { let op_builder = self.builder_mut().create::(span); - op_builder(address, variable) + op_builder(variable, expression) } /// Emit a `di.kill` operation that marks a variable as dead. diff --git a/hir/src/dialects/debuginfo/ops.rs b/hir/src/dialects/debuginfo/ops.rs index 908fd6a6a..72ceb2beb 100644 --- a/hir/src/dialects/debuginfo/ops.rs +++ b/hir/src/dialects/debuginfo/ops.rs @@ -18,13 +18,14 @@ pub type DebugKillRef = UnsafeIntrusiveEntityRef; /// Records the current value of a source-level variable. /// -/// This is the core operation of the debuginfo dialect. It creates a first-class SSA use of the +/// This is the core operation of the debuginfo dialect. It records a transparent SSA use of the /// value, which means: /// /// - If a transform deletes the value without updating its debug uses, that's a hard error (not a /// silent drop like with metadata-based approaches). 
/// - Standard MLIR-style use-def tracking automatically enforces this — transforms must call -/// `replace_all_uses_with` or explicitly handle debug uses. +/// `replace_all_uses_with`, explicitly handle debug uses, or drop the debug op when its referent +/// is dead. /// /// The `variable` attribute identifies the source variable, and the `expression` attribute /// describes how to recover the source-level value from the IR value (e.g., "dereference this @@ -61,12 +62,9 @@ impl EffectOpInterface for DebugValue { /// Records the storage location (address) of a source-level variable. /// -/// Unlike [DebugValue] which tracks values, [DebugDeclare] tracks the address where a variable is -/// stored. This is useful for variables that live in memory (e.g., stack allocations) where the -/// address itself doesn't change, but the value at that address may be updated through stores. -/// -/// Like `DebugValue`, this creates a real SSA use of the address value, preventing silent drops -/// during transforms. +/// Unlike [DebugValue] which tracks values, [DebugDeclare] tracks the location where a variable is +/// stored. This is useful for variables that live in memory (e.g., stack slots) where the address is +/// described by a debug expression such as `DW_OP_fbreg`. #[derive(EffectOpInterface, OpParser, OpPrinter)] #[operation( dialect = DebugInfoDialect, @@ -74,12 +72,12 @@ impl EffectOpInterface for DebugValue { implements(DebugEffectOpInterface, MemoryEffectOpInterface, OpPrinter) )] pub struct DebugDeclare { - #[operand] - #[effects(DebugEffect(DebugEffect::Read))] - address: AnyType, #[attr] #[effects(DebugEffect(DebugEffect::Allocate))] variable: VariableAttr, + #[attr] + #[effects(DebugEffect(DebugEffect::Write))] + expression: ExpressionAttr, } impl EffectOpInterface for DebugDeclare { @@ -95,7 +93,7 @@ impl EffectOpInterface for DebugDeclare { /// scope-based heuristics which can be inaccurate after optimizations. 
/// /// After a `debuginfo.kill`, the debugger should report the variable as "optimized out" or "not -/// available" until the next `di.value` or `di.declare` for the same variable. +/// available" until the next `di.value` or `di.debug_declare` for the same variable. /// /// # Example ///