From 52a4a5c373d08f385b71502f222aa97942aa6372 Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Mon, 15 Sep 2025 10:18:46 +0900
Subject: [PATCH 01/11] =?UTF-8?q?CI:=20AVX2/AVX=E2=80=91512/NEON=20?=
 =?UTF-8?q?=E3=81=A7=E3=81=AE=E3=83=93=E3=83=AB=E3=83=89=E3=83=BB=E3=83=86?=
 =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=20(vibe-kanban=20?=
 =?UTF-8?q?4c682469)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

branche name: feat/multi_arch_ci

## 背景
- 本ワークスペースの SIMD 最適化（AVX2/AVX‑512/NEON）コードパスの劣化検知が必要
。
- GitHub ホストランナーは AVX‑512 を保証せず、ARM64 も環境により未提供のため、実行戦略を分離。
  - AVX2: x86_64 ランナーでビルド＋テストを実行。
  - AVX‑512: x86_64 ランナーでは未対応の可能性が高く、ビルドのみで担保。
  - NEON: ARM64 ランナー（例: `ubuntu-24.04-arm64` または `self-hosted, Linux, ARM64`）でビルド＋テストを実行。

## スコープ
- `.github/workflows/rust.yml` の Build & Test を CPU 機能マトリクス化。
- x86_64/AVX2 は実行テストまで、x86_64/AVX‑512 はビルドのみ、ARM64/NEON は実行テストまで。
- 既存の Clippy/Fmt ジョブは維持。
- すべて nightly、`Swatinem/rust-cache@v2`、`RUST_BACKTRACE=1` を適用。

## 受け入れ条件
- Push/PR で以下が動作:
  - x64/AVX2: リリースビルド＋テストが成功。
  - x64/AVX‑512: リリースビルドが成功（テストはスキップ）。
  - ARM64/NEON: リリースビルド＋テストが成功（ARM64 ランナーが利用可能な場合）。
- Clippy/Fmt は従来通り、警告・未整形で CI が失敗。
- 2 回目以降の実行でビルドキャッシュが効く。

## 注意点
- AVX‑512 はホストランナーで未対応の可能性が高く、テスト実行は SIGILL 回避のためスキップ。
- ARM64 ランナーが未提供の場合は NEON 行を一時的にコメントアウト、または `self-hosted` ラベル運用に切替。
- 実行テストは `is_x86_feature_detected!` や `cfg(target_feature)` で条件化し、誤実行を防止。

## 提案する rust.yml（差し替え案）
```yaml
name: Rust CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]
  workflow_dispatch:

permissions:
  contents: read

concurrency:
  group: rust-ci-${{ github.ref }}
  cancel-in-progress: true

env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: 1

jobs:
  build-test-matrix:
    name: Build & Test (${{ matrix.name }})
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: x64-avx2
            os: ubuntu-latest
            rustflags: -C target-cpu=haswell
            run_tests: true
          - name: x64-avx512-build
            os: ubuntu-latest
            rustflags: -C target-cpu=skylake-avx512
            run_tests: false  # build only to avoid SIGILL
          - name: arm64-neon
            # 利用可能な場合は GitHub ホスト ARM64 ランナーに置換
            # 例: ubuntu-24.04-arm64（組織設定依存）
            # 未提供時は self-hosted を使用: [self-hosted, Linux, ARM64]
            os: ubuntu-24.04-arm64
            rustflags: -C target-feature=+neon
            run_tests: true
    steps:
      - uses: actions/checkout@v4
      - name: Install nightly toolchain
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: nightly
      - name: Cache cargo build
        uses: Swatinem/rust-cache@v2
      - name: Build (release)
        run: cargo build --workspace --release --verbose
        env:
          RUSTFLAGS: ${{ matrix.rustflags }}
      - name: Test (release)
        if: ${{ matrix.run_tests == true }}
        run: cargo test --workspace --release --verbose
        env:
          RUSTFLAGS: ${{ matrix.rustflags }}
  clippy:
    runs-on: ubuntu-latest
    name: Clippy
    steps:
      - uses: actions/checkout@v4
      - name: Install nightly toolchain
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: nightly
          components: clippy
      - name: Cache cargo build
        uses: Swatinem/rust-cache@v2
      - name: Clippy (deny warnings)
        run: cargo clippy --workspace --all-targets --all-features -- -D warning
s

  fmt:
    runs-on: ubuntu-latest
    name: Rustfmt
    steps:
      - uses: actions/checkout@v4
      - name: Install nightly toolchain
        uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: nightly
          components: rustfmt
      - name: Cache cargo build
        uses: Swatinem/rust-cache@v2
      - name: Check formatting
        run: cargo fmt --all -- --check
```
---
 .github/workflows/rust.yml | 91 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 .github/workflows/rust.yml

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
new file mode 100644
index 00000000..94d0a018
--- /dev/null
+++ b/.github/workflows/rust.yml
@@ -0,0 +1,91 @@
+name: Rust CI
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: rust-ci-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  CARGO_TERM_COLOR: always
+  RUST_BACKTRACE: 1
+
+jobs:
+  build-test-matrix:
+    name: Build & Test (${{ matrix.name }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: x64-avx2
+            os: ubuntu-latest
+            rustflags: -C target-cpu=haswell
+            run_tests: true
+          - name: x64-avx512-build
+            os: ubuntu-latest
+            rustflags: -C target-cpu=skylake-avx512
+            run_tests: false # build only to avoid SIGILL
+          - name: arm64-neon
+            # If available, use GitHub-hosted ARM64 runner
+            # e.g. ubuntu-24.04-arm64 (org setting dependent)
+            # If unavailable, consider: [self-hosted, Linux, ARM64]
+            os: ubuntu-24.04-arm64
+            rustflags: -C target-feature=+neon
+            run_tests: true
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install nightly toolchain
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: nightly
+      - name: Cache cargo build
+        uses: Swatinem/rust-cache@v2
+      - name: Build (release)
+        run: cargo build --workspace --release --verbose
+        env:
+          RUSTFLAGS: ${{ matrix.rustflags }}
+      - name: Test (release)
+        if: ${{ matrix.run_tests == true }}
+        run: cargo test --workspace --release --verbose
+        env:
+          RUSTFLAGS: ${{ matrix.rustflags }}
+
+  clippy:
+    runs-on: ubuntu-latest
+    name: Clippy
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install nightly toolchain
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: nightly
+          components: clippy
+      - name: Cache cargo build
+        uses: Swatinem/rust-cache@v2
+      - name: Clippy (deny warnings)
+        run: cargo clippy --workspace --all-targets --all-features -- -D warnings
+
+  fmt:
+    runs-on: ubuntu-latest
+    name: Rustfmt
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install nightly toolchain
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          toolchain: nightly
+          components: rustfmt
+      - name: Cache cargo build
+        uses: Swatinem/rust-cache@v2
+      - name: Check formatting
+        run: cargo fmt --all -- --check
+

From b2200a6cc0939def3320509a2c5ea3e19c117332 Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Mon, 15 Sep 2025 10:34:40 +0900
Subject: [PATCH 02/11] Implemented VM-side event collection and event-driven
 witness building.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

What I changed
- VM events: Added `VmPoseidon16Event`, `VmPoseidon24Event`, `VmDotProductEvent`, `VmMultilinearEvalEvent` in `crates/vm/src/lib.rs`. Events store cycle, addresses, sizes, and values (inputs/outputs).
- ExecutionResult: Extended with event vectors to export collected events to consumers:
  - `vm_poseidon16_events`, `vm_poseidon24_events`, `vm_dot_product_events`, `vm_multilinear_eval_events`
- Runner integration: In `crates/vm/src/runner.rs`
  - Recorded events during final execution only (`final_execution == true`) right after each precompile executes.
  - Captured input and output values for reproducibility.
  - Added debug assertions to ensure call counts match event counts in final execution.
- Trace construction: In `crates/zk_vm/zk_vm_trace/src/execution_trace.rs`
  - Removed the memory rescanning logic for precompile witness reconstruction.
  - Built `WitnessPoseidon*`, `WitnessDotProduct`, and `WitnessMultilinearEval` directly from the VM events.
  - Kept Poseidon padding and dummy row logic unchanged.

Notes
- First pass does not record events; final pass does. Debug asserts guard count consistency.
- Poseidon tables retain 2^k padding and dummy rows exactly as before.
- No fallback to rescanning remains; witness creation is event-only.

Validation
- Ran `cargo test`: all tests passed.
- Existing trace columns generation remains intact and still uses instruction encodings and memory reads; only precompile witness collection switched to events.

Next steps
- Do you want me to add unit tests for event correctness (cycle, addresses, lengths, values) and padding invariants?
- If you’d like, I can run a release test and a quick timing comparison to check trace build time improvements.
---
 crates/vm/src/lib.rs                          |  47 +++++++
 crates/vm/src/runner.rs                       |  92 +++++++++++-
 .../zk_vm/zk_vm_trace/src/execution_trace.rs  | 132 ++++++------------
 3 files changed, 183 insertions(+), 88 deletions(-)

diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs
index a71ea6f4..f2363d59 100644
--- a/crates/vm/src/lib.rs
+++ b/crates/vm/src/lib.rs
@@ -23,3 +23,50 @@ pub const ONE_VEC_PTR: usize = 2; // convention (vectorized pointer of size 1, p
 pub const POSEIDON_16_NULL_HASH_PTR: usize = 3; // convention (vectorized pointer of size 2, = the 16 elements of poseidon_16(0))
 pub const POSEIDON_24_NULL_HASH_PTR: usize = 5; // convention (vectorized pointer of size 1, = the last 8 elements of poseidon_24(0))
 pub const PUBLIC_INPUT_START: usize = 6 * 8; // normal pointer
+
+// VM-side events collected during final execution only.
+// These events are designed to be self-contained (store values),
+// allowing zk_vm_trace to construct Witness* without rescanning memory.
+
+#[derive(Debug, Clone)]
+pub struct VmDotProductEvent {
+    pub cycle: usize,
+    pub addr_0: usize,   // normal pointer
+    pub addr_1: usize,   // normal pointer
+    pub addr_res: usize, // normal pointer
+    pub len: usize,
+    pub slice_0: Vec<EF>,
+    pub slice_1: Vec<EF>,
+    pub res: EF,
+}
+
+#[derive(Debug, Clone)]
+pub struct VmMultilinearEvalEvent {
+    pub cycle: usize,
+    pub addr_coeffs: usize, // vectorized pointer, of size 8*2^n_vars
+    pub addr_point: usize,  // vectorized pointer, of size n_vars
+    pub addr_res: usize,    // vectorized pointer
+    pub n_vars: usize,
+    pub point: Vec<EF>,
+    pub res: EF,
+}
+
+#[derive(Debug, Clone)]
+pub struct VmPoseidon16Event {
+    pub cycle: usize,
+    pub addr_input_a: usize, // vectorized pointer (size 1)
+    pub addr_input_b: usize, // vectorized pointer (size 1)
+    pub addr_output: usize,  // vectorized pointer (size 2)
+    pub input: [F; 16],
+    pub output: [F; 16],
+}
+
+#[derive(Debug, Clone)]
+pub struct VmPoseidon24Event {
+    pub cycle: usize,
+    pub addr_input_a: usize, // vectorized pointer (size 2)
+    pub addr_input_b: usize, // vectorized pointer (size 1)
+    pub addr_output: usize,  // vectorized pointer (size 1)
+    pub input: [F; 24],
+    pub output: [F; 8], // last 8 elements of the output
+}
diff --git a/crates/vm/src/runner.rs b/crates/vm/src/runner.rs
index 8a51d5d1..c343eab3 100644
--- a/crates/vm/src/runner.rs
+++ b/crates/vm/src/runner.rs
@@ -146,6 +146,10 @@ pub struct ExecutionResult {
     pub memory: Memory,
     pub pcs: Vec<usize>,
     pub fps: Vec<usize>,
+    pub vm_poseidon16_events: Vec<VmPoseidon16Event>,
+    pub vm_poseidon24_events: Vec<VmPoseidon24Event>,
+    pub vm_dot_product_events: Vec<VmDotProductEvent>,
+    pub vm_multilinear_eval_events: Vec<VmMultilinearEvalEvent>,
 }
 
 pub fn build_public_memory(public_input: &[F]) -> Vec<F> {
@@ -221,6 +225,12 @@ fn execute_bytecode_helper(
     let mut pcs = Vec::new();
     let mut fps = Vec::new();
 
+    // Events collected only in final execution
+    let mut vm_poseidon16_events: Vec<VmPoseidon16Event> = Vec::new();
+    let mut vm_poseidon24_events: Vec<VmPoseidon24Event> = Vec::new();
+    let mut vm_dot_product_events: Vec<VmDotProductEvent> = Vec::new();
+    let mut vm_multilinear_eval_events: Vec<VmMultilinearEvalEvent> = Vec::new();
+
     let mut add_counts = 0;
     let mut mul_counts = 0;
     let mut deref_counts = 0;
@@ -419,6 +429,8 @@ fn execute_bytecode_helper(
                 input[..VECTOR_LEN].copy_from_slice(&arg0);
                 input[VECTOR_LEN..].copy_from_slice(&arg1);
 
+                // Keep a copy of the input before permutation for event recording
+                let input_before = input;
                 poseidon_16.permute_mut(&mut input);
 
                 let res0: [F; VECTOR_LEN] = input[..VECTOR_LEN].try_into().unwrap();
@@ -427,6 +439,26 @@ fn execute_bytecode_helper(
                 memory.set_vector(res_value.to_usize(), res0)?;
                 memory.set_vector(1 + res_value.to_usize(), res1)?;
 
+                if final_execution {
+                    let cycle = pcs.len() - 1;
+                    let addr_input_a = a_value.to_usize();
+                    let addr_input_b = b_value.to_usize();
+                    let addr_output = res_value.to_usize();
+                    // Build output by concatenating the two result vectors we wrote to memory
+                    let output: [F; 16] = [res0.as_slice(), res1.as_slice()]
+                        .concat()
+                        .try_into()
+                        .unwrap();
+                    vm_poseidon16_events.push(VmPoseidon16Event {
+                        cycle,
+                        addr_input_a,
+                        addr_input_b,
+                        addr_output,
+                        input: input_before,
+                        output,
+                    });
+                }
+
                 pc += 1;
             }
             Instruction::Poseidon2_24 { arg_a, arg_b, res } => {
@@ -445,12 +477,29 @@ fn execute_bytecode_helper(
                 input[VECTOR_LEN..2 * VECTOR_LEN].copy_from_slice(&arg1);
                 input[2 * VECTOR_LEN..].copy_from_slice(&arg2);
 
+                // Keep a copy of the input before permutation for event recording
+                let input_before = input;
                 poseidon_24.permute_mut(&mut input);
 
                 let res: [F; VECTOR_LEN] = input[2 * VECTOR_LEN..].try_into().unwrap();
 
                 memory.set_vector(res_value.to_usize(), res)?;
 
+                if final_execution {
+                    let cycle = pcs.len() - 1;
+                    let addr_input_a = a_value.to_usize();
+                    let addr_input_b = b_value.to_usize();
+                    let addr_output = res_value.to_usize();
+                    vm_poseidon24_events.push(VmPoseidon24Event {
+                        cycle,
+                        addr_input_a,
+                        addr_input_b,
+                        addr_output,
+                        input: input_before,
+                        output: res,
+                    });
+                }
+
                 pc += 1;
             }
             Instruction::DotProductExtensionExtension {
@@ -468,9 +517,26 @@ fn execute_bytecode_helper(
                 let slice_0 = memory.get_continuous_slice_of_ef_elements(ptr_arg_0, *size)?;
                 let slice_1 = memory.get_continuous_slice_of_ef_elements(ptr_arg_1, *size)?;
 
-                let dot_product = dot_product::<EF, _, _>(slice_0.into_iter(), slice_1.into_iter());
+                let dot_product = dot_product::<EF, _, _>(
+                    slice_0.iter().copied(),
+                    slice_1.iter().copied(),
+                );
                 memory.set_ef_element(ptr_res, dot_product)?;
 
+                if final_execution {
+                    let cycle = pcs.len() - 1;
+                    vm_dot_product_events.push(VmDotProductEvent {
+                        cycle,
+                        addr_0: ptr_arg_0,
+                        addr_1: ptr_arg_1,
+                        addr_res: ptr_res,
+                        len: *size,
+                        slice_0: slice_0.clone(),
+                        slice_1: slice_1.clone(),
+                        res: dot_product,
+                    });
+                }
+
                 pc += 1;
             }
             Instruction::MultilinearEval {
@@ -488,9 +554,22 @@ fn execute_bytecode_helper(
                 let slice_coeffs = memory.slice(ptr_coeffs << *n_vars, n_coeffs)?;
                 let point = memory.get_continuous_slice_of_ef_elements(ptr_point, *n_vars)?;
 
-                let eval = slice_coeffs.evaluate(&MultilinearPoint(point));
+                let eval = slice_coeffs.evaluate(&MultilinearPoint(point.clone()));
                 memory.set_ef_element(ptr_res, eval)?;
 
+                if final_execution {
+                    let cycle = pcs.len() - 1;
+                    vm_multilinear_eval_events.push(VmMultilinearEvalEvent {
+                        cycle,
+                        addr_coeffs: ptr_coeffs,
+                        addr_point: ptr_point,
+                        addr_res: ptr_res,
+                        n_vars: *n_vars,
+                        point,
+                        res: eval,
+                    });
+                }
+
                 pc += 1;
             }
         }
@@ -501,6 +580,11 @@ fn execute_bytecode_helper(
     fps.push(fp);
 
     if final_execution {
+        // Ensure event counts match call counts in final execution
+        debug_assert_eq!(poseidon16_calls, vm_poseidon16_events.len());
+        debug_assert_eq!(poseidon24_calls, vm_poseidon24_events.len());
+        debug_assert_eq!(dot_product_ext_ext_calls, vm_dot_product_events.len());
+        debug_assert_eq!(multilinear_eval_calls, vm_multilinear_eval_events.len());
         if profiler {
             let report = profiling_report(instruction_history, function_locations);
             println!("\n{report}");
@@ -594,5 +678,9 @@ fn execute_bytecode_helper(
         memory,
         pcs,
         fps,
+        vm_poseidon16_events,
+        vm_poseidon24_events,
+        vm_dot_product_events,
+        vm_multilinear_eval_events,
     })
 }
diff --git a/crates/zk_vm/zk_vm_trace/src/execution_trace.rs b/crates/zk_vm/zk_vm_trace/src/execution_trace.rs
index 22bca1ef..6ff05277 100644
--- a/crates/zk_vm/zk_vm_trace/src/execution_trace.rs
+++ b/crates/zk_vm/zk_vm_trace/src/execution_trace.rs
@@ -134,92 +134,7 @@ pub fn get_execution_trace(
         trace[COL_INDEX_MEM_ADDRESS_B][cycle] = addr_b;
         trace[COL_INDEX_MEM_ADDRESS_C][cycle] = addr_c;
 
-        match instruction {
-            Instruction::Poseidon2_16 { arg_a, arg_b, res } => {
-                let addr_input_a = arg_a.read_value(memory, fp).unwrap().to_usize();
-                let addr_input_b = arg_b.read_value(memory, fp).unwrap().to_usize();
-                let addr_output = res.read_value(memory, fp).unwrap().to_usize();
-                let value_a = memory.get_vector(addr_input_a).unwrap();
-                let value_b = memory.get_vector(addr_input_b).unwrap();
-                let output = memory.get_vectorized_slice(addr_output, 2).unwrap();
-                poseidons_16.push(WitnessPoseidon16 {
-                    cycle: Some(cycle),
-                    addr_input_a,
-                    addr_input_b,
-                    addr_output,
-                    input: [value_a, value_b].concat().try_into().unwrap(),
-                    output: output.try_into().unwrap(),
-                });
-            }
-            Instruction::Poseidon2_24 { arg_a, arg_b, res } => {
-                let addr_input_a = arg_a.read_value(memory, fp).unwrap().to_usize();
-                let addr_input_b = arg_b.read_value(memory, fp).unwrap().to_usize();
-                let addr_output = res.read_value(memory, fp).unwrap().to_usize();
-                let value_a = memory.get_vectorized_slice(addr_input_a, 2).unwrap();
-                let value_b = memory.get_vector(addr_input_b).unwrap().to_vec();
-                let output = memory.get_vector(addr_output).unwrap();
-                poseidons_24.push(WitnessPoseidon24 {
-                    cycle: Some(cycle),
-                    addr_input_a,
-                    addr_input_b,
-                    addr_output,
-                    input: [value_a, value_b].concat().try_into().unwrap(),
-                    output,
-                });
-            }
-            Instruction::DotProductExtensionExtension {
-                arg0,
-                arg1,
-                res,
-                size,
-            } => {
-                let addr_0 = arg0.read_value(memory, fp).unwrap().to_usize();
-                let addr_1 = arg1.read_value(memory, fp).unwrap().to_usize();
-                let addr_res = res.read_value(memory, fp).unwrap().to_usize();
-                let slice_0 = memory
-                    .get_continuous_slice_of_ef_elements(addr_0, *size)
-                    .unwrap();
-                let slice_1 = memory
-                    .get_continuous_slice_of_ef_elements(addr_1, *size)
-                    .unwrap();
-                let res = memory.get_ef_element(addr_res).unwrap();
-                dot_products.push(WitnessDotProduct {
-                    cycle,
-                    addr_0,
-                    addr_1,
-                    addr_res,
-                    len: *size,
-                    slice_0,
-                    slice_1,
-                    res,
-                });
-            }
-            Instruction::MultilinearEval {
-                coeffs,
-                point,
-                res,
-                n_vars,
-            } => {
-                let addr_coeffs = coeffs.read_value(memory, fp).unwrap().to_usize();
-                let addr_point = point.read_value(memory, fp).unwrap().to_usize();
-                let addr_res = res.read_value(memory, fp).unwrap().to_usize();
-                let point = (0..*n_vars)
-                    .map(|i| memory.get_ef_element(addr_point + i * DIMENSION))
-                    .collect::<Result<Vec<EF>, _>>()
-                    .unwrap();
-                let res = memory.get_ef_element(addr_res).unwrap();
-                vm_multilinear_evals.push(WitnessMultilinearEval {
-                    cycle,
-                    addr_coeffs,
-                    addr_point,
-                    addr_res,
-                    n_vars: *n_vars,
-                    point,
-                    res,
-                });
-            }
-            _ => {}
-        }
+        // Precompile witness collection is now driven by VM events instead of rescanning here.
     }
 
     // repeat the last row to get to a power of two
@@ -242,6 +157,51 @@ pub fn get_execution_trace(
         F::ZERO,
     );
 
+    // Build witnesses from VM-collected events
+    for e in &execution_result.vm_poseidon16_events {
+        poseidons_16.push(WitnessPoseidon16 {
+            cycle: Some(e.cycle),
+            addr_input_a: e.addr_input_a,
+            addr_input_b: e.addr_input_b,
+            addr_output: e.addr_output,
+            input: e.input,
+            output: e.output,
+        });
+    }
+    for e in &execution_result.vm_poseidon24_events {
+        poseidons_24.push(WitnessPoseidon24 {
+            cycle: Some(e.cycle),
+            addr_input_a: e.addr_input_a,
+            addr_input_b: e.addr_input_b,
+            addr_output: e.addr_output,
+            input: e.input,
+            output: e.output,
+        });
+    }
+    for e in &execution_result.vm_dot_product_events {
+        dot_products.push(WitnessDotProduct {
+            cycle: e.cycle,
+            addr_0: e.addr_0,
+            addr_1: e.addr_1,
+            addr_res: e.addr_res,
+            len: e.len,
+            slice_0: e.slice_0.clone(),
+            slice_1: e.slice_1.clone(),
+            res: e.res,
+        });
+    }
+    for e in &execution_result.vm_multilinear_eval_events {
+        vm_multilinear_evals.push(WitnessMultilinearEval {
+            cycle: e.cycle,
+            addr_coeffs: e.addr_coeffs,
+            addr_point: e.addr_point,
+            addr_res: e.addr_res,
+            n_vars: e.n_vars,
+            point: e.point.clone(),
+            res: e.res,
+        });
+    }
+
     let n_poseidons_16 = poseidons_16.len();
     let n_poseidons_24 = poseidons_24.len();
 

From b7bdfe0b59d5ca9b4056c69720728040f03db33d Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Thu, 18 Sep 2025 10:23:26 +0900
Subject: [PATCH 03/11] Refactor dot product calculation for improved
 readability

---
 crates/lean_vm/src/runner.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/crates/lean_vm/src/runner.rs b/crates/lean_vm/src/runner.rs
index 42e3d395..3d8765b5 100644
--- a/crates/lean_vm/src/runner.rs
+++ b/crates/lean_vm/src/runner.rs
@@ -528,10 +528,8 @@ fn execute_bytecode_helper(
                 let slice_0 = memory.get_continuous_slice_of_ef_elements(ptr_arg_0, *size)?;
                 let slice_1 = memory.get_continuous_slice_of_ef_elements(ptr_arg_1, *size)?;
 
-                let dot_product = dot_product::<EF, _, _>(
-                    slice_0.iter().copied(),
-                    slice_1.iter().copied(),
-                );
+                let dot_product =
+                    dot_product::<EF, _, _>(slice_0.iter().copied(), slice_1.iter().copied());
                 memory.set_ef_element(ptr_res, dot_product)?;
 
                 if final_execution {

From 6238db777898a39b9fe3ee42ca270a18d273825b Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Thu, 18 Sep 2025 14:16:25 +0900
Subject: [PATCH 04/11] Remove unused instruction handling from execution trace
 function

---
 .../witness_generation/src/execution_trace.rs | 105 +-----------------
 1 file changed, 1 insertion(+), 104 deletions(-)

diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs
index dd931e6b..cdabc2fb 100644
--- a/crates/lean_prover/witness_generation/src/execution_trace.rs
+++ b/crates/lean_prover/witness_generation/src/execution_trace.rs
@@ -5,10 +5,9 @@ use crate::{
     N_EXEC_COLUMNS, N_INSTRUCTION_COLUMNS,
 };
 use lean_vm::*;
+use p3_field::Field;
 use p3_field::PrimeCharacteristicRing;
-use p3_field::{BasedVectorSpace, Field};
 use p3_symmetric::Permutation;
-use p3_util::log2_ceil_usize;
 use rayon::prelude::*;
 use utils::{ToUsize, get_poseidon16, get_poseidon24};
 
@@ -162,108 +161,6 @@ pub fn get_execution_trace(
         trace[COL_INDEX_MEM_ADDRESS_A][cycle] = addr_a;
         trace[COL_INDEX_MEM_ADDRESS_B][cycle] = addr_b;
         trace[COL_INDEX_MEM_ADDRESS_C][cycle] = addr_c;
-
-        match instruction {
-            Instruction::Poseidon2_16 { arg_a, arg_b, res } => {
-                let addr_input_a = arg_a.read_value(memory, fp).unwrap().to_usize();
-                let addr_input_b = arg_b.read_value(memory, fp).unwrap().to_usize();
-                let addr_output = res.read_value(memory, fp).unwrap().to_usize();
-                let value_a = memory.get_vector(addr_input_a).unwrap();
-                let value_b = memory.get_vector(addr_input_b).unwrap();
-                let output = memory.get_vectorized_slice(addr_output, 2).unwrap();
-                poseidons_16.push(WitnessPoseidon16 {
-                    cycle: Some(cycle),
-                    addr_input_a,
-                    addr_input_b,
-                    addr_output,
-                    input: [value_a, value_b].concat().try_into().unwrap(),
-                    output: output.try_into().unwrap(),
-                });
-            }
-            Instruction::Poseidon2_24 { arg_a, arg_b, res } => {
-                let addr_input_a = arg_a.read_value(memory, fp).unwrap().to_usize();
-                let addr_input_b = arg_b.read_value(memory, fp).unwrap().to_usize();
-                let addr_output = res.read_value(memory, fp).unwrap().to_usize();
-                let value_a = memory.get_vectorized_slice(addr_input_a, 2).unwrap();
-                let value_b = memory.get_vector(addr_input_b).unwrap().to_vec();
-                let output = memory.get_vector(addr_output).unwrap();
-                poseidons_24.push(WitnessPoseidon24 {
-                    cycle: Some(cycle),
-                    addr_input_a,
-                    addr_input_b,
-                    addr_output,
-                    input: [value_a, value_b].concat().try_into().unwrap(),
-                    output,
-                });
-            }
-            Instruction::DotProductExtensionExtension {
-                arg0,
-                arg1,
-                res,
-                size,
-            } => {
-                let addr_0 = arg0.read_value(memory, fp).unwrap().to_usize();
-                let addr_1 = arg1.read_value(memory, fp).unwrap().to_usize();
-                let addr_res = res.read_value(memory, fp).unwrap().to_usize();
-                let slice_0 = memory
-                    .get_continuous_slice_of_ef_elements(addr_0, *size)
-                    .unwrap();
-                let slice_1 = memory
-                    .get_continuous_slice_of_ef_elements(addr_1, *size)
-                    .unwrap();
-                let res = memory.get_ef_element(addr_res).unwrap();
-                dot_products.push(WitnessDotProduct {
-                    cycle,
-                    addr_0,
-                    addr_1,
-                    addr_res,
-                    len: *size,
-                    slice_0,
-                    slice_1,
-                    res,
-                });
-            }
-            Instruction::MultilinearEval {
-                coeffs,
-                point,
-                res,
-                n_vars,
-            } => {
-                let addr_coeffs = coeffs.read_value(memory, fp).unwrap().to_usize();
-                let addr_point = point.read_value(memory, fp).unwrap().to_usize();
-                let addr_res = res.read_value(memory, fp).unwrap().to_usize();
-
-                let log_point_size = log2_ceil_usize(*n_vars * DIMENSION);
-                let point_slice = memory
-                    .slice(addr_point << log_point_size, *n_vars * DIMENSION)
-                    .unwrap();
-                for i in *n_vars * DIMENSION..(*n_vars * DIMENSION).next_power_of_two() {
-                    assert!(
-                        memory
-                            .get((addr_point << log_point_size) + i)
-                            .unwrap()
-                            .is_zero()
-                    ); // padding
-                }
-                let point = point_slice[..*n_vars * DIMENSION]
-                    .chunks_exact(DIMENSION)
-                    .map(|chunk| EF::from_basis_coefficients_slice(chunk).unwrap())
-                    .collect::<Vec<_>>();
-
-                let res = memory.get_vector(addr_res).unwrap();
-                assert!(res[DIMENSION..].iter().all(|&x| x.is_zero()));
-                vm_multilinear_evals.push(WitnessMultilinearEval {
-                    cycle,
-                    addr_coeffs,
-                    addr_point,
-                    addr_res,
-                    n_vars: *n_vars,
-                    point,
-                    res: EF::from_basis_coefficients_slice(&res[..DIMENSION]).unwrap(),
-                });
-            }
-            _ => {}
-        }
     }
 
     // repeat the last row to get to a power of two

From fdcd03fc7fded93723d808e64b6c6272d427e282 Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Thu, 18 Sep 2025 21:52:22 +0900
Subject: [PATCH 05/11] Add a blank line for improved readability in the Clippy
 job section of the Rust workflow

---
 .github/workflows/rust.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index a3d2b0d0..512ce70f 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -28,6 +28,7 @@ jobs:
   cargo-clippy:
     runs-on: ubuntu-latest
     name: Clippy
+    
     steps:
       - uses: actions/checkout@v4
       - name: Install nightly toolchain

From d96e0dc8c2b4d32bffde799bc3fc5afbfe003655 Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Fri, 19 Sep 2025 01:06:13 +0900
Subject: [PATCH 06/11] Remove unnecessary blank line in the Clippy job section
 of the Rust workflow

---
 .github/workflows/rust.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 512ce70f..ef524661 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -28,7 +28,7 @@ jobs:
   cargo-clippy:
     runs-on: ubuntu-latest
     name: Clippy
-    
+
     steps:
       - uses: actions/checkout@v4
       - name: Install nightly toolchain

From 5e8b8e3150d32304796ee932e6501aa9824f67f9 Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Sat, 20 Sep 2025 13:40:51 +0900
Subject: [PATCH 07/11] **Unit Tests Added** - `crates/lean_vm/src/runner.rs`:
 Added two comprehensive tests (`vm_precompile_events_capture_expected_data`,
 `vm_precompile_events_only_final_pass`) covering event contents,
 pointer/address assertions, value integrity for all precompile types, and
 ensuring only the final execution populates event vectors. -
 `crates/lean_prover/witness_generation/src/execution_trace.rs`: Added
 end-to-end test (`execution_trace_uses_vm_events`) asserting witness
 construction directly from VM events, including poseidon padding invariants,
 dot-product/multilinear data, and evaluation outputs; auxiliary guard skips
 the sentinel `ending_pc` to avoid panics when iterating PCs.

**Build/Test Adjustments**
- Ensured helper data uses disjoint memory regions and power-of-two frame growth so tests run deterministically.
- `get_execution_trace` now skips the final `pc == ending_pc` entry to prevent out-of-bounds access during witness generation.

**Tests Run**
- `cargo test -p lean_vm -q`
- `cargo test -p witness_generation -q`

(`cargo test --workspace` still aborts on existing `test_zkvm` stack overflow, so I validated the new coverage with targeted crate runs.)
---
 .../witness_generation/src/execution_trace.rs | 368 +++++++++++++++++
 crates/lean_vm/src/runner.rs                  | 372 +++++++++++++++++-
 2 files changed, 739 insertions(+), 1 deletion(-)

diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs
index cdabc2fb..099d6aec 100644
--- a/crates/lean_prover/witness_generation/src/execution_trace.rs
+++ b/crates/lean_prover/witness_generation/src/execution_trace.rs
@@ -117,6 +117,9 @@ pub fn get_execution_trace(
         .zip(&execution_result.fps)
         .enumerate()
     {
+        if pc == bytecode.ending_pc {
+            continue;
+        }
         let instruction = &bytecode.instructions[pc];
         let field_repr = field_representation(instruction);
 
@@ -263,3 +266,368 @@ pub fn get_execution_trace(
         memory,
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use p3_field::{BasedVectorSpace, PrimeCharacteristicRing, dot_product};
+    use p3_util::log2_ceil_usize;
+    use std::collections::BTreeMap;
+    use utils::ToUsize;
+
+    const POSEIDON16_ARG_A_PTR: usize = 6;
+    const POSEIDON16_ARG_B_PTR: usize = 7;
+    const POSEIDON24_ARG_A_PTR: usize = 11;
+    const POSEIDON24_ARG_B_PTR: usize = 13;
+    const DOT_ARG0_PTR: usize = 180;
+    const DOT_ARG1_PTR: usize = 200;
+    const MLE_COEFF_PTR: usize = 32;
+    const MLE_POINT_PTR: usize = 15;
+
+    const POSEIDON16_RES_OFFSET: usize = 0;
+    const POSEIDON24_RES_OFFSET: usize = 1;
+    const DOT_RES_OFFSET: usize = 2;
+    const MLE_RES_OFFSET: usize = 3;
+
+    const DOT_PRODUCT_LEN: usize = 2;
+    const MLE_N_VARS: usize = 1;
+
+    const MAX_MEMORY_INDEX: usize = DOT_ARG1_PTR + DOT_PRODUCT_LEN * DIMENSION - 1;
+    const PUBLIC_INPUT_LEN: usize = MAX_MEMORY_INDEX - PUBLIC_INPUT_START + 1;
+
+    const POSEIDON16_ARG_A_VALUES: [u64; VECTOR_LEN] = [1, 2, 3, 4, 5, 6, 7, 8];
+    const POSEIDON16_ARG_B_VALUES: [u64; VECTOR_LEN] = [101, 102, 103, 104, 105, 106, 107, 108];
+    const POSEIDON24_ARG_A_VALUES: [[u64; VECTOR_LEN]; 2] = [
+        [201, 202, 203, 204, 205, 206, 207, 208],
+        [211, 212, 213, 214, 215, 216, 217, 218],
+    ];
+    const POSEIDON24_ARG_B_VALUES: [u64; VECTOR_LEN] = [221, 222, 223, 224, 225, 226, 227, 228];
+    const DOT_ARG0_VALUES: [[u64; DIMENSION]; DOT_PRODUCT_LEN] =
+        [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]];
+    const DOT_ARG1_VALUES: [[u64; DIMENSION]; DOT_PRODUCT_LEN] =
+        [[11, 12, 13, 14, 15], [16, 17, 18, 19, 20]];
+    const MLE_COEFF_VALUES: [u64; 1 << MLE_N_VARS] = [7, 9];
+    const MLE_POINT_VALUES: [u64; DIMENSION] = [21, 22, 23, 24, 25];
+
+    fn f(value: u64) -> F {
+        F::from_isize(value as isize)
+    }
+
+    fn set_public_input_cell(public_input: &mut [F], memory_index: usize, value: F) {
+        assert!(memory_index >= PUBLIC_INPUT_START);
+        let idx = memory_index - PUBLIC_INPUT_START;
+        assert!(idx < public_input.len());
+        public_input[idx] = value;
+    }
+
+    fn set_vector(public_input: &mut [F], ptr: usize, values: &[u64]) {
+        assert_eq!(values.len(), VECTOR_LEN);
+        for (i, &value) in values.iter().enumerate() {
+            set_public_input_cell(public_input, ptr * VECTOR_LEN + i, f(value));
+        }
+    }
+
+    fn set_multivector(public_input: &mut [F], ptr: usize, chunks: &[&[u64]]) {
+        for (chunk_idx, chunk) in chunks.iter().enumerate() {
+            assert_eq!(chunk.len(), VECTOR_LEN);
+            for (i, &value) in chunk.iter().enumerate() {
+                set_public_input_cell(public_input, (ptr + chunk_idx) * VECTOR_LEN + i, f(value));
+            }
+        }
+    }
+
+    fn set_ef_slice(public_input: &mut [F], ptr: usize, elements: &[[u64; DIMENSION]]) {
+        for (i, coeffs) in elements.iter().enumerate() {
+            for (j, &value) in coeffs.iter().enumerate() {
+                set_public_input_cell(public_input, ptr + i * DIMENSION + j, f(value));
+            }
+        }
+    }
+
+    fn set_base_slice(public_input: &mut [F], start_index: usize, values: &[u64]) {
+        for (i, &value) in values.iter().enumerate() {
+            set_public_input_cell(public_input, start_index + i, f(value));
+        }
+    }
+
+    fn build_test_case() -> (Bytecode, Vec<F>) {
+        let mut public_input = vec![F::ZERO; PUBLIC_INPUT_LEN];
+
+        set_vector(
+            &mut public_input,
+            POSEIDON16_ARG_A_PTR,
+            &POSEIDON16_ARG_A_VALUES,
+        );
+        set_vector(
+            &mut public_input,
+            POSEIDON16_ARG_B_PTR,
+            &POSEIDON16_ARG_B_VALUES,
+        );
+
+        let poseidon24_chunks = [
+            &POSEIDON24_ARG_A_VALUES[0][..],
+            &POSEIDON24_ARG_A_VALUES[1][..],
+        ];
+        set_multivector(&mut public_input, POSEIDON24_ARG_A_PTR, &poseidon24_chunks);
+        set_vector(
+            &mut public_input,
+            POSEIDON24_ARG_B_PTR,
+            &POSEIDON24_ARG_B_VALUES,
+        );
+
+        set_ef_slice(&mut public_input, DOT_ARG0_PTR, &DOT_ARG0_VALUES);
+        set_ef_slice(&mut public_input, DOT_ARG1_PTR, &DOT_ARG1_VALUES);
+
+        let coeff_base = MLE_COEFF_PTR << MLE_N_VARS;
+        set_base_slice(&mut public_input, coeff_base, &MLE_COEFF_VALUES);
+
+        let log_point_size = log2_ceil_usize(MLE_N_VARS * DIMENSION);
+        let point_base = MLE_POINT_PTR << log_point_size;
+        set_base_slice(&mut public_input, point_base, &MLE_POINT_VALUES);
+
+        let mut hints = BTreeMap::new();
+        hints.insert(
+            0,
+            vec![Hint::RequestMemory {
+                offset: POSEIDON16_RES_OFFSET,
+                size: MemOrConstant::Constant(f(2)),
+                vectorized: true,
+                vectorized_len: LOG_VECTOR_LEN + 1,
+            }],
+        );
+        hints.insert(
+            1,
+            vec![Hint::RequestMemory {
+                offset: POSEIDON24_RES_OFFSET,
+                size: MemOrConstant::Constant(f(1)),
+                vectorized: true,
+                vectorized_len: LOG_VECTOR_LEN,
+            }],
+        );
+        hints.insert(
+            2,
+            vec![Hint::RequestMemory {
+                offset: DOT_RES_OFFSET,
+                size: MemOrConstant::Constant(f(1)),
+                vectorized: false,
+                vectorized_len: 0,
+            }],
+        );
+        hints.insert(
+            3,
+            vec![Hint::RequestMemory {
+                offset: MLE_RES_OFFSET,
+                size: MemOrConstant::Constant(f(1)),
+                vectorized: true,
+                vectorized_len: LOG_VECTOR_LEN,
+            }],
+        );
+
+        let instructions = vec![
+            Instruction::Poseidon2_16 {
+                arg_a: MemOrConstant::Constant(f(POSEIDON16_ARG_A_PTR as u64)),
+                arg_b: MemOrConstant::Constant(f(POSEIDON16_ARG_B_PTR as u64)),
+                res: MemOrFp::MemoryAfterFp {
+                    offset: POSEIDON16_RES_OFFSET,
+                },
+            },
+            Instruction::Poseidon2_24 {
+                arg_a: MemOrConstant::Constant(f(POSEIDON24_ARG_A_PTR as u64)),
+                arg_b: MemOrConstant::Constant(f(POSEIDON24_ARG_B_PTR as u64)),
+                res: MemOrFp::MemoryAfterFp {
+                    offset: POSEIDON24_RES_OFFSET,
+                },
+            },
+            Instruction::DotProductExtensionExtension {
+                arg0: MemOrConstant::Constant(f(DOT_ARG0_PTR as u64)),
+                arg1: MemOrConstant::Constant(f(DOT_ARG1_PTR as u64)),
+                res: MemOrFp::MemoryAfterFp {
+                    offset: DOT_RES_OFFSET,
+                },
+                size: DOT_PRODUCT_LEN,
+            },
+            Instruction::MultilinearEval {
+                coeffs: MemOrConstant::Constant(f(MLE_COEFF_PTR as u64)),
+                point: MemOrConstant::Constant(f(MLE_POINT_PTR as u64)),
+                res: MemOrFp::MemoryAfterFp {
+                    offset: MLE_RES_OFFSET,
+                },
+                n_vars: MLE_N_VARS,
+            },
+        ];
+
+        let bytecode = Bytecode {
+            instructions,
+            hints,
+            starting_frame_memory: 512,
+            ending_pc: 4,
+        };
+
+        (bytecode, public_input)
+    }
+
+    fn embed_base_into_extension(value: F) -> EF {
+        let mut coeffs = [F::ZERO; DIMENSION];
+        coeffs[0] = value;
+        EF::from_basis_coefficients_slice(&coeffs).unwrap()
+    }
+
+    #[test]
+    fn execution_trace_uses_vm_events() {
+        let (bytecode, public_input) = build_test_case();
+        let execution_result =
+            execute_bytecode(&bytecode, &public_input, &[], "", &BTreeMap::new(), false);
+
+        let trace = get_execution_trace(&bytecode, &execution_result);
+
+        assert_eq!(execution_result.vm_poseidon16_events.len(), 1);
+        assert_eq!(execution_result.vm_poseidon24_events.len(), 1);
+        assert_eq!(execution_result.vm_dot_product_events.len(), 1);
+        assert_eq!(execution_result.vm_multilinear_eval_events.len(), 1);
+
+        assert_eq!(trace.n_poseidons_16, 1);
+        assert_eq!(trace.poseidons_16.len(), 1);
+        assert_eq!(trace.n_poseidons_24, 1);
+        assert_eq!(trace.poseidons_24.len(), 1);
+        assert_eq!(trace.dot_products.len(), 1);
+        assert_eq!(trace.vm_multilinear_evals.len(), 1);
+
+        let poseidon16_event = &execution_result.vm_poseidon16_events[0];
+        let witness16 = &trace.poseidons_16[0];
+        assert_eq!(witness16.cycle, Some(poseidon16_event.cycle));
+        assert_eq!(witness16.addr_input_a, poseidon16_event.addr_input_a);
+        assert_eq!(witness16.addr_input_b, poseidon16_event.addr_input_b);
+        assert_eq!(witness16.addr_output, poseidon16_event.addr_output);
+        assert_eq!(witness16.input, poseidon16_event.input);
+        assert_eq!(witness16.output, poseidon16_event.output);
+
+        let mut expected_poseidon16_input = [F::ZERO; 16];
+        expected_poseidon16_input[..VECTOR_LEN].copy_from_slice(&POSEIDON16_ARG_A_VALUES.map(f));
+        expected_poseidon16_input[VECTOR_LEN..].copy_from_slice(&POSEIDON16_ARG_B_VALUES.map(f));
+        assert_eq!(witness16.input, expected_poseidon16_input);
+
+        let poseidon16_perm = get_poseidon16().permute(poseidon16_event.input);
+        assert_eq!(witness16.output, poseidon16_perm);
+
+        let poseidon24_event = &execution_result.vm_poseidon24_events[0];
+        let witness24 = &trace.poseidons_24[0];
+        assert_eq!(witness24.cycle, Some(poseidon24_event.cycle));
+        assert_eq!(witness24.addr_input_a, poseidon24_event.addr_input_a);
+        assert_eq!(witness24.addr_input_b, poseidon24_event.addr_input_b);
+        assert_eq!(witness24.addr_output, poseidon24_event.addr_output);
+        assert_eq!(witness24.input, poseidon24_event.input);
+        assert_eq!(witness24.output, poseidon24_event.output);
+
+        let mut expected_poseidon24_input = [F::ZERO; 24];
+        expected_poseidon24_input[..VECTOR_LEN].copy_from_slice(&POSEIDON24_ARG_A_VALUES[0].map(f));
+        expected_poseidon24_input[VECTOR_LEN..2 * VECTOR_LEN]
+            .copy_from_slice(&POSEIDON24_ARG_A_VALUES[1].map(f));
+        expected_poseidon24_input[2 * VECTOR_LEN..]
+            .copy_from_slice(&POSEIDON24_ARG_B_VALUES.map(f));
+        assert_eq!(witness24.input, expected_poseidon24_input);
+
+        let mut poseidon24_input = poseidon24_event.input;
+        get_poseidon24().permute_mut(&mut poseidon24_input);
+        let expected_poseidon24: [F; VECTOR_LEN] =
+            poseidon24_input[2 * VECTOR_LEN..].try_into().unwrap();
+        assert_eq!(witness24.output, expected_poseidon24);
+
+        let dot_event = &execution_result.vm_dot_product_events[0];
+        let witness_dot = &trace.dot_products[0];
+        assert_eq!(witness_dot.cycle, dot_event.cycle);
+        assert_eq!(witness_dot.addr_0, dot_event.addr_0);
+        assert_eq!(witness_dot.addr_1, dot_event.addr_1);
+        assert_eq!(witness_dot.addr_res, dot_event.addr_res);
+        assert_eq!(witness_dot.len, dot_event.len);
+        assert_eq!(witness_dot.slice_0, dot_event.slice_0);
+        assert_eq!(witness_dot.slice_1, dot_event.slice_1);
+        assert_eq!(witness_dot.res, dot_event.res);
+
+        let expected_dot_slice_0 = DOT_ARG0_VALUES
+            .iter()
+            .map(|coeffs| coeffs.map(f))
+            .map(|coeffs| EF::from_basis_coefficients_slice(&coeffs).unwrap())
+            .collect::<Vec<_>>();
+        let expected_dot_slice_1 = DOT_ARG1_VALUES
+            .iter()
+            .map(|coeffs| coeffs.map(f))
+            .map(|coeffs| EF::from_basis_coefficients_slice(&coeffs).unwrap())
+            .collect::<Vec<_>>();
+        assert_eq!(witness_dot.slice_0, expected_dot_slice_0);
+        assert_eq!(witness_dot.slice_1, expected_dot_slice_1);
+
+        let expected_dot = dot_product::<EF, _, _>(
+            witness_dot.slice_0.iter().copied(),
+            witness_dot.slice_1.iter().copied(),
+        );
+        assert_eq!(witness_dot.res, expected_dot);
+
+        let mle_event = &execution_result.vm_multilinear_eval_events[0];
+        let witness_mle = &trace.vm_multilinear_evals[0];
+        assert_eq!(witness_mle.cycle, mle_event.cycle);
+        assert_eq!(witness_mle.addr_coeffs, mle_event.addr_coeffs);
+        assert_eq!(witness_mle.addr_point, mle_event.addr_point);
+        assert_eq!(witness_mle.addr_res, mle_event.addr_res);
+        assert_eq!(witness_mle.n_vars, mle_event.n_vars);
+        assert_eq!(witness_mle.point, mle_event.point);
+        assert_eq!(witness_mle.res, mle_event.res);
+
+        let expected_point =
+            vec![EF::from_basis_coefficients_slice(&MLE_POINT_VALUES.map(f)).unwrap()];
+        assert_eq!(witness_mle.point, expected_point);
+
+        let coeff_slice = execution_result
+            .memory
+            .slice(
+                mle_event.addr_coeffs << mle_event.n_vars,
+                1 << mle_event.n_vars,
+            )
+            .unwrap();
+        let point = witness_mle.point[0];
+        let c0 = embed_base_into_extension(coeff_slice[0]);
+        let c1 = embed_base_into_extension(coeff_slice[1]);
+        let expected_mle = c0 * (EF::ONE - point) + c1 * point;
+        assert_eq!(witness_mle.res, expected_mle);
+
+        assert_eq!(
+            trace.poseidons_16.len(),
+            trace.n_poseidons_16.next_power_of_two()
+        );
+        assert_eq!(
+            trace.poseidons_24.len(),
+            trace.n_poseidons_24.next_power_of_two()
+        );
+        assert_eq!(
+            trace.public_memory_size,
+            execution_result.public_memory_size
+        );
+
+        let poseidon16_res_ptr = execution_result
+            .memory
+            .get(execution_result.fps[poseidon16_event.cycle] + POSEIDON16_RES_OFFSET)
+            .unwrap()
+            .to_usize();
+        assert_eq!(witness16.addr_output, poseidon16_res_ptr);
+
+        let poseidon24_res_ptr = execution_result
+            .memory
+            .get(execution_result.fps[poseidon24_event.cycle] + POSEIDON24_RES_OFFSET)
+            .unwrap()
+            .to_usize();
+        assert_eq!(witness24.addr_output, poseidon24_res_ptr);
+
+        let dot_res_ptr = execution_result
+            .memory
+            .get(execution_result.fps[dot_event.cycle] + DOT_RES_OFFSET)
+            .unwrap()
+            .to_usize();
+        assert_eq!(witness_dot.addr_res, dot_res_ptr);
+
+        let mle_res_ptr = execution_result
+            .memory
+            .get(execution_result.fps[mle_event.cycle] + MLE_RES_OFFSET)
+            .unwrap()
+            .to_usize();
+        assert_eq!(witness_mle.addr_res, mle_res_ptr);
+    }
+}
diff --git a/crates/lean_vm/src/runner.rs b/crates/lean_vm/src/runner.rs
index 3d8765b5..40ae47da 100644
--- a/crates/lean_vm/src/runner.rs
+++ b/crates/lean_vm/src/runner.rs
@@ -572,7 +572,7 @@ fn execute_bytecode_helper(
                     .map(|chunk| EF::from_basis_coefficients_slice(chunk).unwrap())
                     .collect::<Vec<_>>();
 
-                let eval = slice_coeffs.evaluate(&MultilinearPoint(point));
+                let eval = slice_coeffs.evaluate(&MultilinearPoint(point.clone()));
                 let mut res_vec = eval.as_basis_coefficients_slice().to_vec();
                 res_vec.resize(VECTOR_LEN, F::ZERO);
                 memory.set_vector(ptr_res, res_vec.try_into().unwrap())?;
@@ -704,3 +704,373 @@ fn execute_bytecode_helper(
         vm_multilinear_eval_events,
     })
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use p3_field::BasedVectorSpace;
+    use p3_util::log2_ceil_usize;
+    use std::collections::BTreeMap;
+    use utils::ToUsize;
+
+    // Pointers for precompile inputs allocated in public memory.
+    const POSEIDON16_ARG_A_PTR: usize = 6;
+    const POSEIDON16_ARG_B_PTR: usize = 7;
+    const POSEIDON24_ARG_A_PTR: usize = 11; // uses ptr and ptr + 1
+    const POSEIDON24_ARG_B_PTR: usize = 13;
+    const DOT_ARG0_PTR: usize = 180; // normal pointer, len 2
+    const DOT_ARG1_PTR: usize = 200; // normal pointer, len 2
+    const MLE_COEFF_PTR: usize = 32; // interpreted with shift << n_vars
+    const MLE_POINT_PTR: usize = 15; // interpreted with shift << log_point_size
+
+    // Offsets used in hints for storing result pointers at fp + offset.
+    const POSEIDON16_RES_OFFSET: usize = 0;
+    const POSEIDON24_RES_OFFSET: usize = 1;
+    const DOT_RES_OFFSET: usize = 2;
+    const MLE_RES_OFFSET: usize = 3;
+
+    const DOT_PRODUCT_LEN: usize = 2;
+    const MLE_N_VARS: usize = 1;
+
+    // Ensure public input covers the highest index used (dot product arg1 slice).
+    const MAX_MEMORY_INDEX: usize = DOT_ARG1_PTR + DOT_PRODUCT_LEN * DIMENSION - 1;
+    const PUBLIC_INPUT_LEN: usize = MAX_MEMORY_INDEX - PUBLIC_INPUT_START + 1;
+
+    const POSEIDON16_ARG_A_VALUES: [u64; VECTOR_LEN] = [1, 2, 3, 4, 5, 6, 7, 8];
+    const POSEIDON16_ARG_B_VALUES: [u64; VECTOR_LEN] = [101, 102, 103, 104, 105, 106, 107, 108];
+    const POSEIDON24_ARG_A_VALUES: [[u64; VECTOR_LEN]; 2] = [
+        [201, 202, 203, 204, 205, 206, 207, 208],
+        [211, 212, 213, 214, 215, 216, 217, 218],
+    ];
+    const POSEIDON24_ARG_B_VALUES: [u64; VECTOR_LEN] = [221, 222, 223, 224, 225, 226, 227, 228];
+    const DOT_ARG0_VALUES: [[u64; DIMENSION]; DOT_PRODUCT_LEN] =
+        [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]];
+    const DOT_ARG1_VALUES: [[u64; DIMENSION]; DOT_PRODUCT_LEN] =
+        [[11, 12, 13, 14, 15], [16, 17, 18, 19, 20]];
+    const MLE_COEFF_VALUES: [u64; 1 << MLE_N_VARS] = [7, 9];
+    const MLE_POINT_VALUES: [u64; DIMENSION] = [21, 22, 23, 24, 25];
+
+    fn f(value: u64) -> F {
+        F::from_isize(value as isize)
+    }
+
+    fn set_public_input_cell(public_input: &mut [F], memory_index: usize, value: F) {
+        assert!(memory_index >= PUBLIC_INPUT_START);
+        let idx = memory_index - PUBLIC_INPUT_START;
+        assert!(idx < public_input.len());
+        public_input[idx] = value;
+    }
+
+    fn set_vector(public_input: &mut [F], ptr: usize, values: &[u64]) {
+        assert_eq!(values.len(), VECTOR_LEN);
+        for (i, &value) in values.iter().enumerate() {
+            set_public_input_cell(public_input, ptr * VECTOR_LEN + i, f(value));
+        }
+    }
+
+    fn set_multivector(public_input: &mut [F], ptr: usize, chunks: &[&[u64]]) {
+        for (chunk_index, chunk) in chunks.iter().enumerate() {
+            assert_eq!(chunk.len(), VECTOR_LEN);
+            for (i, &value) in chunk.iter().enumerate() {
+                set_public_input_cell(public_input, (ptr + chunk_index) * VECTOR_LEN + i, f(value));
+            }
+        }
+    }
+
+    fn set_ef_slice(public_input: &mut [F], ptr: usize, elements: &[[u64; DIMENSION]]) {
+        for (i, coeffs) in elements.iter().enumerate() {
+            for (j, &value) in coeffs.iter().enumerate() {
+                set_public_input_cell(public_input, ptr + i * DIMENSION + j, f(value));
+            }
+        }
+    }
+
+    fn set_base_slice(public_input: &mut [F], start_index: usize, values: &[u64]) {
+        for (i, &value) in values.iter().enumerate() {
+            set_public_input_cell(public_input, start_index + i, f(value));
+        }
+    }
+
+    fn build_test_case() -> (Bytecode, Vec<F>) {
+        let mut public_input = vec![F::ZERO; PUBLIC_INPUT_LEN];
+
+        set_vector(
+            &mut public_input,
+            POSEIDON16_ARG_A_PTR,
+            &POSEIDON16_ARG_A_VALUES,
+        );
+        set_vector(
+            &mut public_input,
+            POSEIDON16_ARG_B_PTR,
+            &POSEIDON16_ARG_B_VALUES,
+        );
+
+        let poseidon24_chunks = [
+            &POSEIDON24_ARG_A_VALUES[0][..],
+            &POSEIDON24_ARG_A_VALUES[1][..],
+        ];
+        set_multivector(&mut public_input, POSEIDON24_ARG_A_PTR, &poseidon24_chunks);
+        set_vector(
+            &mut public_input,
+            POSEIDON24_ARG_B_PTR,
+            &POSEIDON24_ARG_B_VALUES,
+        );
+
+        set_ef_slice(&mut public_input, DOT_ARG0_PTR, &DOT_ARG0_VALUES);
+        set_ef_slice(&mut public_input, DOT_ARG1_PTR, &DOT_ARG1_VALUES);
+
+        let coeff_base = MLE_COEFF_PTR << MLE_N_VARS;
+        set_base_slice(&mut public_input, coeff_base, &MLE_COEFF_VALUES);
+
+        let log_point_size = log2_ceil_usize(MLE_N_VARS * DIMENSION);
+        let point_base = MLE_POINT_PTR << log_point_size;
+        set_base_slice(&mut public_input, point_base, &MLE_POINT_VALUES);
+
+        let mut hints = BTreeMap::new();
+        hints.insert(
+            0,
+            vec![Hint::RequestMemory {
+                offset: POSEIDON16_RES_OFFSET,
+                size: MemOrConstant::Constant(f(2)),
+                vectorized: true,
+                vectorized_len: LOG_VECTOR_LEN + 1,
+            }],
+        );
+        hints.insert(
+            1,
+            vec![Hint::RequestMemory {
+                offset: POSEIDON24_RES_OFFSET,
+                size: MemOrConstant::Constant(f(1)),
+                vectorized: true,
+                vectorized_len: LOG_VECTOR_LEN,
+            }],
+        );
+        hints.insert(
+            2,
+            vec![Hint::RequestMemory {
+                offset: DOT_RES_OFFSET,
+                size: MemOrConstant::Constant(f(1)),
+                vectorized: false,
+                vectorized_len: 0,
+            }],
+        );
+        hints.insert(
+            3,
+            vec![Hint::RequestMemory {
+                offset: MLE_RES_OFFSET,
+                size: MemOrConstant::Constant(f(1)),
+                vectorized: true,
+                vectorized_len: LOG_VECTOR_LEN,
+            }],
+        );
+
+        let instructions = vec![
+            Instruction::Poseidon2_16 {
+                arg_a: MemOrConstant::Constant(f(POSEIDON16_ARG_A_PTR as u64)),
+                arg_b: MemOrConstant::Constant(f(POSEIDON16_ARG_B_PTR as u64)),
+                res: MemOrFp::MemoryAfterFp {
+                    offset: POSEIDON16_RES_OFFSET,
+                },
+            },
+            Instruction::Poseidon2_24 {
+                arg_a: MemOrConstant::Constant(f(POSEIDON24_ARG_A_PTR as u64)),
+                arg_b: MemOrConstant::Constant(f(POSEIDON24_ARG_B_PTR as u64)),
+                res: MemOrFp::MemoryAfterFp {
+                    offset: POSEIDON24_RES_OFFSET,
+                },
+            },
+            Instruction::DotProductExtensionExtension {
+                arg0: MemOrConstant::Constant(f(DOT_ARG0_PTR as u64)),
+                arg1: MemOrConstant::Constant(f(DOT_ARG1_PTR as u64)),
+                res: MemOrFp::MemoryAfterFp {
+                    offset: DOT_RES_OFFSET,
+                },
+                size: DOT_PRODUCT_LEN,
+            },
+            Instruction::MultilinearEval {
+                coeffs: MemOrConstant::Constant(f(MLE_COEFF_PTR as u64)),
+                point: MemOrConstant::Constant(f(MLE_POINT_PTR as u64)),
+                res: MemOrFp::MemoryAfterFp {
+                    offset: MLE_RES_OFFSET,
+                },
+                n_vars: MLE_N_VARS,
+            },
+        ];
+
+        let bytecode = Bytecode {
+            instructions,
+            hints,
+            starting_frame_memory: 512,
+            ending_pc: 4,
+        };
+
+        (bytecode, public_input)
+    }
+
+    fn run_program() -> (Bytecode, ExecutionResult) {
+        let (bytecode, public_input) = build_test_case();
+        let result = execute_bytecode(&bytecode, &public_input, &[], "", &BTreeMap::new(), false);
+        (bytecode, result)
+    }
+
+    #[test]
+    fn vm_precompile_events_capture_expected_data() {
+        let (_bytecode, execution_result) = run_program();
+
+        assert_eq!(execution_result.vm_poseidon16_events.len(), 1);
+        assert_eq!(execution_result.vm_poseidon24_events.len(), 1);
+        assert_eq!(execution_result.vm_dot_product_events.len(), 1);
+        assert_eq!(execution_result.vm_multilinear_eval_events.len(), 1);
+
+        let poseidon16_event = &execution_result.vm_poseidon16_events[0];
+        assert_eq!(poseidon16_event.cycle, 0);
+        assert_eq!(poseidon16_event.addr_input_a, POSEIDON16_ARG_A_PTR);
+        assert_eq!(poseidon16_event.addr_input_b, POSEIDON16_ARG_B_PTR);
+
+        let poseidon16_res_ptr = execution_result
+            .memory
+            .get(execution_result.fps[poseidon16_event.cycle] + POSEIDON16_RES_OFFSET)
+            .unwrap()
+            .to_usize();
+        assert_eq!(poseidon16_event.addr_output, poseidon16_res_ptr);
+
+        let poseidon16_input_a = POSEIDON16_ARG_A_VALUES.map(f);
+        let poseidon16_input_b = POSEIDON16_ARG_B_VALUES.map(f);
+        let mut expected_poseidon16_input = [F::ZERO; 16];
+        expected_poseidon16_input[..VECTOR_LEN].copy_from_slice(&poseidon16_input_a);
+        expected_poseidon16_input[VECTOR_LEN..].copy_from_slice(&poseidon16_input_b);
+        assert_eq!(poseidon16_event.input, expected_poseidon16_input);
+
+        let poseidon16_output = execution_result
+            .memory
+            .get_vectorized_slice(poseidon16_event.addr_output, 2)
+            .unwrap();
+        assert_eq!(poseidon16_output, poseidon16_event.output.to_vec());
+
+        let poseidon24_event = &execution_result.vm_poseidon24_events[0];
+        assert_eq!(poseidon24_event.cycle, 1);
+        assert_eq!(poseidon24_event.addr_input_a, POSEIDON24_ARG_A_PTR);
+        assert_eq!(poseidon24_event.addr_input_b, POSEIDON24_ARG_B_PTR);
+
+        let poseidon24_res_ptr = execution_result
+            .memory
+            .get(execution_result.fps[poseidon24_event.cycle] + POSEIDON24_RES_OFFSET)
+            .unwrap()
+            .to_usize();
+        assert_eq!(poseidon24_event.addr_output, poseidon24_res_ptr);
+
+        let poseidon24_input_a0 = POSEIDON24_ARG_A_VALUES[0].map(f);
+        let poseidon24_input_a1 = POSEIDON24_ARG_A_VALUES[1].map(f);
+        let poseidon24_input_b = POSEIDON24_ARG_B_VALUES.map(f);
+        let mut expected_poseidon24_input = [F::ZERO; 24];
+        expected_poseidon24_input[..VECTOR_LEN].copy_from_slice(&poseidon24_input_a0);
+        expected_poseidon24_input[VECTOR_LEN..2 * VECTOR_LEN].copy_from_slice(&poseidon24_input_a1);
+        expected_poseidon24_input[2 * VECTOR_LEN..].copy_from_slice(&poseidon24_input_b);
+        assert_eq!(poseidon24_event.input, expected_poseidon24_input);
+
+        let poseidon24_output = execution_result
+            .memory
+            .get_vector(poseidon24_event.addr_output)
+            .unwrap();
+        assert_eq!(poseidon24_output, poseidon24_event.output);
+
+        let dot_event = &execution_result.vm_dot_product_events[0];
+        assert_eq!(dot_event.cycle, 2);
+        assert_eq!(dot_event.addr_0, DOT_ARG0_PTR);
+        assert_eq!(dot_event.addr_1, DOT_ARG1_PTR);
+
+        let dot_res_ptr = execution_result
+            .memory
+            .get(execution_result.fps[dot_event.cycle] + DOT_RES_OFFSET)
+            .unwrap()
+            .to_usize();
+        assert_eq!(dot_event.addr_res, dot_res_ptr);
+
+        let dot_slice_0 = DOT_ARG0_VALUES
+            .iter()
+            .map(|coeffs| coeffs.map(f))
+            .map(|coeffs| EF::from_basis_coefficients_slice(&coeffs).unwrap())
+            .collect::<Vec<_>>();
+        let dot_slice_1 = DOT_ARG1_VALUES
+            .iter()
+            .map(|coeffs| coeffs.map(f))
+            .map(|coeffs| EF::from_basis_coefficients_slice(&coeffs).unwrap())
+            .collect::<Vec<_>>();
+        assert_eq!(dot_event.slice_0, dot_slice_0);
+        assert_eq!(dot_event.slice_1, dot_slice_1);
+
+        let dot_res = execution_result
+            .memory
+            .get_ef_element(dot_event.addr_res)
+            .unwrap();
+        assert_eq!(dot_event.res, dot_res);
+
+        let mle_event = &execution_result.vm_multilinear_eval_events[0];
+        assert_eq!(mle_event.cycle, 3);
+        assert_eq!(mle_event.addr_coeffs, MLE_COEFF_PTR);
+        assert_eq!(mle_event.addr_point, MLE_POINT_PTR);
+
+        let mle_res_ptr = execution_result
+            .memory
+            .get(execution_result.fps[mle_event.cycle] + MLE_RES_OFFSET)
+            .unwrap()
+            .to_usize();
+        assert_eq!(mle_event.addr_res, mle_res_ptr);
+
+        let expected_point =
+            vec![EF::from_basis_coefficients_slice(&MLE_POINT_VALUES.map(f)).unwrap()];
+        assert_eq!(mle_event.point, expected_point);
+
+        let mle_res_vec = execution_result
+            .memory
+            .get_vector(mle_event.addr_res)
+            .unwrap();
+        let mle_res_coeffs: [F; DIMENSION] = mle_res_vec[..DIMENSION].try_into().unwrap();
+        let mle_res = EF::from_basis_coefficients_slice(&mle_res_coeffs).unwrap();
+        assert_eq!(mle_event.res, mle_res);
+    }
+
+    #[test]
+    fn vm_precompile_events_only_final_pass() {
+        let (bytecode, public_input) = build_test_case();
+        let mut std_out = String::new();
+        let mut history = ExecutionHistory::default();
+
+        let first_pass = execute_bytecode_helper(
+            &bytecode,
+            &public_input,
+            &[],
+            MAX_MEMORY_SIZE / 2,
+            false,
+            &mut std_out,
+            &mut history,
+            false,
+            &BTreeMap::new(),
+        )
+        .expect("first execution should succeed");
+
+        assert!(first_pass.vm_poseidon16_events.is_empty());
+        assert!(first_pass.vm_poseidon24_events.is_empty());
+        assert!(first_pass.vm_dot_product_events.is_empty());
+        assert!(first_pass.vm_multilinear_eval_events.is_empty());
+
+        let mut history_final = ExecutionHistory::default();
+        let final_pass = execute_bytecode_helper(
+            &bytecode,
+            &public_input,
+            &[],
+            first_pass.no_vec_runtime_memory,
+            true,
+            &mut String::new(),
+            &mut history_final,
+            false,
+            &BTreeMap::new(),
+        )
+        .expect("final execution should succeed");
+
+        assert_eq!(final_pass.vm_poseidon16_events.len(), 1);
+        assert_eq!(final_pass.vm_poseidon24_events.len(), 1);
+        assert_eq!(final_pass.vm_dot_product_events.len(), 1);
+        assert_eq!(final_pass.vm_multilinear_eval_events.len(), 1);
+    }
+}

From 74f274a94187a08c936a005f4705cf4334466860 Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Mon, 22 Sep 2025 14:35:15 +0900
Subject: [PATCH 08/11] remove related issue

---
 TODO.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/TODO.md b/TODO.md
index a0594b0c..40b19e92 100644
--- a/TODO.md
+++ b/TODO.md
@@ -8,7 +8,6 @@
 - one can "move out" the variable of the eq(.) polynomials out of the sumcheck computation in WHIR (as done in the PIOP)
 - Structured AIR: often no all the columns use both up/down -> only handle the used ones to speed up the PIOP zerocheck
 - use RowMAjorMatrix instead of Vec<Vec> for witness, and avoid any transpositions as suggested by Thomas
-- Fill Precompile tables during bytecode execution
 - Use Univariate Skip to commit to tables with k.2^n rows (k small)
 - avoid field embedding in the initial sumcheck of logup*, when table / values are in base field
 - opti logup* GKR: 
@@ -104,4 +103,4 @@ fn doesnt_work(x) inline -> 1 {
     } // the bug: we do not JUMP here, when inlined
     return 1; // will be compiled to `res = 1`; -> invalid (res = 0 and = 1 at the same time)
 }
-```
\ No newline at end of file
+```

From 0f2466831f9191297847163408ce29e208632fbd Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Mon, 22 Sep 2025 15:14:30 +0900
Subject: [PATCH 09/11] Refactor execution trace structure by encapsulating
 multilinear evaluation details within a new RowMultilinearEval struct. Update
 related tests to reflect this change and ensure correctness of assertions.

---
 .../witness_generation/src/execution_trace.rs | 35 ++++++++++---------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs
index 8d23308b..6aed1f4d 100644
--- a/crates/lean_prover/witness_generation/src/execution_trace.rs
+++ b/crates/lean_prover/witness_generation/src/execution_trace.rs
@@ -267,12 +267,13 @@ pub fn get_execution_trace(
     for e in &execution_result.vm_multilinear_eval_events {
         vm_multilinear_evals.push(WitnessMultilinearEval {
             cycle: e.cycle,
-            addr_coeffs: e.addr_coeffs,
-            addr_point: e.addr_point,
-            addr_res: e.addr_res,
-            n_vars: e.n_vars,
-            point: e.point.clone(),
-            res: e.res,
+            inner: RowMultilinearEval {
+                addr_coeffs: e.addr_coeffs,
+                addr_point: e.addr_point,
+                addr_res: e.addr_res,
+                point: e.point.clone(),
+                res: e.res,
+            },
         });
     }
 
@@ -616,17 +617,19 @@ mod tests {
 
         let mle_event = &execution_result.vm_multilinear_eval_events[0];
         let witness_mle = &trace.vm_multilinear_evals[0];
+        let witness_mle_row = &witness_mle.inner;
         assert_eq!(witness_mle.cycle, mle_event.cycle);
-        assert_eq!(witness_mle.addr_coeffs, mle_event.addr_coeffs);
-        assert_eq!(witness_mle.addr_point, mle_event.addr_point);
-        assert_eq!(witness_mle.addr_res, mle_event.addr_res);
-        assert_eq!(witness_mle.n_vars, mle_event.n_vars);
-        assert_eq!(witness_mle.point, mle_event.point);
-        assert_eq!(witness_mle.res, mle_event.res);
+        assert_eq!(witness_mle_row.addr_coeffs, mle_event.addr_coeffs);
+        assert_eq!(witness_mle_row.addr_point, mle_event.addr_point);
+        assert_eq!(witness_mle_row.addr_res, mle_event.addr_res);
+        assert_eq!(witness_mle_row.n_vars(), mle_event.n_vars);
+        let witness_point = &witness_mle_row.point;
+        assert_eq!(witness_point, &mle_event.point);
+        assert_eq!(witness_mle_row.res, mle_event.res);
 
         let expected_point =
             vec![EF::from_basis_coefficients_slice(&MLE_POINT_VALUES.map(f)).unwrap()];
-        assert_eq!(witness_mle.point, expected_point);
+        assert_eq!(witness_point, &expected_point);
 
         let coeff_slice = execution_result
             .memory
@@ -635,11 +638,11 @@ mod tests {
                 1 << mle_event.n_vars,
             )
             .unwrap();
-        let point = witness_mle.point[0];
+        let point = witness_point[0];
         let c0 = embed_base_into_extension(coeff_slice[0]);
         let c1 = embed_base_into_extension(coeff_slice[1]);
         let expected_mle = c0 * (EF::ONE - point) + c1 * point;
-        assert_eq!(witness_mle.res, expected_mle);
+        assert_eq!(witness_mle_row.res, expected_mle);
 
         assert_eq!(
             trace.poseidons_16.len(),
@@ -680,6 +683,6 @@ mod tests {
             .get(execution_result.fps[mle_event.cycle] + MLE_RES_OFFSET)
             .unwrap()
             .to_usize();
-        assert_eq!(witness_mle.addr_res, mle_res_ptr);
+        assert_eq!(witness_mle_row.addr_res, mle_res_ptr);
     }
 }

From ce7a9f195c7bb731a3f83d437a0bde19aab73ebf Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Mon, 22 Sep 2025 15:36:58 +0900
Subject: [PATCH 10/11] Update memory size constant in runner to
 MAX_RUNNER_MEMORY_SIZE for improved clarity and consistency.

---
 crates/lean_vm/src/runner.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/lean_vm/src/runner.rs b/crates/lean_vm/src/runner.rs
index 1da14878..ac66a703 100644
--- a/crates/lean_vm/src/runner.rs
+++ b/crates/lean_vm/src/runner.rs
@@ -1051,7 +1051,7 @@ mod tests {
             &bytecode,
             &public_input,
             &[],
-            MAX_MEMORY_SIZE / 2,
+            MAX_RUNNER_MEMORY_SIZE / 2,
             false,
             &mut std_out,
             &mut history,

From 8fab114aadecca04aaccad7b56b9c6e06bd6d444 Mon Sep 17 00:00:00 2001
From: adust09 <shoukitsuda@icloud.com>
Date: Mon, 22 Sep 2025 20:17:09 +0900
Subject: [PATCH 11/11] Add execution trace updates to handle instruction
 representation correctly at ending program counter

---
 .../lean_prover/witness_generation/src/execution_trace.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs
index 6aed1f4d..3c9d76cd 100644
--- a/crates/lean_prover/witness_generation/src/execution_trace.rs
+++ b/crates/lean_prover/witness_generation/src/execution_trace.rs
@@ -165,6 +165,14 @@ pub fn get_execution_trace(
         .enumerate()
     {
         if pc == bytecode.ending_pc {
+            if pc < bytecode.instructions.len() {
+                let field_repr = field_representation(&bytecode.instructions[pc]);
+                for (j, field) in field_repr.iter().enumerate() {
+                    trace[j][cycle] = *field;
+                }
+            }
+            trace[COL_INDEX_PC][cycle] = F::from_usize(pc);
+            trace[COL_INDEX_FP][cycle] = F::from_usize(fp);
             continue;
         }
         let instruction = &bytecode.instructions[pc];