From 52a4a5c373d08f385b71502f222aa97942aa6372 Mon Sep 17 00:00:00 2001 From: adust09 Date: Mon, 15 Sep 2025 10:18:46 +0900 Subject: [PATCH 01/11] =?UTF-8?q?CI:=20AVX2/AVX=E2=80=91512/NEON=20?= =?UTF-8?q?=E3=81=A7=E3=81=AE=E3=83=93=E3=83=AB=E3=83=89=E3=83=BB=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=20(vibe-kanban=20?= =?UTF-8?q?4c682469)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit branche name: feat/multi_arch_ci ## 背景 - 本ワークスペースの SIMD 最適化(AVX2/AVX‑512/NEON)コードパスの劣化検知が必要 。 - GitHub ホストランナーは AVX‑512 を保証せず、ARM64 も環境により未提供のため、実行戦略を分離。 - AVX2: x86_64 ランナーでビルド+テストを実行。 - AVX‑512: x86_64 ランナーでは未対応の可能性が高く、ビルドのみで担保。 - NEON: ARM64 ランナー(例: `ubuntu-24.04-arm64` または `self-hosted, Linux, ARM64`)でビルド+テストを実行。 ## スコープ - `.github/workflows/rust.yml` の Build & Test を CPU 機能マトリクス化。 - x86_64/AVX2 は実行テストまで、x86_64/AVX‑512 はビルドのみ、ARM64/NEON は実行テストまで。 - 既存の Clippy/Fmt ジョブは維持。 - すべて nightly、`Swatinem/rust-cache@v2`、`RUST_BACKTRACE=1` を適用。 ## 受け入れ条件 - Push/PR で以下が動作: - x64/AVX2: リリースビルド+テストが成功。 - x64/AVX‑512: リリースビルドが成功(テストはスキップ)。 - ARM64/NEON: リリースビルド+テストが成功(ARM64 ランナーが利用可能な場合)。 - Clippy/Fmt は従来通り、警告・未整形で CI が失敗。 - 2 回目以降の実行でビルドキャッシュが効く。 ## 注意点 - AVX‑512 はホストランナーで未対応の可能性が高く、テスト実行は SIGILL 回避のためスキップ。 - ARM64 ランナーが未提供の場合は NEON 行を一時的にコメントアウト、または `self-hosted` ラベル運用に切替。 - 実行テストは `is_x86_feature_detected!` や `cfg(target_feature)` で条件化し、誤実行を防止。 ## 提案する rust.yml(差し替え案) ```yaml name: Rust CI on: push: branches: [ "main" ] pull_request: branches: [ "main" ] workflow_dispatch: permissions: contents: read concurrency: group: rust-ci-${{ github.ref }} cancel-in-progress: true env: CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 jobs: build-test-matrix: name: Build & Test (${{ matrix.name }}) runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: include: - name: x64-avx2 os: ubuntu-latest rustflags: -C target-cpu=haswell run_tests: true - name: x64-avx512-build os: ubuntu-latest rustflags: -C target-cpu=skylake-avx512 run_tests: false # build only to avoid SIGILL - name: arm64-neon # 利用可能な場合は GitHub ホスト ARM64 ランナーに置換 # 例: ubuntu-24.04-arm64(組織設定依存) # 未提供時は self-hosted を使用: [self-hosted, Linux, ARM64] os: ubuntu-24.04-arm64 rustflags: -C target-feature=+neon run_tests: true steps: - uses: actions/checkout@v4 - name: Install nightly toolchain uses: actions-rust-lang/setup-rust-toolchain@v1 with: toolchain: nightly - name: Cache cargo build uses: Swatinem/rust-cache@v2 - name: Build (release) run: cargo build --workspace --release --verbose env: RUSTFLAGS: ${{ matrix.rustflags }} - name: Test (release) if: ${{ matrix.run_tests == true }} run: cargo test --workspace --release --verbose env: RUSTFLAGS: ${{ matrix.rustflags }} clippy: runs-on: ubuntu-latest name: Clippy steps: - uses: actions/checkout@v4 - name: Install nightly toolchain uses: actions-rust-lang/setup-rust-toolchain@v1 with: toolchain: nightly components: clippy - name: Cache cargo build uses: Swatinem/rust-cache@v2 - name: Clippy (deny warnings) run: cargo clippy --workspace --all-targets --all-features -- -D warning s fmt: runs-on: ubuntu-latest name: Rustfmt steps: - uses: actions/checkout@v4 - name: Install nightly toolchain uses: actions-rust-lang/setup-rust-toolchain@v1 with: toolchain: nightly components: rustfmt - name: Cache cargo build uses: Swatinem/rust-cache@v2 - name: Check formatting run: cargo fmt --all -- --check ``` --- .github/workflows/rust.yml | 91 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 .github/workflows/rust.yml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 00000000..94d0a018 --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,91 @@ +name: Rust CI + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: rust-ci-${{ github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 + +jobs: + build-test-matrix: + name: Build & Test (${{ matrix.name }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - name: x64-avx2 + os: ubuntu-latest + rustflags: -C target-cpu=haswell + run_tests: true + - name: x64-avx512-build + os: ubuntu-latest + rustflags: -C target-cpu=skylake-avx512 + run_tests: false # build only to avoid SIGILL + - name: arm64-neon + # If available, use GitHub-hosted ARM64 runner + # e.g. ubuntu-24.04-arm64 (org setting dependent) + # If unavailable, consider: [self-hosted, Linux, ARM64] + os: ubuntu-24.04-arm64 + rustflags: -C target-feature=+neon + run_tests: true + steps: + - uses: actions/checkout@v4 + - name: Install nightly toolchain + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: nightly + - name: Cache cargo build + uses: Swatinem/rust-cache@v2 + - name: Build (release) + run: cargo build --workspace --release --verbose + env: + RUSTFLAGS: ${{ matrix.rustflags }} + - name: Test (release) + if: ${{ matrix.run_tests == true }} + run: cargo test --workspace --release --verbose + env: + RUSTFLAGS: ${{ matrix.rustflags }} + + clippy: + runs-on: ubuntu-latest + name: Clippy + steps: + - uses: actions/checkout@v4 + - name: Install nightly toolchain + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: nightly + components: clippy + - name: Cache cargo build + uses: Swatinem/rust-cache@v2 + - name: Clippy (deny warnings) + run: cargo clippy --workspace --all-targets --all-features -- -D warnings + + fmt: + runs-on: ubuntu-latest + name: Rustfmt + steps: + - uses: actions/checkout@v4 + - name: Install nightly toolchain + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: nightly + components: rustfmt + - name: Cache cargo build + uses: Swatinem/rust-cache@v2 + - name: Check formatting + run: cargo fmt --all -- --check + From b2200a6cc0939def3320509a2c5ea3e19c117332 Mon Sep 17 00:00:00 2001 From: adust09 Date: Mon, 15 Sep 2025 10:34:40 +0900 Subject: [PATCH 02/11] Implemented VM-side event collection and event-driven witness building. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit What I changed - VM events: Added `VmPoseidon16Event`, `VmPoseidon24Event`, `VmDotProductEvent`, `VmMultilinearEvalEvent` in `crates/vm/src/lib.rs`. Events store cycle, addresses, sizes, and values (inputs/outputs). - ExecutionResult: Extended with event vectors to export collected events to consumers: - `vm_poseidon16_events`, `vm_poseidon24_events`, `vm_dot_product_events`, `vm_multilinear_eval_events` - Runner integration: In `crates/vm/src/runner.rs` - Recorded events during final execution only (`final_execution == true`) right after each precompile executes. - Captured input and output values for reproducibility. - Added debug assertions to ensure call counts match event counts in final execution. - Trace construction: In `crates/zk_vm/zk_vm_trace/src/execution_trace.rs` - Removed the memory rescanning logic for precompile witness reconstruction. - Built `WitnessPoseidon*`, `WitnessDotProduct`, and `WitnessMultilinearEval` directly from the VM events. - Kept Poseidon padding and dummy row logic unchanged. Notes - First pass does not record events; final pass does. Debug asserts guard count consistency. - Poseidon tables retain 2^k padding and dummy rows exactly as before. - No fallback to rescanning remains; witness creation is event-only. Validation - Ran `cargo test`: all tests passed. - Existing trace columns generation remains intact and still uses instruction encodings and memory reads; only precompile witness collection switched to events. Next steps - Do you want me to add unit tests for event correctness (cycle, addresses, lengths, values) and padding invariants? - If you’d like, I can run a release test and a quick timing comparison to check trace build time improvements. --- crates/vm/src/lib.rs | 47 +++++++ crates/vm/src/runner.rs | 92 +++++++++++- .../zk_vm/zk_vm_trace/src/execution_trace.rs | 132 ++++++------------ 3 files changed, 183 insertions(+), 88 deletions(-) diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index a71ea6f4..f2363d59 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -23,3 +23,50 @@ pub const ONE_VEC_PTR: usize = 2; // convention (vectorized pointer of size 1, p pub const POSEIDON_16_NULL_HASH_PTR: usize = 3; // convention (vectorized pointer of size 2, = the 16 elements of poseidon_16(0)) pub const POSEIDON_24_NULL_HASH_PTR: usize = 5; // convention (vectorized pointer of size 1, = the last 8 elements of poseidon_24(0)) pub const PUBLIC_INPUT_START: usize = 6 * 8; // normal pointer + +// VM-side events collected during final execution only. +// These events are designed to be self-contained (store values), +// allowing zk_vm_trace to construct Witness* without rescanning memory. + +#[derive(Debug, Clone)] +pub struct VmDotProductEvent { + pub cycle: usize, + pub addr_0: usize, // normal pointer + pub addr_1: usize, // normal pointer + pub addr_res: usize, // normal pointer + pub len: usize, + pub slice_0: Vec, + pub slice_1: Vec, + pub res: EF, +} + +#[derive(Debug, Clone)] +pub struct VmMultilinearEvalEvent { + pub cycle: usize, + pub addr_coeffs: usize, // vectorized pointer, of size 8*2^n_vars + pub addr_point: usize, // vectorized pointer, of size n_vars + pub addr_res: usize, // vectorized pointer + pub n_vars: usize, + pub point: Vec, + pub res: EF, +} + +#[derive(Debug, Clone)] +pub struct VmPoseidon16Event { + pub cycle: usize, + pub addr_input_a: usize, // vectorized pointer (size 1) + pub addr_input_b: usize, // vectorized pointer (size 1) + pub addr_output: usize, // vectorized pointer (size 2) + pub input: [F; 16], + pub output: [F; 16], +} + +#[derive(Debug, Clone)] +pub struct VmPoseidon24Event { + pub cycle: usize, + pub addr_input_a: usize, // vectorized pointer (size 2) + pub addr_input_b: usize, // vectorized pointer (size 1) + pub addr_output: usize, // vectorized pointer (size 1) + pub input: [F; 24], + pub output: [F; 8], // last 8 elements of the output +} diff --git a/crates/vm/src/runner.rs b/crates/vm/src/runner.rs index 8a51d5d1..c343eab3 100644 --- a/crates/vm/src/runner.rs +++ b/crates/vm/src/runner.rs @@ -146,6 +146,10 @@ pub struct ExecutionResult { pub memory: Memory, pub pcs: Vec, pub fps: Vec, + pub vm_poseidon16_events: Vec, + pub vm_poseidon24_events: Vec, + pub vm_dot_product_events: Vec, + pub vm_multilinear_eval_events: Vec, } pub fn build_public_memory(public_input: &[F]) -> Vec { @@ -221,6 +225,12 @@ fn execute_bytecode_helper( let mut pcs = Vec::new(); let mut fps = Vec::new(); + // Events collected only in final execution + let mut vm_poseidon16_events: Vec = Vec::new(); + let mut vm_poseidon24_events: Vec = Vec::new(); + let mut vm_dot_product_events: Vec = Vec::new(); + let mut vm_multilinear_eval_events: Vec = Vec::new(); + let mut add_counts = 0; let mut mul_counts = 0; let mut deref_counts = 0; @@ -419,6 +429,8 @@ fn execute_bytecode_helper( input[..VECTOR_LEN].copy_from_slice(&arg0); input[VECTOR_LEN..].copy_from_slice(&arg1); + // Keep a copy of the input before permutation for event recording + let input_before = input; poseidon_16.permute_mut(&mut input); let res0: [F; VECTOR_LEN] = input[..VECTOR_LEN].try_into().unwrap(); @@ -427,6 +439,26 @@ fn execute_bytecode_helper( memory.set_vector(res_value.to_usize(), res0)?; memory.set_vector(1 + res_value.to_usize(), res1)?; + if final_execution { + let cycle = pcs.len() - 1; + let addr_input_a = a_value.to_usize(); + let addr_input_b = b_value.to_usize(); + let addr_output = res_value.to_usize(); + // Build output by concatenating the two result vectors we wrote to memory + let output: [F; 16] = [res0.as_slice(), res1.as_slice()] + .concat() + .try_into() + .unwrap(); + vm_poseidon16_events.push(VmPoseidon16Event { + cycle, + addr_input_a, + addr_input_b, + addr_output, + input: input_before, + output, + }); + } + pc += 1; } Instruction::Poseidon2_24 { arg_a, arg_b, res } => { @@ -445,12 +477,29 @@ fn execute_bytecode_helper( input[VECTOR_LEN..2 * VECTOR_LEN].copy_from_slice(&arg1); input[2 * VECTOR_LEN..].copy_from_slice(&arg2); + // Keep a copy of the input before permutation for event recording + let input_before = input; poseidon_24.permute_mut(&mut input); let res: [F; VECTOR_LEN] = input[2 * VECTOR_LEN..].try_into().unwrap(); memory.set_vector(res_value.to_usize(), res)?; + if final_execution { + let cycle = pcs.len() - 1; + let addr_input_a = a_value.to_usize(); + let addr_input_b = b_value.to_usize(); + let addr_output = res_value.to_usize(); + vm_poseidon24_events.push(VmPoseidon24Event { + cycle, + addr_input_a, + addr_input_b, + addr_output, + input: input_before, + output: res, + }); + } + pc += 1; } Instruction::DotProductExtensionExtension { @@ -468,9 +517,26 @@ fn execute_bytecode_helper( let slice_0 = memory.get_continuous_slice_of_ef_elements(ptr_arg_0, *size)?; let slice_1 = memory.get_continuous_slice_of_ef_elements(ptr_arg_1, *size)?; - let dot_product = dot_product::(slice_0.into_iter(), slice_1.into_iter()); + let dot_product = dot_product::( + slice_0.iter().copied(), + slice_1.iter().copied(), + ); memory.set_ef_element(ptr_res, dot_product)?; + if final_execution { + let cycle = pcs.len() - 1; + vm_dot_product_events.push(VmDotProductEvent { + cycle, + addr_0: ptr_arg_0, + addr_1: ptr_arg_1, + addr_res: ptr_res, + len: *size, + slice_0: slice_0.clone(), + slice_1: slice_1.clone(), + res: dot_product, + }); + } + pc += 1; } Instruction::MultilinearEval { @@ -488,9 +554,22 @@ fn execute_bytecode_helper( let slice_coeffs = memory.slice(ptr_coeffs << *n_vars, n_coeffs)?; let point = memory.get_continuous_slice_of_ef_elements(ptr_point, *n_vars)?; - let eval = slice_coeffs.evaluate(&MultilinearPoint(point)); + let eval = slice_coeffs.evaluate(&MultilinearPoint(point.clone())); memory.set_ef_element(ptr_res, eval)?; + if final_execution { + let cycle = pcs.len() - 1; + vm_multilinear_eval_events.push(VmMultilinearEvalEvent { + cycle, + addr_coeffs: ptr_coeffs, + addr_point: ptr_point, + addr_res: ptr_res, + n_vars: *n_vars, + point, + res: eval, + }); + } + pc += 1; } } @@ -501,6 +580,11 @@ fn execute_bytecode_helper( fps.push(fp); if final_execution { + // Ensure event counts match call counts in final execution + debug_assert_eq!(poseidon16_calls, vm_poseidon16_events.len()); + debug_assert_eq!(poseidon24_calls, vm_poseidon24_events.len()); + debug_assert_eq!(dot_product_ext_ext_calls, vm_dot_product_events.len()); + debug_assert_eq!(multilinear_eval_calls, vm_multilinear_eval_events.len()); if profiler { let report = profiling_report(instruction_history, function_locations); println!("\n{report}"); @@ -594,5 +678,9 @@ fn execute_bytecode_helper( memory, pcs, fps, + vm_poseidon16_events, + vm_poseidon24_events, + vm_dot_product_events, + vm_multilinear_eval_events, }) } diff --git a/crates/zk_vm/zk_vm_trace/src/execution_trace.rs b/crates/zk_vm/zk_vm_trace/src/execution_trace.rs index 22bca1ef..6ff05277 100644 --- a/crates/zk_vm/zk_vm_trace/src/execution_trace.rs +++ b/crates/zk_vm/zk_vm_trace/src/execution_trace.rs @@ -134,92 +134,7 @@ pub fn get_execution_trace( trace[COL_INDEX_MEM_ADDRESS_B][cycle] = addr_b; trace[COL_INDEX_MEM_ADDRESS_C][cycle] = addr_c; - match instruction { - Instruction::Poseidon2_16 { arg_a, arg_b, res } => { - let addr_input_a = arg_a.read_value(memory, fp).unwrap().to_usize(); - let addr_input_b = arg_b.read_value(memory, fp).unwrap().to_usize(); - let addr_output = res.read_value(memory, fp).unwrap().to_usize(); - let value_a = memory.get_vector(addr_input_a).unwrap(); - let value_b = memory.get_vector(addr_input_b).unwrap(); - let output = memory.get_vectorized_slice(addr_output, 2).unwrap(); - poseidons_16.push(WitnessPoseidon16 { - cycle: Some(cycle), - addr_input_a, - addr_input_b, - addr_output, - input: [value_a, value_b].concat().try_into().unwrap(), - output: output.try_into().unwrap(), - }); - } - Instruction::Poseidon2_24 { arg_a, arg_b, res } => { - let addr_input_a = arg_a.read_value(memory, fp).unwrap().to_usize(); - let addr_input_b = arg_b.read_value(memory, fp).unwrap().to_usize(); - let addr_output = res.read_value(memory, fp).unwrap().to_usize(); - let value_a = memory.get_vectorized_slice(addr_input_a, 2).unwrap(); - let value_b = memory.get_vector(addr_input_b).unwrap().to_vec(); - let output = memory.get_vector(addr_output).unwrap(); - poseidons_24.push(WitnessPoseidon24 { - cycle: Some(cycle), - addr_input_a, - addr_input_b, - addr_output, - input: [value_a, value_b].concat().try_into().unwrap(), - output, - }); - } - Instruction::DotProductExtensionExtension { - arg0, - arg1, - res, - size, - } => { - let addr_0 = arg0.read_value(memory, fp).unwrap().to_usize(); - let addr_1 = arg1.read_value(memory, fp).unwrap().to_usize(); - let addr_res = res.read_value(memory, fp).unwrap().to_usize(); - let slice_0 = memory - .get_continuous_slice_of_ef_elements(addr_0, *size) - .unwrap(); - let slice_1 = memory - .get_continuous_slice_of_ef_elements(addr_1, *size) - .unwrap(); - let res = memory.get_ef_element(addr_res).unwrap(); - dot_products.push(WitnessDotProduct { - cycle, - addr_0, - addr_1, - addr_res, - len: *size, - slice_0, - slice_1, - res, - }); - } - Instruction::MultilinearEval { - coeffs, - point, - res, - n_vars, - } => { - let addr_coeffs = coeffs.read_value(memory, fp).unwrap().to_usize(); - let addr_point = point.read_value(memory, fp).unwrap().to_usize(); - let addr_res = res.read_value(memory, fp).unwrap().to_usize(); - let point = (0..*n_vars) - .map(|i| memory.get_ef_element(addr_point + i * DIMENSION)) - .collect::, _>>() - .unwrap(); - let res = memory.get_ef_element(addr_res).unwrap(); - vm_multilinear_evals.push(WitnessMultilinearEval { - cycle, - addr_coeffs, - addr_point, - addr_res, - n_vars: *n_vars, - point, - res, - }); - } - _ => {} - } + // Precompile witness collection is now driven by VM events instead of rescanning here. } // repeat the last row to get to a power of two @@ -242,6 +157,51 @@ pub fn get_execution_trace( F::ZERO, ); + // Build witnesses from VM-collected events + for e in &execution_result.vm_poseidon16_events { + poseidons_16.push(WitnessPoseidon16 { + cycle: Some(e.cycle), + addr_input_a: e.addr_input_a, + addr_input_b: e.addr_input_b, + addr_output: e.addr_output, + input: e.input, + output: e.output, + }); + } + for e in &execution_result.vm_poseidon24_events { + poseidons_24.push(WitnessPoseidon24 { + cycle: Some(e.cycle), + addr_input_a: e.addr_input_a, + addr_input_b: e.addr_input_b, + addr_output: e.addr_output, + input: e.input, + output: e.output, + }); + } + for e in &execution_result.vm_dot_product_events { + dot_products.push(WitnessDotProduct { + cycle: e.cycle, + addr_0: e.addr_0, + addr_1: e.addr_1, + addr_res: e.addr_res, + len: e.len, + slice_0: e.slice_0.clone(), + slice_1: e.slice_1.clone(), + res: e.res, + }); + } + for e in &execution_result.vm_multilinear_eval_events { + vm_multilinear_evals.push(WitnessMultilinearEval { + cycle: e.cycle, + addr_coeffs: e.addr_coeffs, + addr_point: e.addr_point, + addr_res: e.addr_res, + n_vars: e.n_vars, + point: e.point.clone(), + res: e.res, + }); + } + let n_poseidons_16 = poseidons_16.len(); let n_poseidons_24 = poseidons_24.len(); From b7bdfe0b59d5ca9b4056c69720728040f03db33d Mon Sep 17 00:00:00 2001 From: adust09 Date: Thu, 18 Sep 2025 10:23:26 +0900 Subject: [PATCH 03/11] Refactor dot product calculation for improved readability --- crates/lean_vm/src/runner.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/lean_vm/src/runner.rs b/crates/lean_vm/src/runner.rs index 42e3d395..3d8765b5 100644 --- a/crates/lean_vm/src/runner.rs +++ b/crates/lean_vm/src/runner.rs @@ -528,10 +528,8 @@ fn execute_bytecode_helper( let slice_0 = memory.get_continuous_slice_of_ef_elements(ptr_arg_0, *size)?; let slice_1 = memory.get_continuous_slice_of_ef_elements(ptr_arg_1, *size)?; - let dot_product = dot_product::( - slice_0.iter().copied(), - slice_1.iter().copied(), - ); + let dot_product = + dot_product::(slice_0.iter().copied(), slice_1.iter().copied()); memory.set_ef_element(ptr_res, dot_product)?; if final_execution { From 6238db777898a39b9fe3ee42ca270a18d273825b Mon Sep 17 00:00:00 2001 From: adust09 Date: Thu, 18 Sep 2025 14:16:25 +0900 Subject: [PATCH 04/11] Remove unused instruction handling from execution trace function --- .../witness_generation/src/execution_trace.rs | 105 +----------------- 1 file changed, 1 insertion(+), 104 deletions(-) diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs index dd931e6b..cdabc2fb 100644 --- a/crates/lean_prover/witness_generation/src/execution_trace.rs +++ b/crates/lean_prover/witness_generation/src/execution_trace.rs @@ -5,10 +5,9 @@ use crate::{ N_EXEC_COLUMNS, N_INSTRUCTION_COLUMNS, }; use lean_vm::*; +use p3_field::Field; use p3_field::PrimeCharacteristicRing; -use p3_field::{BasedVectorSpace, Field}; use p3_symmetric::Permutation; -use p3_util::log2_ceil_usize; use rayon::prelude::*; use utils::{ToUsize, get_poseidon16, get_poseidon24}; @@ -162,108 +161,6 @@ pub fn get_execution_trace( trace[COL_INDEX_MEM_ADDRESS_A][cycle] = addr_a; trace[COL_INDEX_MEM_ADDRESS_B][cycle] = addr_b; trace[COL_INDEX_MEM_ADDRESS_C][cycle] = addr_c; - - match instruction { - Instruction::Poseidon2_16 { arg_a, arg_b, res } => { - let addr_input_a = arg_a.read_value(memory, fp).unwrap().to_usize(); - let addr_input_b = arg_b.read_value(memory, fp).unwrap().to_usize(); - let addr_output = res.read_value(memory, fp).unwrap().to_usize(); - let value_a = memory.get_vector(addr_input_a).unwrap(); - let value_b = memory.get_vector(addr_input_b).unwrap(); - let output = memory.get_vectorized_slice(addr_output, 2).unwrap(); - poseidons_16.push(WitnessPoseidon16 { - cycle: Some(cycle), - addr_input_a, - addr_input_b, - addr_output, - input: [value_a, value_b].concat().try_into().unwrap(), - output: output.try_into().unwrap(), - }); - } - Instruction::Poseidon2_24 { arg_a, arg_b, res } => { - let addr_input_a = arg_a.read_value(memory, fp).unwrap().to_usize(); - let addr_input_b = arg_b.read_value(memory, fp).unwrap().to_usize(); - let addr_output = res.read_value(memory, fp).unwrap().to_usize(); - let value_a = memory.get_vectorized_slice(addr_input_a, 2).unwrap(); - let value_b = memory.get_vector(addr_input_b).unwrap().to_vec(); - let output = memory.get_vector(addr_output).unwrap(); - poseidons_24.push(WitnessPoseidon24 { - cycle: Some(cycle), - addr_input_a, - addr_input_b, - addr_output, - input: [value_a, value_b].concat().try_into().unwrap(), - output, - }); - } - Instruction::DotProductExtensionExtension { - arg0, - arg1, - res, - size, - } => { - let addr_0 = arg0.read_value(memory, fp).unwrap().to_usize(); - let addr_1 = arg1.read_value(memory, fp).unwrap().to_usize(); - let addr_res = res.read_value(memory, fp).unwrap().to_usize(); - let slice_0 = memory - .get_continuous_slice_of_ef_elements(addr_0, *size) - .unwrap(); - let slice_1 = memory - .get_continuous_slice_of_ef_elements(addr_1, *size) - .unwrap(); - let res = memory.get_ef_element(addr_res).unwrap(); - dot_products.push(WitnessDotProduct { - cycle, - addr_0, - addr_1, - addr_res, - len: *size, - slice_0, - slice_1, - res, - }); - } - Instruction::MultilinearEval { - coeffs, - point, - res, - n_vars, - } => { - let addr_coeffs = coeffs.read_value(memory, fp).unwrap().to_usize(); - let addr_point = point.read_value(memory, fp).unwrap().to_usize(); - let addr_res = res.read_value(memory, fp).unwrap().to_usize(); - - let log_point_size = log2_ceil_usize(*n_vars * DIMENSION); - let point_slice = memory - .slice(addr_point << log_point_size, *n_vars * DIMENSION) - .unwrap(); - for i in *n_vars * DIMENSION..(*n_vars * DIMENSION).next_power_of_two() { - assert!( - memory - .get((addr_point << log_point_size) + i) - .unwrap() - .is_zero() - ); // padding - } - let point = point_slice[..*n_vars * DIMENSION] - .chunks_exact(DIMENSION) - .map(|chunk| EF::from_basis_coefficients_slice(chunk).unwrap()) - .collect::>(); - - let res = memory.get_vector(addr_res).unwrap(); - assert!(res[DIMENSION..].iter().all(|&x| x.is_zero())); - vm_multilinear_evals.push(WitnessMultilinearEval { - cycle, - addr_coeffs, - addr_point, - addr_res, - n_vars: *n_vars, - point, - res: EF::from_basis_coefficients_slice(&res[..DIMENSION]).unwrap(), - }); - } - _ => {} - } } // repeat the last row to get to a power of two From fdcd03fc7fded93723d808e64b6c6272d427e282 Mon Sep 17 00:00:00 2001 From: adust09 Date: Thu, 18 Sep 2025 21:52:22 +0900 Subject: [PATCH 05/11] Add a blank line for improved readability in the Clippy job section of the Rust workflow --- .github/workflows/rust.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index a3d2b0d0..512ce70f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -28,6 +28,7 @@ jobs: cargo-clippy: runs-on: ubuntu-latest name: Clippy + steps: - uses: actions/checkout@v4 - name: Install nightly toolchain From d96e0dc8c2b4d32bffde799bc3fc5afbfe003655 Mon Sep 17 00:00:00 2001 From: adust09 Date: Fri, 19 Sep 2025 01:06:13 +0900 Subject: [PATCH 06/11] Remove unnecessary blank line in the Clippy job section of the Rust workflow --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 512ce70f..ef524661 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -28,7 +28,7 @@ jobs: cargo-clippy: runs-on: ubuntu-latest name: Clippy - + steps: - uses: actions/checkout@v4 - name: Install nightly toolchain From 5e8b8e3150d32304796ee932e6501aa9824f67f9 Mon Sep 17 00:00:00 2001 From: adust09 Date: Sat, 20 Sep 2025 13:40:51 +0900 Subject: [PATCH 07/11] **Unit Tests Added** - `crates/lean_vm/src/runner.rs`: Added two comprehensive tests (`vm_precompile_events_capture_expected_data`, `vm_precompile_events_only_final_pass`) covering event contents, pointer/address assertions, value integrity for all precompile types, and ensuring only the final execution populates event vectors. - `crates/lean_prover/witness_generation/src/execution_trace.rs`: Added end-to-end test (`execution_trace_uses_vm_events`) asserting witness construction directly from VM events, including poseidon padding invariants, dot-product/multilinear data, and evaluation outputs; auxiliary guard skips the sentinel `ending_pc` to avoid panics when iterating PCs. **Build/Test Adjustments** - Ensured helper data uses disjoint memory regions and power-of-two frame growth so tests run deterministically. - `get_execution_trace` now skips the final `pc == ending_pc` entry to prevent out-of-bounds access during witness generation. **Tests Run** - `cargo test -p lean_vm -q` - `cargo test -p witness_generation -q` (`cargo test --workspace` still aborts on existing `test_zkvm` stack overflow, so I validated the new coverage with targeted crate runs.) --- .../witness_generation/src/execution_trace.rs | 368 +++++++++++++++++ crates/lean_vm/src/runner.rs | 372 +++++++++++++++++- 2 files changed, 739 insertions(+), 1 deletion(-) diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs index cdabc2fb..099d6aec 100644 --- a/crates/lean_prover/witness_generation/src/execution_trace.rs +++ b/crates/lean_prover/witness_generation/src/execution_trace.rs @@ -117,6 +117,9 @@ pub fn get_execution_trace( .zip(&execution_result.fps) .enumerate() { + if pc == bytecode.ending_pc { + continue; + } let instruction = &bytecode.instructions[pc]; let field_repr = field_representation(instruction); @@ -263,3 +266,368 @@ pub fn get_execution_trace( memory, } } + +#[cfg(test)] +mod tests { + use super::*; + use p3_field::{BasedVectorSpace, PrimeCharacteristicRing, dot_product}; + use p3_util::log2_ceil_usize; + use std::collections::BTreeMap; + use utils::ToUsize; + + const POSEIDON16_ARG_A_PTR: usize = 6; + const POSEIDON16_ARG_B_PTR: usize = 7; + const POSEIDON24_ARG_A_PTR: usize = 11; + const POSEIDON24_ARG_B_PTR: usize = 13; + const DOT_ARG0_PTR: usize = 180; + const DOT_ARG1_PTR: usize = 200; + const MLE_COEFF_PTR: usize = 32; + const MLE_POINT_PTR: usize = 15; + + const POSEIDON16_RES_OFFSET: usize = 0; + const POSEIDON24_RES_OFFSET: usize = 1; + const DOT_RES_OFFSET: usize = 2; + const MLE_RES_OFFSET: usize = 3; + + const DOT_PRODUCT_LEN: usize = 2; + const MLE_N_VARS: usize = 1; + + const MAX_MEMORY_INDEX: usize = DOT_ARG1_PTR + DOT_PRODUCT_LEN * DIMENSION - 1; + const PUBLIC_INPUT_LEN: usize = MAX_MEMORY_INDEX - PUBLIC_INPUT_START + 1; + + const POSEIDON16_ARG_A_VALUES: [u64; VECTOR_LEN] = [1, 2, 3, 4, 5, 6, 7, 8]; + const POSEIDON16_ARG_B_VALUES: [u64; VECTOR_LEN] = [101, 102, 103, 104, 105, 106, 107, 108]; + const POSEIDON24_ARG_A_VALUES: [[u64; VECTOR_LEN]; 2] = [ + [201, 202, 203, 204, 205, 206, 207, 208], + [211, 212, 213, 214, 215, 216, 217, 218], + ]; + const POSEIDON24_ARG_B_VALUES: [u64; VECTOR_LEN] = [221, 222, 223, 224, 225, 226, 227, 228]; + const DOT_ARG0_VALUES: [[u64; DIMENSION]; DOT_PRODUCT_LEN] = + [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]; + const DOT_ARG1_VALUES: [[u64; DIMENSION]; DOT_PRODUCT_LEN] = + [[11, 12, 13, 14, 15], [16, 17, 18, 19, 20]]; + const MLE_COEFF_VALUES: [u64; 1 << MLE_N_VARS] = [7, 9]; + const MLE_POINT_VALUES: [u64; DIMENSION] = [21, 22, 23, 24, 25]; + + fn f(value: u64) -> F { + F::from_isize(value as isize) + } + + fn set_public_input_cell(public_input: &mut [F], memory_index: usize, value: F) { + assert!(memory_index >= PUBLIC_INPUT_START); + let idx = memory_index - PUBLIC_INPUT_START; + assert!(idx < public_input.len()); + public_input[idx] = value; + } + + fn set_vector(public_input: &mut [F], ptr: usize, values: &[u64]) { + assert_eq!(values.len(), VECTOR_LEN); + for (i, &value) in values.iter().enumerate() { + set_public_input_cell(public_input, ptr * VECTOR_LEN + i, f(value)); + } + } + + fn set_multivector(public_input: &mut [F], ptr: usize, chunks: &[&[u64]]) { + for (chunk_idx, chunk) in chunks.iter().enumerate() { + assert_eq!(chunk.len(), VECTOR_LEN); + for (i, &value) in chunk.iter().enumerate() { + set_public_input_cell(public_input, (ptr + chunk_idx) * VECTOR_LEN + i, f(value)); + } + } + } + + fn set_ef_slice(public_input: &mut [F], ptr: usize, elements: &[[u64; DIMENSION]]) { + for (i, coeffs) in elements.iter().enumerate() { + for (j, &value) in coeffs.iter().enumerate() { + set_public_input_cell(public_input, ptr + i * DIMENSION + j, f(value)); + } + } + } + + fn set_base_slice(public_input: &mut [F], start_index: usize, values: &[u64]) { + for (i, &value) in values.iter().enumerate() { + set_public_input_cell(public_input, start_index + i, f(value)); + } + } + + fn build_test_case() -> (Bytecode, Vec) { + let mut public_input = vec![F::ZERO; PUBLIC_INPUT_LEN]; + + set_vector( + &mut public_input, + POSEIDON16_ARG_A_PTR, + &POSEIDON16_ARG_A_VALUES, + ); + set_vector( + &mut public_input, + POSEIDON16_ARG_B_PTR, + &POSEIDON16_ARG_B_VALUES, + ); + + let poseidon24_chunks = [ + &POSEIDON24_ARG_A_VALUES[0][..], + &POSEIDON24_ARG_A_VALUES[1][..], + ]; + set_multivector(&mut public_input, POSEIDON24_ARG_A_PTR, &poseidon24_chunks); + set_vector( + &mut public_input, + POSEIDON24_ARG_B_PTR, + &POSEIDON24_ARG_B_VALUES, + ); + + set_ef_slice(&mut public_input, DOT_ARG0_PTR, &DOT_ARG0_VALUES); + set_ef_slice(&mut public_input, DOT_ARG1_PTR, &DOT_ARG1_VALUES); + + let coeff_base = MLE_COEFF_PTR << MLE_N_VARS; + set_base_slice(&mut public_input, coeff_base, &MLE_COEFF_VALUES); + + let log_point_size = log2_ceil_usize(MLE_N_VARS * DIMENSION); + let point_base = MLE_POINT_PTR << log_point_size; + set_base_slice(&mut public_input, point_base, &MLE_POINT_VALUES); + + let mut hints = BTreeMap::new(); + hints.insert( + 0, + vec![Hint::RequestMemory { + offset: POSEIDON16_RES_OFFSET, + size: MemOrConstant::Constant(f(2)), + vectorized: true, + vectorized_len: LOG_VECTOR_LEN + 1, + }], + ); + hints.insert( + 1, + vec![Hint::RequestMemory { + offset: POSEIDON24_RES_OFFSET, + size: MemOrConstant::Constant(f(1)), + vectorized: true, + vectorized_len: LOG_VECTOR_LEN, + }], + ); + hints.insert( + 2, + vec![Hint::RequestMemory { + offset: DOT_RES_OFFSET, + size: MemOrConstant::Constant(f(1)), + vectorized: false, + vectorized_len: 0, + }], + ); + hints.insert( + 3, + vec![Hint::RequestMemory { + offset: MLE_RES_OFFSET, + size: MemOrConstant::Constant(f(1)), + vectorized: true, + vectorized_len: LOG_VECTOR_LEN, + }], + ); + + let instructions = vec![ + Instruction::Poseidon2_16 { + arg_a: MemOrConstant::Constant(f(POSEIDON16_ARG_A_PTR as u64)), + arg_b: MemOrConstant::Constant(f(POSEIDON16_ARG_B_PTR as u64)), + res: MemOrFp::MemoryAfterFp { + offset: POSEIDON16_RES_OFFSET, + }, + }, + Instruction::Poseidon2_24 { + arg_a: MemOrConstant::Constant(f(POSEIDON24_ARG_A_PTR as u64)), + arg_b: MemOrConstant::Constant(f(POSEIDON24_ARG_B_PTR as u64)), + res: MemOrFp::MemoryAfterFp { + offset: POSEIDON24_RES_OFFSET, + }, + }, + Instruction::DotProductExtensionExtension { + arg0: MemOrConstant::Constant(f(DOT_ARG0_PTR as u64)), + arg1: MemOrConstant::Constant(f(DOT_ARG1_PTR as u64)), + res: MemOrFp::MemoryAfterFp { + offset: DOT_RES_OFFSET, + }, + size: DOT_PRODUCT_LEN, + }, + Instruction::MultilinearEval { + coeffs: MemOrConstant::Constant(f(MLE_COEFF_PTR as u64)), + point: MemOrConstant::Constant(f(MLE_POINT_PTR as u64)), + res: MemOrFp::MemoryAfterFp { + offset: MLE_RES_OFFSET, + }, + n_vars: MLE_N_VARS, + }, + ]; + + let bytecode = Bytecode { + instructions, + hints, + starting_frame_memory: 512, + ending_pc: 4, + }; + + (bytecode, public_input) + } + + fn embed_base_into_extension(value: F) -> EF { + let mut coeffs = [F::ZERO; DIMENSION]; + coeffs[0] = value; + EF::from_basis_coefficients_slice(&coeffs).unwrap() + } + + #[test] + fn execution_trace_uses_vm_events() { + let (bytecode, public_input) = build_test_case(); + let execution_result = + execute_bytecode(&bytecode, &public_input, &[], "", &BTreeMap::new(), false); + + let trace = get_execution_trace(&bytecode, &execution_result); + + assert_eq!(execution_result.vm_poseidon16_events.len(), 1); + assert_eq!(execution_result.vm_poseidon24_events.len(), 1); + assert_eq!(execution_result.vm_dot_product_events.len(), 1); + assert_eq!(execution_result.vm_multilinear_eval_events.len(), 1); + + assert_eq!(trace.n_poseidons_16, 1); + assert_eq!(trace.poseidons_16.len(), 1); + assert_eq!(trace.n_poseidons_24, 1); + assert_eq!(trace.poseidons_24.len(), 1); + assert_eq!(trace.dot_products.len(), 1); + assert_eq!(trace.vm_multilinear_evals.len(), 1); + + let poseidon16_event = &execution_result.vm_poseidon16_events[0]; + let witness16 = &trace.poseidons_16[0]; + assert_eq!(witness16.cycle, Some(poseidon16_event.cycle)); + assert_eq!(witness16.addr_input_a, poseidon16_event.addr_input_a); + assert_eq!(witness16.addr_input_b, poseidon16_event.addr_input_b); + assert_eq!(witness16.addr_output, poseidon16_event.addr_output); + assert_eq!(witness16.input, poseidon16_event.input); + assert_eq!(witness16.output, poseidon16_event.output); + + let mut expected_poseidon16_input = [F::ZERO; 16]; + expected_poseidon16_input[..VECTOR_LEN].copy_from_slice(&POSEIDON16_ARG_A_VALUES.map(f)); + expected_poseidon16_input[VECTOR_LEN..].copy_from_slice(&POSEIDON16_ARG_B_VALUES.map(f)); + assert_eq!(witness16.input, expected_poseidon16_input); + + let poseidon16_perm = get_poseidon16().permute(poseidon16_event.input); + assert_eq!(witness16.output, poseidon16_perm); + + let poseidon24_event = &execution_result.vm_poseidon24_events[0]; + let witness24 = &trace.poseidons_24[0]; + assert_eq!(witness24.cycle, Some(poseidon24_event.cycle)); + assert_eq!(witness24.addr_input_a, poseidon24_event.addr_input_a); + assert_eq!(witness24.addr_input_b, poseidon24_event.addr_input_b); + assert_eq!(witness24.addr_output, poseidon24_event.addr_output); + assert_eq!(witness24.input, poseidon24_event.input); + assert_eq!(witness24.output, poseidon24_event.output); + + let mut expected_poseidon24_input = [F::ZERO; 24]; + expected_poseidon24_input[..VECTOR_LEN].copy_from_slice(&POSEIDON24_ARG_A_VALUES[0].map(f)); + expected_poseidon24_input[VECTOR_LEN..2 * VECTOR_LEN] + .copy_from_slice(&POSEIDON24_ARG_A_VALUES[1].map(f)); + expected_poseidon24_input[2 * VECTOR_LEN..] + .copy_from_slice(&POSEIDON24_ARG_B_VALUES.map(f)); + assert_eq!(witness24.input, expected_poseidon24_input); + + let mut poseidon24_input = poseidon24_event.input; + get_poseidon24().permute_mut(&mut poseidon24_input); + let expected_poseidon24: [F; VECTOR_LEN] = + poseidon24_input[2 * VECTOR_LEN..].try_into().unwrap(); + assert_eq!(witness24.output, expected_poseidon24); + + let dot_event = &execution_result.vm_dot_product_events[0]; + let witness_dot = &trace.dot_products[0]; + assert_eq!(witness_dot.cycle, dot_event.cycle); + assert_eq!(witness_dot.addr_0, dot_event.addr_0); + assert_eq!(witness_dot.addr_1, dot_event.addr_1); + assert_eq!(witness_dot.addr_res, dot_event.addr_res); + assert_eq!(witness_dot.len, dot_event.len); + assert_eq!(witness_dot.slice_0, dot_event.slice_0); + assert_eq!(witness_dot.slice_1, dot_event.slice_1); + assert_eq!(witness_dot.res, dot_event.res); + + let expected_dot_slice_0 = DOT_ARG0_VALUES + .iter() + .map(|coeffs| coeffs.map(f)) + .map(|coeffs| EF::from_basis_coefficients_slice(&coeffs).unwrap()) + .collect::>(); + let expected_dot_slice_1 = DOT_ARG1_VALUES + .iter() + .map(|coeffs| coeffs.map(f)) + .map(|coeffs| EF::from_basis_coefficients_slice(&coeffs).unwrap()) + .collect::>(); + assert_eq!(witness_dot.slice_0, expected_dot_slice_0); + assert_eq!(witness_dot.slice_1, expected_dot_slice_1); + + let expected_dot = dot_product::( + witness_dot.slice_0.iter().copied(), + witness_dot.slice_1.iter().copied(), + ); + assert_eq!(witness_dot.res, expected_dot); + + let mle_event = &execution_result.vm_multilinear_eval_events[0]; + let witness_mle = &trace.vm_multilinear_evals[0]; + assert_eq!(witness_mle.cycle, mle_event.cycle); + assert_eq!(witness_mle.addr_coeffs, mle_event.addr_coeffs); + assert_eq!(witness_mle.addr_point, mle_event.addr_point); + assert_eq!(witness_mle.addr_res, mle_event.addr_res); + assert_eq!(witness_mle.n_vars, mle_event.n_vars); + assert_eq!(witness_mle.point, mle_event.point); + assert_eq!(witness_mle.res, mle_event.res); + + let expected_point = + vec![EF::from_basis_coefficients_slice(&MLE_POINT_VALUES.map(f)).unwrap()]; + assert_eq!(witness_mle.point, expected_point); + + let coeff_slice = execution_result + .memory + .slice( + mle_event.addr_coeffs << mle_event.n_vars, + 1 << mle_event.n_vars, + ) + .unwrap(); + let point = witness_mle.point[0]; + let c0 = embed_base_into_extension(coeff_slice[0]); + let c1 = embed_base_into_extension(coeff_slice[1]); + let expected_mle = c0 * (EF::ONE - point) + c1 * point; + assert_eq!(witness_mle.res, expected_mle); + + assert_eq!( + trace.poseidons_16.len(), + trace.n_poseidons_16.next_power_of_two() + ); + assert_eq!( + trace.poseidons_24.len(), + trace.n_poseidons_24.next_power_of_two() + ); + assert_eq!( + trace.public_memory_size, + execution_result.public_memory_size + ); + + let poseidon16_res_ptr = execution_result + .memory + .get(execution_result.fps[poseidon16_event.cycle] + POSEIDON16_RES_OFFSET) + .unwrap() + .to_usize(); + assert_eq!(witness16.addr_output, poseidon16_res_ptr); + + let poseidon24_res_ptr = execution_result + .memory + .get(execution_result.fps[poseidon24_event.cycle] + POSEIDON24_RES_OFFSET) + .unwrap() + .to_usize(); + assert_eq!(witness24.addr_output, poseidon24_res_ptr); + + let dot_res_ptr = execution_result + .memory + .get(execution_result.fps[dot_event.cycle] + DOT_RES_OFFSET) + .unwrap() + .to_usize(); + assert_eq!(witness_dot.addr_res, dot_res_ptr); + + let mle_res_ptr = execution_result + .memory + .get(execution_result.fps[mle_event.cycle] + MLE_RES_OFFSET) + .unwrap() + .to_usize(); + assert_eq!(witness_mle.addr_res, mle_res_ptr); + } +} diff --git a/crates/lean_vm/src/runner.rs b/crates/lean_vm/src/runner.rs index 3d8765b5..40ae47da 100644 --- a/crates/lean_vm/src/runner.rs +++ b/crates/lean_vm/src/runner.rs @@ -572,7 +572,7 @@ fn execute_bytecode_helper( .map(|chunk| EF::from_basis_coefficients_slice(chunk).unwrap()) .collect::>(); - let eval = slice_coeffs.evaluate(&MultilinearPoint(point)); + let eval = slice_coeffs.evaluate(&MultilinearPoint(point.clone())); let mut res_vec = eval.as_basis_coefficients_slice().to_vec(); res_vec.resize(VECTOR_LEN, F::ZERO); memory.set_vector(ptr_res, res_vec.try_into().unwrap())?; @@ -704,3 +704,373 @@ fn execute_bytecode_helper( vm_multilinear_eval_events, }) } + +#[cfg(test)] +mod tests { + use super::*; + use p3_field::BasedVectorSpace; + use p3_util::log2_ceil_usize; + use std::collections::BTreeMap; + use utils::ToUsize; + + // Pointers for precompile inputs allocated in public memory. + const POSEIDON16_ARG_A_PTR: usize = 6; + const POSEIDON16_ARG_B_PTR: usize = 7; + const POSEIDON24_ARG_A_PTR: usize = 11; // uses ptr and ptr + 1 + const POSEIDON24_ARG_B_PTR: usize = 13; + const DOT_ARG0_PTR: usize = 180; // normal pointer, len 2 + const DOT_ARG1_PTR: usize = 200; // normal pointer, len 2 + const MLE_COEFF_PTR: usize = 32; // interpreted with shift << n_vars + const MLE_POINT_PTR: usize = 15; // interpreted with shift << log_point_size + + // Offsets used in hints for storing result pointers at fp + offset. + const POSEIDON16_RES_OFFSET: usize = 0; + const POSEIDON24_RES_OFFSET: usize = 1; + const DOT_RES_OFFSET: usize = 2; + const MLE_RES_OFFSET: usize = 3; + + const DOT_PRODUCT_LEN: usize = 2; + const MLE_N_VARS: usize = 1; + + // Ensure public input covers the highest index used (dot product arg1 slice). + const MAX_MEMORY_INDEX: usize = DOT_ARG1_PTR + DOT_PRODUCT_LEN * DIMENSION - 1; + const PUBLIC_INPUT_LEN: usize = MAX_MEMORY_INDEX - PUBLIC_INPUT_START + 1; + + const POSEIDON16_ARG_A_VALUES: [u64; VECTOR_LEN] = [1, 2, 3, 4, 5, 6, 7, 8]; + const POSEIDON16_ARG_B_VALUES: [u64; VECTOR_LEN] = [101, 102, 103, 104, 105, 106, 107, 108]; + const POSEIDON24_ARG_A_VALUES: [[u64; VECTOR_LEN]; 2] = [ + [201, 202, 203, 204, 205, 206, 207, 208], + [211, 212, 213, 214, 215, 216, 217, 218], + ]; + const POSEIDON24_ARG_B_VALUES: [u64; VECTOR_LEN] = [221, 222, 223, 224, 225, 226, 227, 228]; + const DOT_ARG0_VALUES: [[u64; DIMENSION]; DOT_PRODUCT_LEN] = + [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]; + const DOT_ARG1_VALUES: [[u64; DIMENSION]; DOT_PRODUCT_LEN] = + [[11, 12, 13, 14, 15], [16, 17, 18, 19, 20]]; + const MLE_COEFF_VALUES: [u64; 1 << MLE_N_VARS] = [7, 9]; + const MLE_POINT_VALUES: [u64; DIMENSION] = [21, 22, 23, 24, 25]; + + fn f(value: u64) -> F { + F::from_isize(value as isize) + } + + fn set_public_input_cell(public_input: &mut [F], memory_index: usize, value: F) { + assert!(memory_index >= PUBLIC_INPUT_START); + let idx = memory_index - PUBLIC_INPUT_START; + assert!(idx < public_input.len()); + public_input[idx] = value; + } + + fn set_vector(public_input: &mut [F], ptr: usize, values: &[u64]) { + assert_eq!(values.len(), VECTOR_LEN); + for (i, &value) in values.iter().enumerate() { + set_public_input_cell(public_input, ptr * VECTOR_LEN + i, f(value)); + } + } + + fn set_multivector(public_input: &mut [F], ptr: usize, chunks: &[&[u64]]) { + for (chunk_index, chunk) in chunks.iter().enumerate() { + assert_eq!(chunk.len(), VECTOR_LEN); + for (i, &value) in chunk.iter().enumerate() { + set_public_input_cell(public_input, (ptr + chunk_index) * VECTOR_LEN + i, f(value)); + } + } + } + + fn set_ef_slice(public_input: &mut [F], ptr: usize, elements: &[[u64; DIMENSION]]) { + for (i, coeffs) in elements.iter().enumerate() { + for (j, &value) in coeffs.iter().enumerate() { + set_public_input_cell(public_input, ptr + i * DIMENSION + j, f(value)); + } + } + } + + fn set_base_slice(public_input: &mut [F], start_index: usize, values: &[u64]) { + for (i, &value) in values.iter().enumerate() { + set_public_input_cell(public_input, start_index + i, f(value)); + } + } + + fn build_test_case() -> (Bytecode, Vec) { + let mut public_input = vec![F::ZERO; PUBLIC_INPUT_LEN]; + + set_vector( + &mut public_input, + POSEIDON16_ARG_A_PTR, + &POSEIDON16_ARG_A_VALUES, + ); + set_vector( + &mut public_input, + POSEIDON16_ARG_B_PTR, + &POSEIDON16_ARG_B_VALUES, + ); + + let poseidon24_chunks = [ + &POSEIDON24_ARG_A_VALUES[0][..], + &POSEIDON24_ARG_A_VALUES[1][..], + ]; + set_multivector(&mut public_input, POSEIDON24_ARG_A_PTR, &poseidon24_chunks); + set_vector( + &mut public_input, + POSEIDON24_ARG_B_PTR, + &POSEIDON24_ARG_B_VALUES, + ); + + set_ef_slice(&mut public_input, DOT_ARG0_PTR, &DOT_ARG0_VALUES); + set_ef_slice(&mut public_input, DOT_ARG1_PTR, &DOT_ARG1_VALUES); + + let coeff_base = MLE_COEFF_PTR << MLE_N_VARS; + set_base_slice(&mut public_input, coeff_base, &MLE_COEFF_VALUES); + + let log_point_size = log2_ceil_usize(MLE_N_VARS * DIMENSION); + let point_base = MLE_POINT_PTR << log_point_size; + set_base_slice(&mut public_input, point_base, &MLE_POINT_VALUES); + + let mut hints = BTreeMap::new(); + hints.insert( + 0, + vec![Hint::RequestMemory { + offset: POSEIDON16_RES_OFFSET, + size: MemOrConstant::Constant(f(2)), + vectorized: true, + vectorized_len: LOG_VECTOR_LEN + 1, + }], + ); + hints.insert( + 1, + vec![Hint::RequestMemory { + offset: POSEIDON24_RES_OFFSET, + size: MemOrConstant::Constant(f(1)), + vectorized: true, + vectorized_len: LOG_VECTOR_LEN, + }], + ); + hints.insert( + 2, + vec![Hint::RequestMemory { + offset: DOT_RES_OFFSET, + size: MemOrConstant::Constant(f(1)), + vectorized: false, + vectorized_len: 0, + }], + ); + hints.insert( + 3, + vec![Hint::RequestMemory { + offset: MLE_RES_OFFSET, + size: MemOrConstant::Constant(f(1)), + vectorized: true, + vectorized_len: LOG_VECTOR_LEN, + }], + ); + + let instructions = vec![ + Instruction::Poseidon2_16 { + arg_a: MemOrConstant::Constant(f(POSEIDON16_ARG_A_PTR as u64)), + arg_b: MemOrConstant::Constant(f(POSEIDON16_ARG_B_PTR as u64)), + res: MemOrFp::MemoryAfterFp { + offset: POSEIDON16_RES_OFFSET, + }, + }, + Instruction::Poseidon2_24 { + arg_a: MemOrConstant::Constant(f(POSEIDON24_ARG_A_PTR as u64)), + arg_b: MemOrConstant::Constant(f(POSEIDON24_ARG_B_PTR as u64)), + res: MemOrFp::MemoryAfterFp { + offset: POSEIDON24_RES_OFFSET, + }, + }, + Instruction::DotProductExtensionExtension { + arg0: MemOrConstant::Constant(f(DOT_ARG0_PTR as u64)), + arg1: MemOrConstant::Constant(f(DOT_ARG1_PTR as u64)), + res: MemOrFp::MemoryAfterFp { + offset: DOT_RES_OFFSET, + }, + size: DOT_PRODUCT_LEN, + }, + Instruction::MultilinearEval { + coeffs: MemOrConstant::Constant(f(MLE_COEFF_PTR as u64)), + point: MemOrConstant::Constant(f(MLE_POINT_PTR as u64)), + res: MemOrFp::MemoryAfterFp { + offset: MLE_RES_OFFSET, + }, + n_vars: MLE_N_VARS, + }, + ]; + + let bytecode = Bytecode { + instructions, + hints, + starting_frame_memory: 512, + ending_pc: 4, + }; + + (bytecode, public_input) + } + + fn run_program() -> (Bytecode, ExecutionResult) { + let (bytecode, public_input) = build_test_case(); + let result = execute_bytecode(&bytecode, &public_input, &[], "", &BTreeMap::new(), false); + (bytecode, result) + } + + #[test] + fn vm_precompile_events_capture_expected_data() { + let (_bytecode, execution_result) = run_program(); + + assert_eq!(execution_result.vm_poseidon16_events.len(), 1); + assert_eq!(execution_result.vm_poseidon24_events.len(), 1); + assert_eq!(execution_result.vm_dot_product_events.len(), 1); + assert_eq!(execution_result.vm_multilinear_eval_events.len(), 1); + + let poseidon16_event = &execution_result.vm_poseidon16_events[0]; + assert_eq!(poseidon16_event.cycle, 0); + assert_eq!(poseidon16_event.addr_input_a, POSEIDON16_ARG_A_PTR); + assert_eq!(poseidon16_event.addr_input_b, POSEIDON16_ARG_B_PTR); + + let poseidon16_res_ptr = execution_result + .memory + .get(execution_result.fps[poseidon16_event.cycle] + POSEIDON16_RES_OFFSET) + .unwrap() + .to_usize(); + assert_eq!(poseidon16_event.addr_output, poseidon16_res_ptr); + + let poseidon16_input_a = POSEIDON16_ARG_A_VALUES.map(f); + let poseidon16_input_b = POSEIDON16_ARG_B_VALUES.map(f); + let mut expected_poseidon16_input = [F::ZERO; 16]; + expected_poseidon16_input[..VECTOR_LEN].copy_from_slice(&poseidon16_input_a); + expected_poseidon16_input[VECTOR_LEN..].copy_from_slice(&poseidon16_input_b); + assert_eq!(poseidon16_event.input, expected_poseidon16_input); + + let poseidon16_output = execution_result + .memory + .get_vectorized_slice(poseidon16_event.addr_output, 2) + .unwrap(); + assert_eq!(poseidon16_output, poseidon16_event.output.to_vec()); + + let poseidon24_event = &execution_result.vm_poseidon24_events[0]; + assert_eq!(poseidon24_event.cycle, 1); + assert_eq!(poseidon24_event.addr_input_a, POSEIDON24_ARG_A_PTR); + assert_eq!(poseidon24_event.addr_input_b, POSEIDON24_ARG_B_PTR); + + let poseidon24_res_ptr = execution_result + .memory + .get(execution_result.fps[poseidon24_event.cycle] + POSEIDON24_RES_OFFSET) + .unwrap() + .to_usize(); + assert_eq!(poseidon24_event.addr_output, poseidon24_res_ptr); + + let poseidon24_input_a0 = POSEIDON24_ARG_A_VALUES[0].map(f); + let poseidon24_input_a1 = POSEIDON24_ARG_A_VALUES[1].map(f); + let poseidon24_input_b = POSEIDON24_ARG_B_VALUES.map(f); + let mut expected_poseidon24_input = [F::ZERO; 24]; + expected_poseidon24_input[..VECTOR_LEN].copy_from_slice(&poseidon24_input_a0); + expected_poseidon24_input[VECTOR_LEN..2 * VECTOR_LEN].copy_from_slice(&poseidon24_input_a1); + expected_poseidon24_input[2 * VECTOR_LEN..].copy_from_slice(&poseidon24_input_b); + assert_eq!(poseidon24_event.input, expected_poseidon24_input); + + let poseidon24_output = execution_result + .memory + .get_vector(poseidon24_event.addr_output) + .unwrap(); + assert_eq!(poseidon24_output, poseidon24_event.output); + + let dot_event = &execution_result.vm_dot_product_events[0]; + assert_eq!(dot_event.cycle, 2); + assert_eq!(dot_event.addr_0, DOT_ARG0_PTR); + assert_eq!(dot_event.addr_1, DOT_ARG1_PTR); + + let dot_res_ptr = execution_result + .memory + .get(execution_result.fps[dot_event.cycle] + DOT_RES_OFFSET) + .unwrap() + .to_usize(); + assert_eq!(dot_event.addr_res, dot_res_ptr); + + let dot_slice_0 = DOT_ARG0_VALUES + .iter() + .map(|coeffs| coeffs.map(f)) + .map(|coeffs| EF::from_basis_coefficients_slice(&coeffs).unwrap()) + .collect::>(); + let dot_slice_1 = DOT_ARG1_VALUES + .iter() + .map(|coeffs| coeffs.map(f)) + .map(|coeffs| EF::from_basis_coefficients_slice(&coeffs).unwrap()) + .collect::>(); + assert_eq!(dot_event.slice_0, dot_slice_0); + assert_eq!(dot_event.slice_1, dot_slice_1); + + let dot_res = execution_result + .memory + .get_ef_element(dot_event.addr_res) + .unwrap(); + assert_eq!(dot_event.res, dot_res); + + let mle_event = &execution_result.vm_multilinear_eval_events[0]; + assert_eq!(mle_event.cycle, 3); + assert_eq!(mle_event.addr_coeffs, MLE_COEFF_PTR); + assert_eq!(mle_event.addr_point, MLE_POINT_PTR); + + let mle_res_ptr = execution_result + .memory + .get(execution_result.fps[mle_event.cycle] + MLE_RES_OFFSET) + .unwrap() + .to_usize(); + assert_eq!(mle_event.addr_res, mle_res_ptr); + + let expected_point = + vec![EF::from_basis_coefficients_slice(&MLE_POINT_VALUES.map(f)).unwrap()]; + assert_eq!(mle_event.point, expected_point); + + let mle_res_vec = execution_result + .memory + .get_vector(mle_event.addr_res) + .unwrap(); + let mle_res_coeffs: [F; DIMENSION] = mle_res_vec[..DIMENSION].try_into().unwrap(); + let mle_res = EF::from_basis_coefficients_slice(&mle_res_coeffs).unwrap(); + assert_eq!(mle_event.res, mle_res); + } + + #[test] + fn vm_precompile_events_only_final_pass() { + let (bytecode, public_input) = build_test_case(); + let mut std_out = String::new(); + let mut history = ExecutionHistory::default(); + + let first_pass = execute_bytecode_helper( + &bytecode, + &public_input, + &[], + MAX_MEMORY_SIZE / 2, + false, + &mut std_out, + &mut history, + false, + &BTreeMap::new(), + ) + .expect("first execution should succeed"); + + assert!(first_pass.vm_poseidon16_events.is_empty()); + assert!(first_pass.vm_poseidon24_events.is_empty()); + assert!(first_pass.vm_dot_product_events.is_empty()); + assert!(first_pass.vm_multilinear_eval_events.is_empty()); + + let mut history_final = ExecutionHistory::default(); + let final_pass = execute_bytecode_helper( + &bytecode, + &public_input, + &[], + first_pass.no_vec_runtime_memory, + true, + &mut String::new(), + &mut history_final, + false, + &BTreeMap::new(), + ) + .expect("final execution should succeed"); + + assert_eq!(final_pass.vm_poseidon16_events.len(), 1); + assert_eq!(final_pass.vm_poseidon24_events.len(), 1); + assert_eq!(final_pass.vm_dot_product_events.len(), 1); + assert_eq!(final_pass.vm_multilinear_eval_events.len(), 1); + } +} From 74f274a94187a08c936a005f4705cf4334466860 Mon Sep 17 00:00:00 2001 From: adust09 Date: Mon, 22 Sep 2025 14:35:15 +0900 Subject: [PATCH 08/11] remove related issue --- TODO.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/TODO.md b/TODO.md index a0594b0c..40b19e92 100644 --- a/TODO.md +++ b/TODO.md @@ -8,7 +8,6 @@ - one can "move out" the variable of the eq(.) polynomials out of the sumcheck computation in WHIR (as done in the PIOP) - Structured AIR: often no all the columns use both up/down -> only handle the used ones to speed up the PIOP zerocheck - use RowMAjorMatrix instead of Vec for witness, and avoid any transpositions as suggested by Thomas -- Fill Precompile tables during bytecode execution - Use Univariate Skip to commit to tables with k.2^n rows (k small) - avoid field embedding in the initial sumcheck of logup*, when table / values are in base field - opti logup* GKR: @@ -104,4 +103,4 @@ fn doesnt_work(x) inline -> 1 { } // the bug: we do not JUMP here, when inlined return 1; // will be compiled to `res = 1`; -> invalid (res = 0 and = 1 at the same time) } -``` \ No newline at end of file +``` From 0f2466831f9191297847163408ce29e208632fbd Mon Sep 17 00:00:00 2001 From: adust09 Date: Mon, 22 Sep 2025 15:14:30 +0900 Subject: [PATCH 09/11] Refactor execution trace structure by encapsulating multilinear evaluation details within a new RowMultilinearEval struct. Update related tests to reflect this change and ensure correctness of assertions. --- .../witness_generation/src/execution_trace.rs | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs index 8d23308b..6aed1f4d 100644 --- a/crates/lean_prover/witness_generation/src/execution_trace.rs +++ b/crates/lean_prover/witness_generation/src/execution_trace.rs @@ -267,12 +267,13 @@ pub fn get_execution_trace( for e in &execution_result.vm_multilinear_eval_events { vm_multilinear_evals.push(WitnessMultilinearEval { cycle: e.cycle, - addr_coeffs: e.addr_coeffs, - addr_point: e.addr_point, - addr_res: e.addr_res, - n_vars: e.n_vars, - point: e.point.clone(), - res: e.res, + inner: RowMultilinearEval { + addr_coeffs: e.addr_coeffs, + addr_point: e.addr_point, + addr_res: e.addr_res, + point: e.point.clone(), + res: e.res, + }, }); } @@ -616,17 +617,19 @@ mod tests { let mle_event = &execution_result.vm_multilinear_eval_events[0]; let witness_mle = &trace.vm_multilinear_evals[0]; + let witness_mle_row = &witness_mle.inner; assert_eq!(witness_mle.cycle, mle_event.cycle); - assert_eq!(witness_mle.addr_coeffs, mle_event.addr_coeffs); - assert_eq!(witness_mle.addr_point, mle_event.addr_point); - assert_eq!(witness_mle.addr_res, mle_event.addr_res); - assert_eq!(witness_mle.n_vars, mle_event.n_vars); - assert_eq!(witness_mle.point, mle_event.point); - assert_eq!(witness_mle.res, mle_event.res); + assert_eq!(witness_mle_row.addr_coeffs, mle_event.addr_coeffs); + assert_eq!(witness_mle_row.addr_point, mle_event.addr_point); + assert_eq!(witness_mle_row.addr_res, mle_event.addr_res); + assert_eq!(witness_mle_row.n_vars(), mle_event.n_vars); + let witness_point = &witness_mle_row.point; + assert_eq!(witness_point, &mle_event.point); + assert_eq!(witness_mle_row.res, mle_event.res); let expected_point = vec![EF::from_basis_coefficients_slice(&MLE_POINT_VALUES.map(f)).unwrap()]; - assert_eq!(witness_mle.point, expected_point); + assert_eq!(witness_point, &expected_point); let coeff_slice = execution_result .memory @@ -635,11 +638,11 @@ mod tests { 1 << mle_event.n_vars, ) .unwrap(); - let point = witness_mle.point[0]; + let point = witness_point[0]; let c0 = embed_base_into_extension(coeff_slice[0]); let c1 = embed_base_into_extension(coeff_slice[1]); let expected_mle = c0 * (EF::ONE - point) + c1 * point; - assert_eq!(witness_mle.res, expected_mle); + assert_eq!(witness_mle_row.res, expected_mle); assert_eq!( trace.poseidons_16.len(), @@ -680,6 +683,6 @@ mod tests { .get(execution_result.fps[mle_event.cycle] + MLE_RES_OFFSET) .unwrap() .to_usize(); - assert_eq!(witness_mle.addr_res, mle_res_ptr); + assert_eq!(witness_mle_row.addr_res, mle_res_ptr); } } From ce7a9f195c7bb731a3f83d437a0bde19aab73ebf Mon Sep 17 00:00:00 2001 From: adust09 Date: Mon, 22 Sep 2025 15:36:58 +0900 Subject: [PATCH 10/11] Update memory size constant in runner to MAX_RUNNER_MEMORY_SIZE for improved clarity and consistency. --- crates/lean_vm/src/runner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/lean_vm/src/runner.rs b/crates/lean_vm/src/runner.rs index 1da14878..ac66a703 100644 --- a/crates/lean_vm/src/runner.rs +++ b/crates/lean_vm/src/runner.rs @@ -1051,7 +1051,7 @@ mod tests { &bytecode, &public_input, &[], - MAX_MEMORY_SIZE / 2, + MAX_RUNNER_MEMORY_SIZE / 2, false, &mut std_out, &mut history, From 8fab114aadecca04aaccad7b56b9c6e06bd6d444 Mon Sep 17 00:00:00 2001 From: adust09 Date: Mon, 22 Sep 2025 20:17:09 +0900 Subject: [PATCH 11/11] Add execution trace updates to handle instruction representation correctly at ending program counter --- .../lean_prover/witness_generation/src/execution_trace.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/lean_prover/witness_generation/src/execution_trace.rs b/crates/lean_prover/witness_generation/src/execution_trace.rs index 6aed1f4d..3c9d76cd 100644 --- a/crates/lean_prover/witness_generation/src/execution_trace.rs +++ b/crates/lean_prover/witness_generation/src/execution_trace.rs @@ -165,6 +165,14 @@ pub fn get_execution_trace( .enumerate() { if pc == bytecode.ending_pc { + if pc < bytecode.instructions.len() { + let field_repr = field_representation(&bytecode.instructions[pc]); + for (j, field) in field_repr.iter().enumerate() { + trace[j][cycle] = *field; + } + } + trace[COL_INDEX_PC][cycle] = F::from_usize(pc); + trace[COL_INDEX_FP][cycle] = F::from_usize(fp); continue; } let instruction = &bytecode.instructions[pc];