diff --git a/Cargo.lock b/Cargo.lock
index 9e2ba88d..56011c03 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -658,9 +658,9 @@ dependencies = [
 [[package]]
 name = "pest"
-version = "2.8.1"
+version = "2.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
+checksum = "21e0a3a33733faeaf8651dfee72dd0f388f0c8e5ad496a3478fa5a922f49cfa8"
 dependencies = [
  "memchr",
  "thiserror",
@@ -669,9 +669,9 @@ dependencies = [
 [[package]]
 name = "pest_derive"
-version = "2.8.1"
+version = "2.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb056d9e8ea77922845ec74a1c4e8fb17e7c218cc4fc11a15c5d25e189aa40bc"
+checksum = "bc58706f770acb1dbd0973e6530a3cff4746fb721207feb3a8a6064cd0b6c663"
 dependencies = [
  "pest",
  "pest_generator",
@@ -679,9 +679,9 @@ dependencies = [
 [[package]]
 name = "pest_generator"
-version = "2.8.1"
+version = "2.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87e404e638f781eb3202dc82db6760c8ae8a1eeef7fb3fa8264b2ef280504966"
+checksum = "6d4f36811dfe07f7b8573462465d5cb8965fffc2e71ae377a33aecf14c2c9a2f"
 dependencies = [
  "pest",
  "pest_meta",
@@ -692,9 +692,9 @@ dependencies = [
 [[package]]
 name = "pest_meta"
-version = "2.8.1"
+version = "2.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edd1101f170f5903fde0914f899bb503d9ff5271d7ba76bbb70bea63690cc0d5"
+checksum = "42919b05089acbd0a5dcd5405fb304d17d1053847b81163d09c4ad18ce8e8420"
 dependencies = [
  "pest",
  "sha2",
@@ -1136,7 +1136,7 @@ dependencies = [
 [[package]]
 name = "whir-p3"
 version = "0.1.0"
-source = "git+https://github.com/TomWambsgans/whir-p3?branch=lean-multisig#23fb95bba8b012455baf4704f7472f7898b95368"
+source = "git+https://github.com/TomWambsgans/whir-p3?branch=lean-multisig#298a477f6984c0610c8183a8fa4a20bae8f01fc9"
 dependencies = [
  "clap",
  "itertools",
diff --git a/README.md b/README.md
index f591cbb0..f005b58b 100644
--- a/README.md
+++ b/README.md
@@ -56,6 +56,15 @@ RUSTFLAGS='-C target-cpu=native' NUM_XMSS_AGGREGATED='500' cargo test --release
 ![Alt text](docs/benchmark_graphs/graphs/xmss_aggregated_time.svg)
 ![Alt text](docs/benchmark_graphs/graphs/xmss_aggregated_overhead.svg)
+### Proof size
+
+With the "up to capacity" conjecture, current proofs with rate = 1/2 are ≈ 400-500 KiB, of which ≈ 300 KiB comes from WHIR.
+
+- The remaining 100-200 KiB will be significantly reduced in the future (this part has not been optimized at all).
+- The WHIR proof size will also shrink, thanks to Merkle pruning (TODO).
+
+Reasonable targets: 256 KiB for fast proofs, 128 KiB for slower proofs (rate = 1/4 or 1/8).
+
 ## Credits
 - [Plonky3](https://github.com/Plonky3/Plonky3) for its various performant crates (Finite fields, poseidon2 AIR etc)
diff --git a/TODO.md b/TODO.md
index 66baa02a..5c933e11 100644
--- a/TODO.md
+++ b/TODO.md
@@ -10,7 +10,6 @@
 - use RowMajorMatrix instead of Vec for witness, and avoid any transpositions, as suggested by Thomas
 - Fill Precompile tables during bytecode execution
 - Use Univariate Skip to commit to tables with k.2^n rows (k small)
-- increase density of multi commitments -> we can almost reduce by 2x commitment costs (question: will perf be good enough in order to avoid using the "jagged pcs" (cf sp1 hypercube)?)
 - avoid field embedding in the initial sumcheck of logup*, when table / values are in base field
 - opti logup* GKR:
   - when the indexes are not a power of 2 (which is the case in the execution table)
@@ -23,8 +22,50 @@
 - Sumcheck, case z = 0, no need to fold, only keep first half of the values (done in PR 33 by Lambda) (and also in WHIR?)
 - Custom AVX2 / AVX512 / Neon implem in Plonky3 for all of the finite field operations (done for degree 4 extension, but not degree 5)
 - the 2 executions of the program, before generating the validity proof, can be merged, using some kind of placeholders
-- both WHIR verif + XMSS aggregation programs have 40% of unused memory!! -> TODO improve the compiler to reduce memory fragmentation
 - Many times, we evaluate different multilinear polynomials (different columns of the same table etc) at a common point. OPTI = compute the eq(.) once, and then dot_product with everything
+- To commit to multiple AIR tables using a single PCS, the most general form our "packed pcs" API should accept is:
+  a list of n (n not a power of 2) columns, each ending with m repeated values (in this manner we can reduce proof size when there are a lot of columns (poseidons ...))
+
+About the "packed pcs" (similar to SP1's Jagged PCS, slightly less efficient, but simpler (no sumchecks)):
+- The best strategy is probably to pack as much as possible (the cost of increasing the density = additional inner evaluations), provided we can fit below a power of 2 minus epsilon (epsilon = 20% for instance, tbd). If the sum of the non-zero data is just above a power of 2, no packing technique, even the best, can help us, so we should spread anyway (to reduce the pressure of inner evaluations).
+- About those inner evaluations, there is a trick: we need to compute M1(a, b, c, d, ...), then M2(b, c, d, ...), then M3(c, d, ...). The trick: compute eq(.) for (c, d, ...), then dot product with M3. Then expand to eq(b, c, d, ...), dot product with M2. Then expand to eq(a, b, c, d, ...), dot product with M1. The idea is that, in this order, computing each eq is easy if we start from the previous one (see the sketch below).
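+
+A minimal, self-contained sketch of this eq-expansion trick (a toy prime field stands in for KoalaBear / its extension; `extend_eq`, `eval` and the `m1`/`m2`/`m3` tables are illustrative names, not this repo's API; evaluation vectors are assumed big-endian, i.e. the first point coordinate drives the top index bit):
+
+```rust
+// Toy modulus (KoalaBear-sized): any prime below 2^32 keeps a * b inside u64.
+const P: u64 = 2013265921;
+
+fn mul(a: u64, b: u64) -> u64 { a * b % P }
+
+/// Prepend one most-significant variable `c` to an eq table:
+/// the low half is scaled by (1 - c), the high half by c.
+fn extend_eq(old: &[u64], c: u64) -> Vec<u64> {
+    let one_minus_c = (1 + P - c) % P;
+    let mut out: Vec<u64> = old.iter().map(|&t| mul(t, one_minus_c)).collect();
+    out.extend(old.iter().map(|&t| mul(t, c)));
+    out
+}
+
+/// Evaluating a multilinear (given by its evals over the hypercube) at a point
+/// is a dot product of those evals with the eq table of that point.
+fn eval(evals: &[u64], eq: &[u64]) -> u64 {
+    evals.iter().zip(eq).fold(0, |acc, (&x, &e)| (acc + mul(x, e)) % P)
+}
+
+fn main() {
+    let (a, b, c) = (5u64, 7, 11);
+    let m3 = [1, 2];                        // 1 variable, evaluated at (c)
+    let m2 = [3, 4, 5, 6];                  // 2 variables, evaluated at (b, c)
+    let m1 = [7, 8, 9, 10, 11, 12, 13, 14]; // 3 variables, evaluated at (a, b, c)
+
+    // Each eq table is one cheap linear-pass expansion of the previous one.
+    let eq_c = extend_eq(&[1], c);
+    let eq_bc = extend_eq(&eq_c, b);
+    let eq_abc = extend_eq(&eq_bc, a);
+    println!("{} {} {}", eval(&m3, &eq_c), eval(&m2, &eq_bc), eval(&m1, &eq_abc));
+}
+```
+
+The point of the ordering is that `eq_abc` reuses `eq_bc`, which reuses `eq_c`, so the total cost of building all the eq tables is dominated by the largest one, instead of paying for a from-scratch eq(.) per polynomial.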
+- Currently the packed pcs works as follows:
+
+```
+┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐
+| || || || || || || || || || || || || || |
+| || || || || || || || || || || || || || |
+| || || || || || || || || || || || || || |
+| || || || || || || || || || || || || || |
+| || || || || || || || || || || || || || |
+| || || || || || || || || || || || || || |
+└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘
+┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐
+| || || || || || || || || || || || || || |
+| || || || || || || || || || || || || || |
+└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘
+┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐┌─┐
+└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘└─┘
+```
+
+But we could reduce proof size a lot by instead using (TODO):
+
+```
+┌────────────────────────┐┌──────────┐┌─┐
+|                        ||          || |
+|                        ||          || |
+|                        ||          || |
+|                        ||          || |
+|                        ||          || |
+|                        ||          || |
+└────────────────────────┘└──────────┘└─┘
+┌────────────────────────┐┌──────────┐┌─┐
+|                        ||          || |
+|                        ||          || |
+└────────────────────────┘└──────────┘└─┘
+┌────────────────────────┐┌──────────┐┌─┐
+└────────────────────────┘└──────────┘└─┘
+```

 ## Not Perf

@@ -37,6 +78,7 @@

 - KoalaBear extension of degree 5: the current implem (in a fork of Plonky3) has not been optimized
 - KoalaBear extension of degree 6: in order to use the (proven) Johnson bound in WHIR
+- the current "packed PCS" is not optimal in the end: it can lead to [16][4][2][2] (instead of [16][8])

 ## Known leanISA compiler bugs:

diff --git a/crates/compiler/src/a_simplify_lang.rs b/crates/compiler/src/a_simplify_lang.rs
index 760eaee7..17b40a11 100644
--- a/crates/compiler/src/a_simplify_lang.rs
+++ b/crates/compiler/src/a_simplify_lang.rs
@@ -127,6 +127,7 @@ pub enum SimpleLine {
         var: Var,
         size: SimpleExpr,
         vectorized: bool,
+        vectorized_len: SimpleExpr,
     },
     ConstMalloc {
         // always not vectorized
@@ -283,6 +284,7 @@ fn simplify_lines(
                     arg1: right,
                 });
             }
+            Expression::Log2Ceil { ..
} => unreachable!(), }, Line::ArrayAssign { array, @@ -589,9 +591,17 @@ fn simplify_lines( var, size, vectorized, + vectorized_len, } => { let simplified_size = simplify_expr(size, &mut res, counters, array_manager, const_malloc); + let simplified_vectorized_len = simplify_expr( + vectorized_len, + &mut res, + counters, + array_manager, + const_malloc, + ); if simplified_size.is_constant() && !*vectorized && const_malloc.forbidden_vars.contains(var) @@ -619,6 +629,7 @@ fn simplify_lines( var: var.clone(), size: simplified_size, vectorized: *vectorized, + vectorized_len: simplified_vectorized_len, }); } } @@ -724,6 +735,14 @@ fn simplify_expr( }); SimpleExpr::Var(aux_var) } + Expression::Log2Ceil { value } => { + let const_value = simplify_expr(value, lines, counters, array_manager, const_malloc) + .as_constant() + .unwrap(); + SimpleExpr::Constant(ConstExpression::Log2Ceil { + value: Box::new(const_value), + }) + } } } @@ -884,6 +903,9 @@ fn inline_expr(expr: &mut Expression, args: &BTreeMap, inlining inline_expr(left, args, inlining_count); inline_expr(right, args, inlining_count); } + Expression::Log2Ceil { value } => { + inline_expr(value, args, inlining_count); + } } } @@ -1036,6 +1058,9 @@ fn vars_in_expression(expr: &Expression) -> BTreeSet { vars.extend(vars_in_expression(left)); vars.extend(vars_in_expression(right)); } + Expression::Log2Ceil { value } => { + vars.extend(vars_in_expression(value)); + } } vars } @@ -1221,6 +1246,15 @@ fn replace_vars_for_unroll_in_expr( internal_vars, ); } + Expression::Log2Ceil { value } => { + replace_vars_for_unroll_in_expr( + value, + iterator, + unroll_index, + iterator_value, + internal_vars, + ); + } } } @@ -1434,6 +1468,7 @@ fn replace_vars_for_unroll( var, size, vectorized: _, + vectorized_len, } => { assert!(var != iterator, "Weird"); *var = format!("@unrolled_{unroll_index}_{iterator_value}_{var}"); @@ -1444,7 +1479,13 @@ fn replace_vars_for_unroll( iterator_value, internal_vars, ); - // vectorized is not changed + replace_vars_for_unroll_in_expr( + vectorized_len, + iterator, + unroll_index, + iterator_value, + internal_vars, + ); } Line::DecomposeBits { var, to_decompose } => { assert!(var != iterator, "Weird"); @@ -1734,6 +1775,9 @@ fn replace_vars_by_const_in_expr(expr: &mut Expression, map: &BTreeMap) replace_vars_by_const_in_expr(left, map); replace_vars_by_const_in_expr(right, map); } + Expression::Log2Ceil { value } => { + replace_vars_by_const_in_expr(value, map); + } } } @@ -2029,13 +2073,13 @@ impl SimpleLine { var, size, vectorized, + vectorized_len, } => { - let alloc_type = if *vectorized { - "malloc_vectorized" + if *vectorized { + format!("{var} = malloc_vec({size}, {vectorized_len})") } else { - "malloc" - }; - format!("{var} = {alloc_type}({size})") + format!("{var} = malloc({size})") + } } Self::ConstMalloc { var, diff --git a/crates/compiler/src/b_compile_intermediate.rs b/crates/compiler/src/b_compile_intermediate.rs index 2131dec2..d40e81c2 100644 --- a/crates/compiler/src/b_compile_intermediate.rs +++ b/crates/compiler/src/b_compile_intermediate.rs @@ -491,9 +491,20 @@ fn compile_lines( SimpleLine::FunctionRet { return_data } => { if compiler.func_name == "main" { + // pC -> ending_pc, fp -> 0 + let zero_value_offset = IntermediateValue::MemoryAfterFp { + offset: compiler.stack_size.into(), + }; + compiler.stack_size += 1; + instructions.push(IntermediateInstruction::Computation { + operation: Operation::Add, + arg_a: IntermediateValue::Constant(0.into()), + arg_c: IntermediateValue::Constant(0.into()), + res: 
zero_value_offset.clone(), + }); instructions.push(IntermediateInstruction::Jump { dest: IntermediateValue::label("@end_program".to_string()), - updated_fp: None, + updated_fp: Some(zero_value_offset), }); } else { compile_function_ret(&mut instructions, return_data, compiler); @@ -504,12 +515,14 @@ fn compile_lines( var, size, vectorized, + vectorized_len, } => { declared_vars.insert(var.clone()); instructions.push(IntermediateInstruction::RequestMemory { offset: compiler.get_offset(&var.clone().into()), size: IntermediateValue::from_simple_expr(size, compiler), vectorized: *vectorized, + vectorized_len: IntermediateValue::from_simple_expr(vectorized_len, compiler), }); } SimpleLine::ConstMalloc { var, size, label } => { @@ -631,6 +644,7 @@ fn setup_function_call( offset: new_fp_pos.into(), size: ConstExpression::function_size(func_name.to_string()).into(), vectorized: false, + vectorized_len: IntermediateValue::Constant(ConstExpression::zero()), }, IntermediateInstruction::Deref { shift_0: new_fp_pos.into(), diff --git a/crates/compiler/src/c_compile_final.rs b/crates/compiler/src/c_compile_final.rs index 73b3fe42..3490b59d 100644 --- a/crates/compiler/src/c_compile_final.rs +++ b/crates/compiler/src/c_compile_final.rs @@ -303,12 +303,17 @@ fn compile_block( offset, size, vectorized, + vectorized_len, } => { let size = try_as_mem_or_constant(&size).unwrap(); + let vectorized_len = try_as_constant(&vectorized_len, compiler) + .unwrap() + .to_usize(); let hint = Hint::RequestMemory { offset: eval_const_expression_usize(&offset, compiler), vectorized, size, + vectorized_len, }; hints.entry(pc).or_default().push(hint); } diff --git a/crates/compiler/src/grammar.pest b/crates/compiler/src/grammar.pest index 1f9e8464..e0b5ddd4 100644 --- a/crates/compiler/src/grammar.pest +++ b/crates/compiler/src/grammar.pest @@ -72,9 +72,11 @@ div_expr = { exp_expr ~ ("/" ~ exp_expr)* } exp_expr = { primary ~ ("**" ~ primary)* } primary = { "(" ~ expression ~ ")" | + log2_ceil_expr | array_access_expr | var_or_constant } +log2_ceil_expr = { "log2_ceil" ~ "(" ~ expression ~ ")" } array_access_expr = { identifier ~ "[" ~ expression ~ "]" } // Basic elements diff --git a/crates/compiler/src/intermediate_bytecode.rs b/crates/compiler/src/intermediate_bytecode.rs index b2b8b700..12ad4b1b 100644 --- a/crates/compiler/src/intermediate_bytecode.rs +++ b/crates/compiler/src/intermediate_bytecode.rs @@ -134,7 +134,8 @@ pub enum IntermediateInstruction { RequestMemory { offset: ConstExpression, // m[fp + offset] where the hint will be stored size: IntermediateValue, // the hint - vectorized: bool, // if true, will be 8-alligned, and the returned pointer will be "divied" by 8 (i.e. 
everything is in chunks of 8 field elements)
+    vectorized: bool, // if true, will be (2^vectorized_len)-aligned, and the returned pointer will be "divided" by 2^vectorized_len
+    vectorized_len: IntermediateValue,
 },
 DecomposeBits {
     res_offset: usize, // m[fp + res_offset..fp + res_offset + 31 * len(to_decompose)] will contain the decomposed bits
@@ -297,13 +298,17 @@ impl Display for IntermediateInstruction {
             offset,
             size,
             vectorized,
-        } => write!(
-            f,
-            "m[fp + {}] = {}({})",
-            offset,
-            if *vectorized { "malloc_vec" } else { "malloc" },
-            size
-        ),
+            vectorized_len,
+        } => {
+            if *vectorized {
+                write!(
+                    f,
+                    "m[fp + {offset}] = request_memory_vec({size}, {vectorized_len})"
+                )
+            } else {
+                write!(f, "m[fp + {offset}] = request_memory({size})")
+            }
+        }
         Self::Print { line_info, content } => {
             write!(f, "print {line_info}: ")?;
             for (i, c) in content.iter().enumerate() {
diff --git a/crates/compiler/src/lang.rs b/crates/compiler/src/lang.rs
index 6f52b457..4710c49f 100644
--- a/crates/compiler/src/lang.rs
+++ b/crates/compiler/src/lang.rs
@@ -1,4 +1,5 @@
 use p3_field::PrimeCharacteristicRing;
+use p3_util::log2_ceil_usize;
 use std::collections::BTreeMap;
 use std::fmt::{Display, Formatter};
 use utils::ToUsize;
@@ -118,6 +119,9 @@ pub enum ConstExpression {
         operation: HighLevelOperation,
         right: Box,
     },
+    Log2Ceil {
+        value: Box,
+    },
 }

 impl From for ConstExpression {
@@ -147,6 +151,12 @@ impl TryFrom for ConstExpression {
                 right: Box::new(right_expr),
             })
         }
+        Expression::Log2Ceil { value } => {
+            let value_expr = Self::try_from(*value)?;
+            Ok(Self::Log2Ceil {
+                value: Box::new(value_expr),
+            })
+        }
     }
 }
 }
@@ -182,6 +192,10 @@ impl ConstExpression {
             operation,
             right,
         } => Some(operation.eval(left.eval_with(func)?, right.eval_with(func)?)),
+        Self::Log2Ceil { value } => {
+            let value = value.eval_with(func)?;
+            Some(F::from_usize(log2_ceil_usize(value.to_usize())))
+        }
     }
 }
@@ -219,6 +233,9 @@ pub enum Expression {
         operation: HighLevelOperation,
         right: Box,
     },
+    Log2Ceil {
+        value: Box,
+    }, // only for const expressions
 }

 impl From for Expression {
@@ -259,8 +276,20 @@ impl Expression {
             left.eval_with(value_fn, array_fn)?,
             right.eval_with(value_fn, array_fn)?,
         )),
+        Self::Log2Ceil { value } => {
+            let value = value.eval_with(value_fn, array_fn)?;
+            Some(F::from_usize(log2_ceil_usize(value.to_usize())))
+        }
     }
 }
+
+    pub fn scalar(scalar: usize) -> Self {
+        SimpleExpr::scalar(scalar).into()
+    }
+
+    pub fn zero() -> Self {
+        Self::scalar(0)
+    }
 }

 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -316,6 +345,7 @@ pub enum Line {
         var: Var,
         size: Expression,
         vectorized: bool,
+        vectorized_len: Expression,
     },
     DecomposeBits {
         var: Var, // a pointer to 31 * len(to_decompose) field elements, containing the bits of "to_decompose"
@@ -343,6 +373,9 @@ impl Display for Expression {
         } => {
             write!(f, "({left} {operation} {right})")
         }
+        Self::Log2Ceil { value } => {
+            write!(f, "log2_ceil({value})")
+        }
     }
 }
 }
@@ -488,9 +521,10 @@ impl Line {
             var,
             size,
             vectorized,
+            vectorized_len,
         } => {
             if *vectorized {
-                format!("{var} = malloc_vectorized({size})")
+                format!("{var} = malloc_vec({size}, {vectorized_len})")
             } else {
                 format!("{var} = malloc({size})")
             }
@@ -573,6 +607,9 @@ impl Display for ConstExpression {
         } => {
             write!(f, "({left} {operation} {right})")
         }
+        Self::Log2Ceil { value } => {
+            write!(f, "log2_ceil({value})")
+        }
     }
 }
 }
diff --git a/crates/compiler/src/parser.rs b/crates/compiler/src/parser.rs
index e1c8992f..2c323cd1 100644
--- a/crates/compiler/src/parser.rs
+++
b/crates/compiler/src/parser.rs @@ -8,6 +8,7 @@ use pest_derive::Parser; use std::collections::BTreeMap; use utils::ToUsize; use vm::F; +use vm::LOG_VECTOR_LEN; use vm::LocationInSourceCode; #[derive(Parser)] @@ -444,12 +445,25 @@ fn parse_primary( Rule::expression => parse_expression(inner, constants), Rule::var_or_constant => Ok(Expression::Value(parse_var_or_constant(inner, constants)?)), Rule::array_access_expr => parse_array_access(inner, constants), + Rule::log2_ceil_expr => parse_log2_ceil(inner, constants), _ => Err(ParseError::SemanticError( "Invalid primary expression".to_string(), )), } } +// Add this new function to handle log2_ceil parsing +fn parse_log2_ceil( + pair: Pair<'_, Rule>, + constants: &BTreeMap, +) -> Result { + let mut inner = pair.into_inner(); + let expr = parse_expression(inner.next().unwrap(), constants)?; + Ok(Expression::Log2Ceil { + value: Box::new(expr), + }) +} + fn parse_tuple_expression( pair: Pair<'_, Rule>, constants: &BTreeMap, @@ -510,17 +524,21 @@ fn parse_function_call( var: return_data[0].clone(), size: args[0].clone(), vectorized: false, + vectorized_len: Expression::zero(), }) } "malloc_vec" => { - assert!( - args.len() == 1 && return_data.len() == 1, - "Invalid malloc_vec call" - ); + let vectorized_len = if args.len() == 1 { + Expression::scalar(LOG_VECTOR_LEN) + } else { + assert_eq!(args.len(), 2, "Invalid malloc_vec call"); + args[1].clone() + }; Ok(Line::MAlloc { var: return_data[0].clone(), size: args[0].clone(), vectorized: true, + vectorized_len, }) } "print" => { diff --git a/crates/pcs/src/combinatorics.rs b/crates/pcs/src/combinatorics.rs deleted file mode 100644 index 84168330..00000000 --- a/crates/pcs/src/combinatorics.rs +++ /dev/null @@ -1,132 +0,0 @@ -use std::cmp::Reverse; -use std::collections::BinaryHeap; - -#[derive(Debug, Clone)] -pub struct TreeOfVariables { - pub vars_per_polynomial: Vec, - pub root: TreeOfVariablesInner, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum TreeOfVariablesInner { - Polynomial(usize), - Composed { - left: Box, - right: Box, - }, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -struct TreeNode { - tree: TreeOfVariablesInner, - var_count: usize, -} - -impl PartialOrd for TreeNode { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for TreeNode { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.var_count.cmp(&other.var_count) - } -} - -impl TreeOfVariables { - pub fn total_vars(&self) -> usize { - self.root.total_vars(&self.vars_per_polynomial) - } - - pub fn density(&self) -> f64 { - self.root.density(&self.vars_per_polynomial) - } -} - -impl TreeOfVariablesInner { - pub fn total_vars(&self, vars_per_polynomial: &[usize]) -> usize { - match self { - Self::Polynomial(i) => vars_per_polynomial[*i], - Self::Composed { left, right } => { - 1 + left - .total_vars(vars_per_polynomial) - .max(right.total_vars(vars_per_polynomial)) - } - } - } - - fn density(&self, vars_per_polynomial: &[usize]) -> f64 { - match self { - Self::Polynomial(_) => 1., - Self::Composed { left, right } => { - let density_left = left.density(vars_per_polynomial); - let density_right = right.density(vars_per_polynomial); - let total_vars = self.total_vars(vars_per_polynomial) as f64; - let left_vars = left.total_vars(vars_per_polynomial) as f64; - let right_vars = right.total_vars(vars_per_polynomial) as f64; - let left_weight = (left_vars - total_vars).exp2(); - let right_weight = (right_vars - total_vars).exp2(); - left_weight.mul_add(density_left, right_weight * 
density_right) - } - } - } -} -impl TreeOfVariables { - pub fn compute_optimal(vars_per_polynomial: Vec) -> Self { - let n = vars_per_polynomial.len(); - assert!(n > 0); - - let root = Self::compute_greedy(&vars_per_polynomial); - - Self { - root, - vars_per_polynomial, - } - } - - fn compute_greedy(vars_per_polynomial: &[usize]) -> TreeOfVariablesInner { - let mut heap: BinaryHeap> = vars_per_polynomial - .iter() - .enumerate() - .map(|(i, &var_count)| { - Reverse(TreeNode { - tree: TreeOfVariablesInner::Polynomial(i), - var_count, - }) - }) - .collect(); - - while heap.len() > 1 { - let Reverse(left_node) = heap.pop().unwrap(); - let Reverse(right_node) = heap.pop().unwrap(); - - let combined_var_count = 1 + left_node.var_count.max(right_node.var_count); - - let combined_tree = TreeOfVariablesInner::Composed { - left: Box::new(left_node.tree), - right: Box::new(right_node.tree), - }; - - heap.push(Reverse(TreeNode { - tree: combined_tree, - var_count: combined_var_count, - })); - } - - heap.pop().unwrap().0.tree - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[cfg(test)] - #[test] - fn test_tree_of_variables() { - let vars_per_polynomial = vec![2]; - let tree = TreeOfVariables::compute_optimal(vars_per_polynomial); - tree.total_vars(); - } -} diff --git a/crates/pcs/src/lib.rs b/crates/pcs/src/lib.rs index e2d5bad2..8b295906 100644 --- a/crates/pcs/src/lib.rs +++ b/crates/pcs/src/lib.rs @@ -6,8 +6,6 @@ pub use pcs::*; mod ring_switch; pub use ring_switch::*; -mod combinatorics; - mod packed_pcs; pub use packed_pcs::*; diff --git a/crates/pcs/src/packed_pcs.rs b/crates/pcs/src/packed_pcs.rs index 188df188..ceed2467 100644 --- a/crates/pcs/src/packed_pcs.rs +++ b/crates/pcs/src/packed_pcs.rs @@ -1,189 +1,550 @@ +use std::{cmp::Reverse, collections::BTreeMap}; + use p3_field::{ExtensionField, Field}; -use p3_util::log2_strict_usize; +use p3_util::{log2_ceil_usize, log2_strict_usize}; +use rayon::prelude::*; use tracing::instrument; -use utils::{Evaluation, FSProver, FSVerifier, PF}; +use utils::{ + Evaluation, FSProver, FSVerifier, PF, from_end, multilinear_eval_constants_at_right, + to_big_endian_bits, to_big_endian_in_field, +}; +use whir_p3::poly::evals::EvaluationsList; use whir_p3::{ dft::EvalsDft, fiat_shamir::{FSChallenger, errors::ProofError}, poly::multilinear::MultilinearPoint, }; -use crate::{ - PCS, - combinatorics::{TreeOfVariables, TreeOfVariablesInner}, -}; +use crate::PCS; -#[derive(Debug)] -pub struct MultiCommitmentWitness, Pcs: PCS> { - pub tree: TreeOfVariables, - pub inner_witness: Pcs::Witness, - pub packed_polynomial: Vec, +#[derive(Debug, Clone)] +struct Chunk { + original_poly_index: usize, + original_n_vars: usize, + n_vars: usize, + offset_in_original: usize, + public_data: bool, + offset_in_packed: Option, } -pub fn num_packed_vars_for_pols(polynomials: &[&[F]]) -> usize { - let vars_per_polynomial = polynomials - .iter() - .map(|p| log2_strict_usize(p.len())) - .collect::>(); - TreeOfVariables::compute_optimal(vars_per_polynomial).total_vars() +impl Chunk { + fn bits_offset_in_original(&self) -> Vec { + to_big_endian_in_field( + self.offset_in_original >> self.n_vars, + self.original_n_vars - self.n_vars, + ) + } +} + +impl Chunk { + fn global_point_for_statement( + &self, + point: &[F], + packed_n_vars: usize, + ) -> MultilinearPoint { + MultilinearPoint( + [ + to_big_endian_in_field( + self.offset_in_packed.unwrap() >> self.n_vars, + packed_n_vars - self.n_vars, + ), + point.to_vec(), + ] + .concat(), + ) + } } -pub fn 
num_packed_vars_for_vars(vars_per_polynomial: &[usize]) -> usize { - TreeOfVariables::compute_optimal(vars_per_polynomial.to_vec()).total_vars() +/* +General layout: [public data][committed data][repeated value] (the thing has length 2^n_vars, public data is a power of two) +*/ +#[derive(Debug, Clone, Copy)] +pub struct ColDims { + pub n_vars: usize, + pub log_public: Option, + pub n_committed: usize, + pub default_value: F, +} + +impl ColDims { + pub fn dense(n_vars: usize) -> Self { + Self { + n_vars, + log_public: None, + n_committed: 1 << n_vars, + default_value: F::ZERO, + } + } + + pub fn sparse(n_committed: usize, default_value: F) -> Self { + Self::sparse_with_public_data(None, n_committed, default_value) + } + + pub fn sparse_with_public_data( + log_public: Option, + n_committed: usize, + default_value: F, + ) -> Self { + let n_public = log_public.map_or(0, |l| 1 << l); + let n_vars = log2_ceil_usize(n_public + n_committed); + Self { + n_vars, + log_public, + n_committed, + default_value, + } + } +} + +fn split_in_chunks( + poly_index: usize, + dims: &ColDims, + log_smallest_decomposition_chunk: usize, +) -> Vec { + let mut offset_in_original = 0; + let mut res = Vec::new(); + if let Some(log_public) = dims.log_public { + assert!(log_public >= log_smallest_decomposition_chunk); + res.push(Chunk { + original_poly_index: poly_index, + original_n_vars: dims.n_vars, + n_vars: log_public, + offset_in_original, + public_data: true, + offset_in_packed: None, + }); + offset_in_original += 1 << log_public; + } + let mut remaining = dims.n_committed; + + loop { + let mut chunk_size = + if remaining.next_power_of_two() - remaining <= 1 << log_smallest_decomposition_chunk { + log2_ceil_usize(remaining) + } else { + remaining.ilog2() as usize + }; + if let Some(log_public) = dims.log_public { + chunk_size = chunk_size.min(log_public); + } + + res.push(Chunk { + original_poly_index: poly_index, + original_n_vars: dims.n_vars, + n_vars: chunk_size, + offset_in_original, + public_data: false, + offset_in_packed: None, + }); + offset_in_original += 1 << chunk_size; + if remaining <= 1 << chunk_size { + return res; + } + remaining -= 1 << chunk_size; + } +} + +fn compute_chunks>( + dims: &[ColDims], + log_smallest_decomposition_chunk: usize, +) -> (BTreeMap>, usize) { + let mut all_chunks = Vec::new(); + for (i, dim) in dims.iter().enumerate() { + all_chunks.extend(split_in_chunks(i, dim, log_smallest_decomposition_chunk)); + } + all_chunks.sort_by_key(|c| (Reverse(c.public_data), Reverse(c.n_vars))); + + let mut offset_in_packed = 0; + let mut chunks_decomposition: BTreeMap<_, Vec<_>> = BTreeMap::new(); + for chunk in &mut all_chunks { + if !chunk.public_data { + chunk.offset_in_packed = Some(offset_in_packed); + offset_in_packed += 1 << chunk.n_vars; + } + chunks_decomposition + .entry(chunk.original_poly_index) + .or_default() + .push(chunk.clone()); + } + let packed_n_vars = log2_ceil_usize( + all_chunks + .iter() + .filter(|c| !c.public_data) + .map(|c| 1 << c.n_vars) + .sum::(), + ); + (chunks_decomposition, packed_n_vars) +} + +pub fn num_packed_vars_for_dims>( + dims: &[ColDims], + log_smallest_decomposition_chunk: usize, +) -> usize { + let (_, packed_n_vars) = compute_chunks::(dims, log_smallest_decomposition_chunk); + packed_n_vars +} + +#[derive(Debug, Clone)] +pub struct MultiCommitmentWitness, Pcs: PCS> { + pub inner_witness: Pcs::Witness, + pub packed_polynomial: Vec, } #[instrument(skip_all)] pub fn packed_pcs_commit, Pcs: PCS>( pcs: &Pcs, polynomials: &[&[F]], + dims: &[ColDims], 
dft: &EvalsDft>, prover_state: &mut FSProver>, + log_smallest_decomposition_chunk: usize, ) -> MultiCommitmentWitness { - let polynomials: Vec<&[F]> = polynomials.to_vec(); - let vars_per_polynomial = polynomials - .iter() - .map(|p| log2_strict_usize(p.len())) - .collect::>(); - let tree = TreeOfVariables::compute_optimal(vars_per_polynomial); - let mut packed_polynomial = F::zero_vec(1 << tree.total_vars()); - tree.root.fill_commitment( - &mut packed_polynomial, - &polynomials, - &tree.vars_per_polynomial, - ); + assert_eq!(polynomials.len(), dims.len()); + for (i, (poly, dim)) in polynomials.iter().zip(dims.iter()).enumerate() { + assert_eq!( + poly.len(), + 1 << dim.n_vars, + "poly {} has {} vars, but dim should be {}", + i, + log2_strict_usize(poly.len()), + dim.n_vars + ); + } + let (chunks_decomposition, packed_n_vars) = + compute_chunks::(dims, log_smallest_decomposition_chunk); + + { + // logging + let total_commited_data: usize = dims.iter().map(|d| d.n_committed).sum(); + let packed_commited_data: usize = chunks_decomposition + .values() + .flatten() + .filter(|c| !c.public_data) + .map(|c| 1 << c.n_vars) + .sum(); + + println!( + "Total committed data (full granularity): {} = 2^{:.3} | packed to 2^{:.3} -> 2^{}", + total_commited_data, + (total_commited_data as f64).log2(), + (packed_commited_data as f64).log2(), + packed_n_vars + ); + } + + let packed_polynomial = F::zero_vec(1 << packed_n_vars); // TODO avoid this huge cloning of all witness data + chunks_decomposition + .values() + .flatten() + .collect::>() + .par_iter() + .filter(|chunk| !chunk.public_data) + .for_each(|chunk| { + let start = chunk.offset_in_packed.unwrap(); + let end = start + (1 << chunk.n_vars); + let original_poly = &polynomials[chunk.original_poly_index]; + unsafe { + let slice = std::slice::from_raw_parts_mut( + (packed_polynomial.as_ptr() as *mut F).add(start), + end - start, + ); + slice.copy_from_slice( + &original_poly + [chunk.offset_in_original..chunk.offset_in_original + (1 << chunk.n_vars)], + ); + } + }); + let inner_witness = pcs.commit(dft, prover_state, &packed_polynomial); MultiCommitmentWitness { - tree, inner_witness, packed_polynomial, } } -pub fn packed_pcs_global_statements( - tree: &TreeOfVariables, +#[instrument(skip_all)] +pub fn packed_pcs_global_statements_for_prover< + F: Field, + EF: ExtensionField + ExtensionField>, +>( + polynomials: &[&[F]], + dims: &[ColDims], + log_smallest_decomposition_chunk: usize, statements_per_polynomial: &[Vec>], + prover_state: &mut FSProver>, ) -> Vec> { - check_tree(tree, statements_per_polynomial).expect("Invalid tree structure for multi-open"); + // TODO: + // - cache the "eq" poly, and then use dot product + // - current packing is not optimal in the end: can lead to [16][4][2][2] (instead of [16][8]) - tree.root - .global_statement(&tree.vars_per_polynomial, statements_per_polynomial, &[]) -} + let (chunks_decomposition, packed_vars) = + compute_chunks::(dims, log_smallest_decomposition_chunk); + + let statements_flattened = statements_per_polynomial + .iter() + .enumerate() + .map(|(poly_index, poly_statements)| { + poly_statements + .iter() + .map(move |statement| (poly_index, statement)) + }) + .flatten() + .collect::>(); + + let sub_packed_statements_and_evals_to_send = statements_flattened + .par_iter() + .map(|(poly_index, statement)| { + let dim = &dims[*poly_index]; + let pol = polynomials[*poly_index]; + + let chunks = &chunks_decomposition[&poly_index]; + assert!(!chunks.is_empty()); + let mut sub_packed_statements = Vec::new(); + 
let mut evals_to_send = Vec::new(); + if chunks.len() == 1 { + assert!(!chunks[0].public_data, "TODO"); + assert_eq!( + chunks[0].n_vars, + statement.point.0.len(), + "poly: {}", + poly_index + ); + assert!(chunks[0].offset_in_packed.unwrap() % (1 << chunks[0].n_vars) == 0); + + sub_packed_statements.push(Evaluation { + point: chunks[0].global_point_for_statement(&statement.point, packed_vars), + value: statement.value, + }); + } else { + let initial_booleans = statement + .point + .iter() + .take_while(|&&x| x == EF::ZERO || x == EF::ONE) + .map(|&x| x == EF::ONE) + .collect::>(); + let mut all_chunk_evals = Vec::new(); + + // skip the first one, we will deduce it (if it's not public) + // TODO do we really need to parallelize this? + chunks[1..] + .par_iter() + .map(|chunk| { + let missing_vars = statement.point.0.len() - chunk.n_vars; + + let offset_in_original_booleans = to_big_endian_bits( + chunk.offset_in_original >> chunk.n_vars, + missing_vars, + ); -#[derive(Debug)] -pub struct ParsedMultiCommitment, Pcs: PCS> { - pub tree: TreeOfVariables, - pub inner_parsed_commitment: Pcs::ParsedCommitment, + if !initial_booleans.is_empty() + && initial_booleans.len() < offset_in_original_booleans.len() + && &initial_booleans + == &offset_in_original_booleans[..initial_booleans.len()] + { + tracing::warn!("TODO: sparse statement accroos mutiple chunks"); + } + + if initial_booleans.len() >= offset_in_original_booleans.len() { + if &initial_booleans[..missing_vars] != &offset_in_original_booleans { + // this chunk is not concerned by this sparse evaluation + return (None, EF::ZERO); + } else { + // the evaluation only depends on this chunk, no need to recompute and = statement.value + return (None, statement.value); + } + } + + let sub_point = + MultilinearPoint(statement.point.0[missing_vars..].to_vec()); + let sub_value = (&pol[chunk.offset_in_original + ..chunk.offset_in_original + (1 << chunk.n_vars)]) + .evaluate(&sub_point); + ( + Some(Evaluation { + point: chunk.global_point_for_statement(&sub_point, packed_vars), + value: sub_value, + }), + sub_value, + ) + }) + .collect::>() + .into_iter() + .for_each(|(statement, sub_value)| { + if let Some(statement) = statement { + evals_to_send.push(statement.value); + sub_packed_statements.push(statement); + } + all_chunk_evals.push(sub_value); + }); + + // deduce the first sub_value, if it's not public + if dim.log_public.is_none() { + let retrieved_eval = compute_multilinear_value_from_chunks( + &chunks[1..], + &all_chunk_evals, + &statement.point, + 1 << chunks[0].n_vars, + dim.default_value, + ); + + let initial_missing_vars = statement.point.0.len() - chunks[0].n_vars; + let initial_sub_value = (statement.value - retrieved_eval) + / MultilinearPoint(statement.point.0[..initial_missing_vars].to_vec()) + .eq_poly_outside(&MultilinearPoint( + chunks[0].bits_offset_in_original(), + )); + let initial_sub_point = + MultilinearPoint(statement.point.0[initial_missing_vars..].to_vec()); + + let initial_packed_point = + chunks[0].global_point_for_statement(&initial_sub_point, packed_vars); + sub_packed_statements.insert( + 0, + Evaluation { + point: initial_packed_point, + value: initial_sub_value, + }, + ); + evals_to_send.insert(0, initial_sub_value); + } + } + (sub_packed_statements, evals_to_send) + }) + .collect::>(); + + let mut packed_statements = Vec::new(); + for (sub_packed_statements, evals_to_send) in sub_packed_statements_and_evals_to_send { + packed_statements.extend(sub_packed_statements); + 
prover_state.add_extension_scalars(&evals_to_send); + } + packed_statements } pub fn packed_pcs_parse_commitment, Pcs: PCS>( pcs: &Pcs, verifier_state: &mut FSVerifier>, - vars_per_polynomial: Vec, -) -> Result, ProofError> { - let tree = TreeOfVariables::compute_optimal(vars_per_polynomial); - let inner_parsed_commitment = pcs.parse_commitment(verifier_state, tree.total_vars())?; - Ok(ParsedMultiCommitment { - tree, - inner_parsed_commitment, - }) + dims: &[ColDims], + log_smallest_decomposition_chunk: usize, +) -> Result { + let (_, packed_n_vars) = compute_chunks::(&dims, log_smallest_decomposition_chunk); + pcs.parse_commitment(verifier_state, packed_n_vars) } -fn check_tree( - tree: &TreeOfVariables, +pub fn packed_pcs_global_statements_for_verifier< + F: Field, + EF: ExtensionField + ExtensionField>, +>( + dims: &[ColDims], + log_smallest_decomposition_chunk: usize, statements_per_polynomial: &[Vec>], -) -> Result<(), ProofError> { - assert_eq!( - statements_per_polynomial.len(), - tree.vars_per_polynomial.len() - ); - for (evals, &vars) in statements_per_polynomial - .iter() - .zip(&tree.vars_per_polynomial) - { - for eval in evals { - assert_eq!(eval.point.len(), vars); - } - } - Ok(()) -} + verifier_state: &mut FSVerifier>, + public_data: &BTreeMap>, // poly_index -> public data slice (power of 2) +) -> Result>, ProofError> { + assert_eq!(dims.len(), statements_per_polynomial.len()); + let (chunks_decomposition, packed_n_vars) = + compute_chunks::(dims, log_smallest_decomposition_chunk); + let mut packed_statements = Vec::new(); + for (poly_index, statements) in statements_per_polynomial.iter().enumerate() { + let dim = &dims[poly_index]; + let has_public_data = dim.log_public.is_some(); + let chunks = &chunks_decomposition[&poly_index]; + assert!(!chunks.is_empty()); + for statement in statements { + if chunks.len() == 1 { + assert!(!chunks[0].public_data, "TODO"); + assert_eq!(chunks[0].n_vars, statement.point.0.len()); + assert!(chunks[0].offset_in_packed.unwrap() % (1 << chunks[0].n_vars) == 0); + packed_statements.push(Evaluation { + point: chunks[0].global_point_for_statement(&statement.point, packed_n_vars), + value: statement.value, + }); + } else { + let initial_booleans = statement + .point + .iter() + .take_while(|&&x| x == EF::ZERO || x == EF::ONE) + .map(|&x| x == EF::ONE) + .collect::>(); + let mut sub_values = vec![]; + if has_public_data { + sub_values.push(public_data[&poly_index].evaluate(&MultilinearPoint( + from_end(&statement.point, chunks[0].n_vars).to_vec(), + ))); + } + for chunk in chunks { + if chunk.public_data { + continue; + } + let missing_vars = statement.point.0.len() - chunk.n_vars; + let offset_in_original_booleans = + to_big_endian_bits(chunk.offset_in_original >> chunk.n_vars, missing_vars); -impl TreeOfVariablesInner { - fn fill_commitment( - &self, - buff: &mut [F], - polynomials: &[&[F]], - vars_per_polynomial: &[usize], - ) { - match self { - Self::Polynomial(i) => { - let len = polynomials[*i].len(); - buff[..len].copy_from_slice(polynomials[*i]); - } - Self::Composed { left, right } => { - let (left_buff, right_buff) = buff.split_at_mut(buff.len() / 2); - let left_buff = &mut left_buff[..1 << left.total_vars(vars_per_polynomial)]; - let right_buff = &mut right_buff[..1 << right.total_vars(vars_per_polynomial)]; - rayon::join( - || left.fill_commitment(left_buff, polynomials, vars_per_polynomial), - || right.fill_commitment(right_buff, polynomials, vars_per_polynomial), - ); + if initial_booleans.len() >= offset_in_original_booleans.len() { 
+ if &initial_booleans[..missing_vars] != &offset_in_original_booleans { + // this chunk is not concerned by this sparse evaluation + sub_values.push(EF::ZERO); + } else { + // the evaluation only depends on this chunk, no need to recompute and = statement.value + sub_values.push(statement.value); + } + } else { + let sub_value = verifier_state.next_extension_scalar()?; + sub_values.push(sub_value); + let sub_point = + MultilinearPoint(statement.point.0[missing_vars..].to_vec()); + packed_statements.push(Evaluation { + point: chunk.global_point_for_statement(&sub_point, packed_n_vars), + value: sub_value, + }); + } + } + // consistency check + if statement.value + != compute_multilinear_value_from_chunks( + chunks, + &sub_values, + &statement.point, + 0, + dim.default_value, + ) + { + return Err(ProofError::InvalidProof); + } } } } + Ok(packed_statements) +} - fn global_statement>( - &self, - vars_per_polynomial: &[usize], - statements_per_polynomial: &[Vec>], - selectors: &[EF], - ) -> Vec> { - match self { - Self::Polynomial(i) => { - let mut res = Vec::new(); - for eval in &statements_per_polynomial[*i] { - res.push(Evaluation { - point: MultilinearPoint( - [selectors.to_vec(), eval.point.0.clone()].concat(), - ), - value: eval.value, - }); - } - res - } - Self::Composed { left, right } => { - let left_vars = left.total_vars(vars_per_polynomial); - let right_vars = right.total_vars(vars_per_polynomial); - - let mut left_selectors = selectors.to_vec(); - left_selectors.push(EF::ZERO); - left_selectors.extend(EF::zero_vec(right_vars.saturating_sub(left_vars))); - - let mut right_selectors = selectors.to_vec(); - right_selectors.push(EF::ONE); - right_selectors.extend(EF::zero_vec(left_vars.saturating_sub(right_vars))); - - let left_statements = left.global_statement( - vars_per_polynomial, - statements_per_polynomial, - &left_selectors, - ); - let right_statements = right.global_statement( - vars_per_polynomial, - statements_per_polynomial, - &right_selectors, - ); - [left_statements, right_statements].concat() - } - } +fn compute_multilinear_value_from_chunks>( + chunks: &[Chunk], + evals_per_chunk: &[EF], + point: &[EF], + size_of_first_chunk_mising: usize, + default_value: F, +) -> EF { + assert_eq!(chunks.len(), evals_per_chunk.len()); + let mut eval = EF::ZERO; + + let mut chunk_offset_sums = size_of_first_chunk_mising; + for (chunk, &sub_value) in chunks.iter().zip(evals_per_chunk) { + let missing_vars = point.len() - chunk.n_vars; + eval += sub_value + * MultilinearPoint(point[..missing_vars].to_vec()) + .eq_poly_outside(&MultilinearPoint(chunk.bits_offset_in_original())); + chunk_offset_sums += 1 << chunk.n_vars; } + eval += multilinear_eval_constants_at_right(chunk_offset_sums, point) * default_value; + eval } #[cfg(test)] mod tests { use std::marker::PhantomData; - use p3_field::extension::BinomialExtensionField; + use p3_field::{PrimeCharacteristicRing, extension::BinomialExtensionField}; use p3_koala_bear::KoalaBear; + use p3_util::log2_strict_usize; use rand::{Rng, SeedableRng, rngs::StdRng}; use utils::{ build_merkle_compress, build_merkle_hash, build_prover_state, build_verifier_state, @@ -196,49 +557,111 @@ mod tests { use super::*; type F = KoalaBear; - type EF = BinomialExtensionField; + type EF = BinomialExtensionField; #[test] - fn test_check_tree() { + fn test_packed_pcs() { let pcs = WhirConfigBuilder { folding_factor: FoldingFactor::ConstantFromSecondRound(4, 4), soundness_type: SecurityAssumption::CapacityBound, merkle_hash: build_merkle_hash(), merkle_compress: 
build_merkle_compress(), - pow_bits: 16, + pow_bits: 13, max_num_variables_to_send_coeffs: 6, rs_domain_initial_reduction_factor: 1, - security_level: 90, + security_level: 75, starting_log_inv_rate: 1, base_field: PhantomData::, extension_field: PhantomData::, }; let mut rng = StdRng::seed_from_u64(0); - let vars_per_polynomial = [4, 4, 5, 3, 7, 10, 8]; + let log_smallest_decomposition_chunk = 3; + let committed_length_lengths_and_default_value_and_log_public_data: [( + usize, + F, + Option, + ); _] = [ + (16, F::from_usize(8), Some(4)), + (854, F::from_usize(0), Some(7)), + (854, F::from_usize(1), Some(5)), + (16, F::from_usize(0), Some(3)), + (17, F::from_usize(0), Some(4)), + (95, F::from_usize(3), Some(4)), + (17, F::from_usize(0), None), + (95, F::from_usize(3), None), + (256, F::from_usize(8), None), + (1088, F::from_usize(9), None), + (512, F::from_usize(0), None), + (256, F::from_usize(8), Some(3)), + (1088, F::from_usize(9), Some(4)), + (512, F::from_usize(0), Some(5)), + (754, F::from_usize(4), Some(4)), + (1023, F::from_usize(7), Some(4)), + (2025, F::from_usize(11), Some(8)), + (16, F::from_usize(8), None), + (854, F::from_usize(0), None), + (854, F::from_usize(1), None), + (16, F::from_usize(0), None), + (754, F::from_usize(4), None), + (1023, F::from_usize(7), None), + (2025, F::from_usize(11), None), + ]; + let mut public_data = BTreeMap::new(); let mut polynomials = Vec::new(); + let mut dims = Vec::new(); let mut statements_per_polynomial = Vec::new(); - for &vars in &vars_per_polynomial { - let poly = (0..1 << vars).map(|_| rng.random()).collect::>(); + for (pol_index, &(committed_length, default_value, log_public_data)) in + committed_length_lengths_and_default_value_and_log_public_data + .iter() + .enumerate() + { + let mut poly = (0..committed_length + log_public_data.map_or(0, |l| 1 << l)) + .map(|_| rng.random()) + .collect::>(); + poly.resize(poly.len().next_power_of_two(), default_value); + if let Some(log_public) = log_public_data { + public_data.insert(pol_index, poly[..1 << log_public].to_vec()); + } + let n_vars = log2_strict_usize(poly.len()); let n_points = rng.random_range(1..5); let mut statements = Vec::new(); for _ in 0..n_points { - let point = MultilinearPoint((0..vars).map(|_| rng.random()).collect::>()); + let point = + MultilinearPoint((0..n_vars).map(|_| rng.random()).collect::>()); let value = poly.evaluate(&point); statements.push(Evaluation { point, value }); } polynomials.push(poly); + dims.push(ColDims { + n_vars, + log_public: log_public_data, + n_committed: committed_length, + default_value, + }); statements_per_polynomial.push(statements); } let mut prover_state = build_prover_state(); let dft = EvalsDft::::default(); - let polynomials_refs = polynomials.iter().map(|p| p.as_slice()).collect::>(); - let witness = packed_pcs_commit(&pcs, &polynomials_refs, &dft, &mut prover_state); + let polynomials_ref = polynomials.iter().map(|p| p.as_slice()).collect::>(); + let witness = packed_pcs_commit( + &pcs, + &polynomials_ref, + &dims, + &dft, + &mut prover_state, + log_smallest_decomposition_chunk, + ); - let packed_statements = - packed_pcs_global_statements(&witness.tree, &statements_per_polynomial); + let packed_statements = packed_pcs_global_statements_for_prover( + &polynomials_ref, + &dims, + log_smallest_decomposition_chunk, + &statements_per_polynomial, + &mut prover_state, + ); pcs.open( &dft, &mut prover_state, @@ -249,16 +672,22 @@ mod tests { let mut verifier_state = build_verifier_state(&prover_state); - let parsed_commitment = - 
packed_pcs_parse_commitment(&pcs, &mut verifier_state, vars_per_polynomial.to_vec()) - .unwrap(); - let packed_statements = - packed_pcs_global_statements(&parsed_commitment.tree, &statements_per_polynomial); - pcs.verify( + let parsed_commitment = packed_pcs_parse_commitment( + &pcs, &mut verifier_state, - &parsed_commitment.inner_parsed_commitment, - &packed_statements, + &dims, + log_smallest_decomposition_chunk, + ) + .unwrap(); + let packed_statements = packed_pcs_global_statements_for_verifier( + &dims, + log_smallest_decomposition_chunk, + &statements_per_polynomial, + &mut verifier_state, + &public_data, ) .unwrap(); + pcs.verify(&mut verifier_state, &parsed_commitment, &packed_statements) + .unwrap(); } } diff --git a/crates/pcs/src/pcs.rs b/crates/pcs/src/pcs.rs index 60a2486d..0180b6ec 100644 --- a/crates/pcs/src/pcs.rs +++ b/crates/pcs/src/pcs.rs @@ -25,7 +25,7 @@ pub trait NumVariables { pub trait PCS> { type ParsedCommitment: NumVariables; - type Witness; + type Witness: Sync + Send; fn commit( &self, dft: &EvalsDft>, diff --git a/crates/rec_aggregation/recursion_program.lean_lang b/crates/rec_aggregation/recursion_program.lean_lang index e9de21cb..9bbfd3c4 100644 --- a/crates/rec_aggregation/recursion_program.lean_lang +++ b/crates/rec_aggregation/recursion_program.lean_lang @@ -351,8 +351,15 @@ fn sample_stir_indexes_and_fold(fs_state, num_queries, merkle_leaves_in_basefiel folds = malloc(num_queries * DIM); if merkle_leaves_in_basefield == 1 { + folding_randomness_alligned = malloc_vec(1, log2_ceil(DIM * FOLDING_FACTOR_0)); + folding_randomness_alligned_ptr = folding_randomness_alligned * (2 ** log2_ceil(DIM * FOLDING_FACTOR_0)); + for i in 0..FOLDING_FACTOR_0 unroll { + assert_eq_extension(folding_randomness + i*DIM, folding_randomness_alligned_ptr + i*DIM); + } for i in 0..num_queries { - multilinear_eval((answers[i] * 8) / 2**FOLDING_FACTOR_0, folding_randomness, folds + i*DIM, FOLDING_FACTOR_0); // TODO batching: use a single call to multilinear eval + res = malloc_vec(1); + multilinear_eval((answers[i] * 8) / 2**FOLDING_FACTOR_0, folding_randomness_alligned, res, FOLDING_FACTOR_0); // TODO batching: use a single call to multilinear eval + assert_eq_extension(res * 8, folds + i*DIM); } } else { poly_eq = poly_eq_extension(folding_randomness, folding_factor, two_pow_folding_factor); diff --git a/crates/rec_aggregation/src/recursion.rs b/crates/rec_aggregation/src/recursion.rs index a3cf39c9..568b9bef 100644 --- a/crates/rec_aggregation/src/recursion.rs +++ b/crates/rec_aggregation/src/recursion.rs @@ -4,6 +4,7 @@ use std::time::Instant; use compiler::compile_program; use p3_field::BasedVectorSpace; +use p3_field::Field; use p3_field::PrimeCharacteristicRing; use rand::{Rng, SeedableRng, rngs::StdRng}; use utils::padd_with_zero_to_next_multiple_of; @@ -215,7 +216,7 @@ pub fn test_whir_recursion() { let (bytecode, function_locations) = compile_program(&program_str); let batch_pcs = build_batch_pcs(); let time = Instant::now(); - let proof_data = prove_execution( + let (proof_data, proof_size) = prove_execution( &bytecode, &program_str, &function_locations, @@ -224,6 +225,10 @@ pub fn test_whir_recursion() { &batch_pcs, false, ); - println!("WHIR recursion, proving time: {:?}", time.elapsed()); + println!( + "\nWHIR recursion, proving time: {:?}, proof size: {} KiB (not optimized)", + time.elapsed(), + proof_size * F::bits() / (8 * 1024) + ); verify_execution(&bytecode, &public_input, proof_data, &batch_pcs).unwrap(); } diff --git 
a/crates/rec_aggregation/src/xmss_aggregate.rs b/crates/rec_aggregation/src/xmss_aggregate.rs index da84f3d0..dea59b13 100644 --- a/crates/rec_aggregation/src/xmss_aggregate.rs +++ b/crates/rec_aggregation/src/xmss_aggregate.rs @@ -1,6 +1,7 @@ use std::{env, time::Instant}; use compiler::*; +use p3_field::Field; use p3_field::PrimeCharacteristicRing; use rand::{Rng, SeedableRng, rngs::StdRng}; use rayon::prelude::*; @@ -278,7 +279,7 @@ fn test_xmss_aggregate() { let (bytecode, function_locations) = compile_program(&program_str); let batch_pcs = build_batch_pcs(); let time = Instant::now(); - let proof_data = prove_execution( + let (proof_data, proof_size) = prove_execution( &bytecode, &program_str, &function_locations, @@ -290,11 +291,15 @@ fn test_xmss_aggregate() { let proving_time = time.elapsed(); verify_execution(&bytecode, &public_input, proof_data, &batch_pcs).unwrap(); println!( - "XMSS aggregation (n_signatures = {}, lifetime = 2^{})", + "\nXMSS aggregation (n_signatures = {}, lifetime = 2^{})", n_public_keys / INV_BITFIELD_DENSITY, LOG_LIFETIME ); - println!("Proving time: {proving_time:?}"); + println!( + "Proving time: {:?}, proof size: {} KiB (not optimized)", + proving_time, + proof_size * F::bits() / (8 * 1024) + ); } else { compile_and_run(&program_str, &public_input, &private_input, false); } diff --git a/crates/utils/src/misc.rs b/crates/utils/src/misc.rs index fc2873ab..ecfe6051 100644 --- a/crates/utils/src/misc.rs +++ b/crates/utils/src/misc.rs @@ -82,6 +82,13 @@ pub fn to_big_endian_bits(value: usize, bit_count: usize) -> Vec { .collect() } +pub fn to_big_endian_in_field(value: usize, bit_count: usize) -> Vec { + (0..bit_count) + .rev() + .map(|i| F::from_bool((value >> i) & 1 == 1)) + .collect() +} + pub fn to_little_endian_bits(value: usize, bit_count: usize) -> Vec { let mut res = to_big_endian_bits(value, bit_count); res.reverse(); diff --git a/crates/utils/src/multilinear.rs b/crates/utils/src/multilinear.rs index d7ead411..6365ca22 100644 --- a/crates/utils/src/multilinear.rs +++ b/crates/utils/src/multilinear.rs @@ -167,6 +167,39 @@ pub fn batch_fold_multilinear_in_small_field_packed>>( .collect() } +pub fn multilinear_eval_constants_at_right(limit: usize, point: &[F]) -> F { + let n_vars = point.len(); + + // multilinear polynomial = [0 0 --- 0][1 1 --- 1] (`limit` times 0, then `2^n_vars - limit` times 1) evaluated at `point` + + assert!( + limit <= (1 << n_vars), + "limit {} is too large for n_vars {}", + limit, + n_vars + ); + + if limit == 1 << n_vars { + return F::ZERO; + } + + if point.len() == 0 { + assert!(limit <= 1); + if limit == 1 { F::ZERO } else { F::ONE } + } else { + let main_bit = limit >> (n_vars - 1); + if main_bit == 1 { + // limit is at the right half + return point[0] + * multilinear_eval_constants_at_right(limit - (1 << (n_vars - 1)), &point[1..]); + } else { + // limit is at left half + return point[0] + + (F::ONE - point[0]) * multilinear_eval_constants_at_right(limit, &point[1..]); + } + } +} + // pub fn packed_multilinear(pols: &[Vec]) -> Vec { // let n_vars = pols[0].num_variables(); // assert!(pols.iter().all(|p| p.num_variables() == n_vars)); @@ -255,4 +288,19 @@ mod tests { pol.evaluate(&MultilinearPoint(point)) ); } + + #[test] + fn test_multilinear_eval_constants_at_right() { + let n_vars = 10; + let mut rng = StdRng::seed_from_u64(0); + let point = (0..n_vars).map(|_| rng.random()).collect::>(); + for limit in [0, 1, 2, 45, 74, 451, 741, 1022, 1023] { + let eval = multilinear_eval_constants_at_right(limit, &point); + let mut 
pol = F::zero_vec(1 << n_vars);
+            for i in limit..(1 << n_vars) {
+                pol[i] = F::ONE;
+            }
+            assert_eq!(eval, pol.evaluate(&MultilinearPoint(point.clone())));
+        }
+    }
 }
diff --git a/crates/vm/src/bytecode.rs b/crates/vm/src/bytecode.rs
index e6ea39b6..9edd5ee7 100644
--- a/crates/vm/src/bytecode.rs
+++ b/crates/vm/src/bytecode.rs
@@ -78,7 +78,7 @@ pub enum Instruction {
     MultilinearEval {
         coeffs: MemOrConstant, // vectorized pointer, chunk size = 2^n_vars
         point: MemOrConstant, // normal pointer, pointing to `n_vars` continuous extension field elements
-        res: MemOrFp, // normal pointer, pointing to 1 extension field element
+        res: MemOrFp, // vectorized pointer, pointing to 1 EF element (ending with 8 - DIM zeros)
         n_vars: usize,
     },
 }
@@ -114,7 +114,8 @@ pub enum Hint {
     RequestMemory {
         offset: usize, // m[fp + offset] where the hint will be stored
         size: MemOrConstant, // the hint
-        vectorized: bool,
+        vectorized: bool, // if true, will be (2^vectorized_len)-aligned, and the returned pointer will be "divided" by 2^vectorized_len
+        vectorized_len: usize,
     },
     DecomposeBits {
         res_offset: usize, // m[fp + res_offset..fp + res_offset + 31 * len(to_decompose)] will contain the decomposed bits
@@ -251,14 +252,16 @@ impl Display for Hint {
             offset,
             size,
             vectorized,
+            vectorized_len,
         } => {
-            write!(
-                f,
-                "m[fp + {}] = {}({})",
-                offset,
-                if *vectorized { "malloc_vec" } else { "malloc" },
-                size
-            )
+            if *vectorized {
+                write!(
+                    f,
+                    "m[fp + {offset}] = request_memory_vec({size}, {vectorized_len})"
+                )
+            } else {
+                write!(f, "m[fp + {offset}] = request_memory({size})")
+            }
         }
         Self::DecomposeBits {
             res_offset,
diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs
index a71ea6f4..79484c69 100644
--- a/crates/vm/src/lib.rs
+++ b/crates/vm/src/lib.rs
@@ -14,7 +14,8 @@ pub use runner::*;
 pub type LocationInSourceCode = usize;

 pub const DIMENSION: usize = 5;
-pub const VECTOR_LEN: usize = 8;
+pub const LOG_VECTOR_LEN: usize = 3;
+pub const VECTOR_LEN: usize = 1 << LOG_VECTOR_LEN;

 pub type F = KoalaBear;
 pub type EF = QuinticExtensionFieldKB;
diff --git a/crates/vm/src/runner.rs b/crates/vm/src/runner.rs
index 8a51d5d1..3ff87ebd 100644
--- a/crates/vm/src/runner.rs
+++ b/crates/vm/src/runner.rs
@@ -1,5 +1,7 @@
+use p3_field::BasedVectorSpace;
 use p3_field::PrimeCharacteristicRing;
 use p3_field::dot_product;
+use p3_util::log2_ceil_usize;
 use std::collections::BTreeMap;
 use utils::ToUsize;
 use utils::get_poseidon16;
@@ -202,7 +204,7 @@ fn execute_bytecode_helper(
     let initial_ap = fp + bytecode.starting_frame_memory;
     let initial_ap_vec =
-        (initial_ap + no_vec_runtime_memory).next_multiple_of(DIMENSION) / DIMENSION;
+        (initial_ap + no_vec_runtime_memory).next_multiple_of(VECTOR_LEN) / VECTOR_LEN;
     let mut pc = 0;
     let mut ap = initial_ap;
@@ -246,13 +248,22 @@
                 offset,
                 size,
                 vectorized,
+                vectorized_len,
             } => {
                 let size = size.read_value(&memory, fp)?.to_usize();
                 if *vectorized {
-                    // find the next multiple of VECTOR_LEN
-                    memory.set(fp + *offset, F::from_usize(ap_vec))?;
-                    ap_vec += size;
+                    assert!(*vectorized_len >= LOG_VECTOR_LEN, "TODO");
+
+                    // padding:
+                    while !(ap_vec * VECTOR_LEN).is_multiple_of(1 << *vectorized_len) {
+                        ap_vec += 1;
+                    }
+                    memory.set(
+                        fp + *offset,
+                        F::from_usize(ap_vec >> (*vectorized_len - LOG_VECTOR_LEN)),
+                    )?;
+                    ap_vec += size << (*vectorized_len - LOG_VECTOR_LEN);
                 } else {
                     memory.set(fp + *offset, F::from_usize(ap))?;
                     ap += size;
@@ -486,10 +497,21 @@
                 let ptr_res = res.read_value(&memory, fp)?.to_usize();
                 let n_coeffs = 1
<< *n_vars; let slice_coeffs = memory.slice(ptr_coeffs << *n_vars, n_coeffs)?; - let point = memory.get_continuous_slice_of_ef_elements(ptr_point, *n_vars)?; + + let log_point_size = log2_ceil_usize(*n_vars * DIMENSION); + let point_slice = memory.slice(ptr_point << log_point_size, *n_vars * DIMENSION)?; + for i in *n_vars * DIMENSION..(*n_vars * DIMENSION).next_power_of_two() { + memory.set((ptr_point << log_point_size) + i, F::ZERO)?; // padding + } + let point = point_slice[..*n_vars * DIMENSION] + .chunks_exact(DIMENSION) + .map(|chunk| EF::from_basis_coefficients_slice(chunk).unwrap()) + .collect::>(); let eval = slice_coeffs.evaluate(&MultilinearPoint(point)); - memory.set_ef_element(ptr_res, eval)?; + let mut res_vec = eval.as_basis_coefficients_slice().to_vec(); + res_vec.resize(VECTOR_LEN, F::ZERO); + memory.set_vector(ptr_res, res_vec.try_into().unwrap())?; pc += 1; } diff --git a/crates/zk_vm/src/common.rs b/crates/zk_vm/src/common.rs index dd56961c..fb8dae3d 100644 --- a/crates/zk_vm/src/common.rs +++ b/crates/zk_vm/src/common.rs @@ -1,6 +1,7 @@ use p3_air::BaseAir; use p3_field::{ExtensionField, Field, PrimeCharacteristicRing}; use p3_util::log2_ceil_usize; +use pcs::ColDims; use rayon::prelude::*; use std::ops::Range; use sumcheck::{SumcheckComputation, SumcheckComputationPacked}; @@ -19,25 +20,81 @@ use whir_p3::{ use crate::*; use vm::*; +pub fn get_base_dims( + n_cycles: usize, + log_public_memory: usize, + private_memory_len: usize, + bytecode_ending_pc: usize, + n_poseidons_16: usize, + n_poseidons_24: usize, + p16_air_width: usize, + p24_air_width: usize, + n_rows_table_dot_products: usize, +) -> Vec> { + let (default_p16_row, default_p24_row) = build_poseidon_columns( + &[WitnessPoseidon16::poseidon_of_zero()], + &[WitnessPoseidon24::poseidon_of_zero()], + ); + + [ + vec![ + ColDims::sparse_with_public_data(Some(log_public_memory), private_memory_len, F::ZERO), // memory + ColDims::sparse(n_cycles, F::from_usize(bytecode_ending_pc)), // pc + ColDims::sparse(n_cycles, F::ZERO), // fp + ColDims::sparse(n_cycles, F::ZERO), // mem_addr_a + ColDims::sparse(n_cycles, F::ZERO), // mem_addr_b + ColDims::sparse(n_cycles, F::ZERO), // mem_addr_c + ColDims::sparse(n_poseidons_16, F::from_usize(ZERO_VEC_PTR)), // poseidon16 index a + ColDims::sparse(n_poseidons_16, F::from_usize(ZERO_VEC_PTR)), // poseidon16 index b + ColDims::sparse(n_poseidons_16, F::from_usize(POSEIDON_16_NULL_HASH_PTR)), // poseidon16 index res + ColDims::sparse(n_poseidons_24, F::from_usize(ZERO_VEC_PTR)), // poseidon24 index a + ColDims::sparse(n_poseidons_24, F::from_usize(ZERO_VEC_PTR)), // poseidon24 index b + ColDims::sparse(n_poseidons_24, F::from_usize(POSEIDON_24_NULL_HASH_PTR)), // poseidon24 index res + ], + (0..p16_air_width - 16 * 2) + .map(|i| ColDims::sparse(n_poseidons_16, default_p16_row[16 + i][0])) + .collect::>(), // rest of poseidon16 table + (0..p24_air_width - 24 * 2) + .map(|i| ColDims::sparse(n_poseidons_24, default_p24_row[24 + i][0])) + .collect::>(), // rest of poseidon24 table + vec![ + ColDims::sparse(n_rows_table_dot_products, F::ONE), // dot product: (start) flag + ColDims::sparse(n_rows_table_dot_products, F::ONE), // dot product: length + ColDims::sparse(n_rows_table_dot_products, F::ZERO), // dot product: index a + ColDims::sparse(n_rows_table_dot_products, F::ZERO), // dot product: index b + ColDims::sparse(n_rows_table_dot_products, F::ZERO), // dot product: index res + ], + vec![ColDims::sparse(n_rows_table_dot_products, F::ZERO); DIMENSION], // dot product: computation + 
] + .concat() +} + pub fn poseidon_16_column_groups(poseidon_16_air: &Poseidon16Air) -> Vec<Range<usize>> { - vec![ - 0..8, - 8..16, - 16..poseidon_16_air.width() - 16, - poseidon_16_air.width() - 16..poseidon_16_air.width() - 8, - poseidon_16_air.width() - 8..poseidon_16_air.width(), + [ + vec![0..8, 8..16], + (16..poseidon_16_air.width() - 16) + .map(|i| i..i + 1) + .collect(), + vec![ + poseidon_16_air.width() - 16..poseidon_16_air.width() - 8, + poseidon_16_air.width() - 8..poseidon_16_air.width(), + ], ] + .concat() } pub fn poseidon_24_column_groups(poseidon_24_air: &Poseidon24Air) -> Vec<Range<usize>> { - vec![ - 0..8, - 8..16, - 16..24, - 24..poseidon_24_air.width() - 24, - poseidon_24_air.width() - 24..poseidon_24_air.width() - 8, // TODO should we commit to this part ? Probably not, but careful here, we will not check evaluations for this part - poseidon_24_air.width() - 8..poseidon_24_air.width(), + [ + vec![0..8, 8..16, 16..24], + (24..poseidon_24_air.width() - 24) + .map(|i| i..i + 1) + .collect(), + vec![ + poseidon_24_air.width() - 24..poseidon_24_air.width() - 8, // TODO should we commit to this part ? Probably not, but careful here, we will not check evaluations for this part + poseidon_24_air.width() - 8..poseidon_24_air.width(), + ], ] + .concat() } pub fn poseidon_lookup_value( @@ -67,22 +124,28 @@ pub fn poseidon_lookup_value( [ poseidon16_evals[0].value * s16, poseidon16_evals[1].value * s16, - poseidon16_evals[3].value * s16, - poseidon16_evals[4].value * s16, + poseidon16_evals[poseidon16_evals.len() - 2].value * s16, + poseidon16_evals[poseidon16_evals.len() - 1].value * s16, poseidon24_evals[0].value * s24, poseidon24_evals[1].value * s24, poseidon24_evals[2].value * s24, - poseidon24_evals[5].value * s24, + poseidon24_evals[poseidon24_evals.len() - 1].value * s24, ] .evaluate(poseidon_lookup_batching_chalenges) } -pub fn poseidon_lookup_index_statements( +pub fn add_poseidon_lookup_index_statements( poseidon_index_evals: &[EF], n_poseidons_16: usize, n_poseidons_24: usize, poseidon_logup_star_statements_indexes_point: &MultilinearPoint<EF>, -) -> Result<(Vec<Evaluation<EF>>, Vec<Evaluation<EF>>), ProofError> { + p16_indexes_a_statements: &mut Vec<Evaluation<EF>>, + p16_indexes_b_statements: &mut Vec<Evaluation<EF>>, + p16_indexes_res_statements: &mut Vec<Evaluation<EF>>, + p24_indexes_a_statements: &mut Vec<Evaluation<EF>>, + p24_indexes_b_statements: &mut Vec<Evaluation<EF>>, + p24_indexes_res_statements: &mut Vec<Evaluation<EF>>, +) -> Result<(), ProofError> { let log_n_p16 = log2_ceil_usize(n_poseidons_16); let log_n_p24 = log2_ceil_usize(n_poseidons_24); let correcting_factor = from_end( @@ -97,56 +160,42 @@ pub fn poseidon_lookup_index_statements( } else { (correcting_factor, EF::ONE) }; - let mut idx_point_right_p16 = poseidon_logup_star_statements_indexes_point[3..].to_vec(); - let mut idx_point_right_p24 = remove_end( - &poseidon_logup_star_statements_indexes_point[3..], - log_n_p16.abs_diff(log_n_p24), - ) - .to_vec(); + let mut idx_point_right_p16 = + MultilinearPoint(poseidon_logup_star_statements_indexes_point[3..].to_vec()); + let mut idx_point_right_p24 = MultilinearPoint( + remove_end( + &poseidon_logup_star_statements_indexes_point[3..], + log_n_p16.abs_diff(log_n_p24), + ) + .to_vec(), + ); if n_poseidons_16 < n_poseidons_24 { std::mem::swap(&mut idx_point_right_p16, &mut idx_point_right_p24); } - let p16_indexes_statements = vec![ - Evaluation { - point: MultilinearPoint( - [vec![EF::ZERO, EF::ZERO], idx_point_right_p16.clone()].concat(), - ), - value: poseidon_index_evals[0] / correcting_factor_p16, - }, - Evaluation { - point: MultilinearPoint( - [vec![EF::ZERO, EF::ONE],
idx_point_right_p16.clone()].concat(), - ), - value: poseidon_index_evals[1] / correcting_factor_p16, - }, - Evaluation { - point: MultilinearPoint( - [vec![EF::ONE, EF::ZERO], idx_point_right_p16.clone()].concat(), - ), - value: poseidon_index_evals[2] / correcting_factor_p16, - }, - ]; - - let p24_indexes_statements = vec![ - Evaluation { - point: MultilinearPoint( - [vec![EF::ZERO, EF::ZERO], idx_point_right_p24.clone()].concat(), - ), - value: poseidon_index_evals[4] / correcting_factor_p24, - }, - Evaluation { - point: MultilinearPoint( - [vec![EF::ZERO, EF::ONE], idx_point_right_p24.clone()].concat(), - ), - value: poseidon_index_evals[6] / correcting_factor_p24, - }, - Evaluation { - point: MultilinearPoint( - [vec![EF::ONE, EF::ZERO], idx_point_right_p24.clone()].concat(), - ), - value: poseidon_index_evals[7] / correcting_factor_p24, - }, - ]; + p16_indexes_a_statements.push(Evaluation { + point: idx_point_right_p16.clone(), + value: poseidon_index_evals[0] / correcting_factor_p16, + }); + p16_indexes_b_statements.push(Evaluation { + point: idx_point_right_p16.clone(), + value: poseidon_index_evals[1] / correcting_factor_p16, + }); + p16_indexes_res_statements.push(Evaluation { + point: idx_point_right_p16.clone(), + value: poseidon_index_evals[2] / correcting_factor_p16, + }); + p24_indexes_a_statements.push(Evaluation { + point: idx_point_right_p24.clone(), + value: poseidon_index_evals[4] / correcting_factor_p24, + }); + p24_indexes_b_statements.push(Evaluation { + point: idx_point_right_p24.clone(), + value: poseidon_index_evals[6] / correcting_factor_p24, + }); + p24_indexes_res_statements.push(Evaluation { + point: idx_point_right_p24.clone(), + value: poseidon_index_evals[7] / correcting_factor_p24, + }); if poseidon_index_evals[3] != poseidon_index_evals[2] + correcting_factor_p16 { return Err(ProofError::InvalidProof); @@ -154,7 +203,7 @@ pub fn poseidon_lookup_index_statements( if poseidon_index_evals[5] != poseidon_index_evals[4] + correcting_factor_p24 { return Err(ProofError::InvalidProof); } - Ok((p16_indexes_statements, p24_indexes_statements)) + Ok(()) } pub fn fold_bytecode(bytecode: &Bytecode, folding_challenges: &MultilinearPoint) -> Vec { diff --git a/crates/zk_vm/src/lib.rs b/crates/zk_vm/src/lib.rs index 54539bde..30f3cbc5 100644 --- a/crates/zk_vm/src/lib.rs +++ b/crates/zk_vm/src/lib.rs @@ -16,6 +16,7 @@ pub mod prove_execution; pub mod verify_execution; const UNIVARIATE_SKIPS: usize = 3; +const LOG_SMALLEST_DECOMPOSITION_CHUNK: usize = 8; // TODO optimize fn exec_column_groups() -> Vec> { [(0..N_EXEC_AIR_COLUMNS) diff --git a/crates/zk_vm/src/prove_execution.rs b/crates/zk_vm/src/prove_execution.rs index a2e850df..27ef3b23 100644 --- a/crates/zk_vm/src/prove_execution.rs +++ b/crates/zk_vm/src/prove_execution.rs @@ -9,8 +9,8 @@ use p3_field::BasedVectorSpace; use p3_field::ExtensionField; use p3_field::PrimeCharacteristicRing; use p3_util::{log2_ceil_usize, log2_strict_usize}; -use pcs::num_packed_vars_for_pols; -use pcs::{BatchPCS, packed_pcs_commit, packed_pcs_global_statements}; +use pcs::{BatchPCS, packed_pcs_commit, packed_pcs_global_statements_for_prover}; +use pcs::{ColDims, num_packed_vars_for_dims}; use rayon::prelude::*; use std::collections::BTreeMap; use sumcheck::MleGroupRef; @@ -20,7 +20,6 @@ use utils::assert_eq_many; use utils::dot_product_with_base; use utils::field_slice_as_base; use utils::pack_extension; -use utils::to_big_endian_bits; use utils::{ Evaluation, PF, build_poseidon_16_air, build_poseidon_24_air, build_prover_state, 
padd_with_zero_to_next_power_of_two, @@ -41,7 +40,7 @@ pub fn prove_execution( private_input: &[F], pcs: &impl BatchPCS, EF, EF>, vm_profiler: bool, -) -> Vec> { +) -> (Vec>, usize) { let ExecutionTrace { full_trace, n_poseidons_16, @@ -68,6 +67,7 @@ pub fn prove_execution( let private_memory = &memory[public_memory_size..]; let log_memory = log2_ceil_usize(memory.len()); let log_public_memory = log2_strict_usize(public_memory.len()); + let padded_memory = padd_with_zero_to_next_power_of_two(&memory); // TODO avoid this padding let n_cycles = full_trace[0].len(); let log_n_cycles = log2_strict_usize(n_cycles); @@ -88,8 +88,7 @@ pub fn prove_execution( let exec_witness = AirWitness::>::new(&exec_columns, &exec_column_groups()); let exec_table = AirTable::::new(VMAir, VMAir); - // TODO remove - exec_table.check_trace_validity(&exec_witness).unwrap(); + // exec_table.check_trace_validity(&exec_witness).unwrap(); let _validity_proof_span = info_span!("Validity proof generation").entered(); @@ -108,29 +107,10 @@ pub fn prove_execution( let (dot_product_columns, dot_product_padding_len) = build_dot_product_columns(&dot_products); let dot_product_witness = AirWitness::new(&dot_product_columns, &DOT_PRODUCT_AIR_COLUMN_GROUPS); - // TODO remove - dot_product_table - .check_trace_validity(&dot_product_witness) - .unwrap(); - - let p16_commited = - padd_with_zero_to_next_power_of_two(&p16_columns[16..p16_air.width() - 16].concat()); - let p24_commited = - padd_with_zero_to_next_power_of_two(&p24_columns[24..p24_air.width() - 24].concat()); let dot_product_flags: Vec> = field_slice_as_base(&dot_product_columns[0]).unwrap(); let dot_product_lengths: Vec> = field_slice_as_base(&dot_product_columns[1]).unwrap(); - let dot_product_indexes: Vec> = padd_with_zero_to_next_power_of_two( - &field_slice_as_base( - &[ - dot_product_columns[2].clone(), - dot_product_columns[3].clone(), - dot_product_columns[4].clone(), - ] - .concat(), - ) - .unwrap(), - ); + let dot_product_computations: &[EF] = &dot_product_columns[8]; let mut dot_product_computations_base = (0..DIMENSION).map(|_| Vec::new()).collect::>(); for row in dot_product_computations { @@ -142,12 +122,7 @@ pub fn prove_execution( } } - assert!(private_memory.len() % public_memory.len() == 0); - let n_private_memory_chunks = private_memory.len() / public_memory.len(); - let private_memory_commited_chunks = (0..n_private_memory_chunks) - .map(|i| &private_memory[i * public_memory.len()..(i + 1) * public_memory.len()]) - .collect::>(); - + let n_rows_table_dot_products = dot_product_columns[0].len() - dot_product_padding_len; let log_n_rows_dot_product_table = log2_strict_usize(dot_product_columns[0].len()); let mut prover_state = build_prover_state::(); @@ -157,8 +132,7 @@ pub fn prove_execution( n_poseidons_16, n_poseidons_24, dot_products.len(), - log_n_rows_dot_product_table, - dot_product_padding_len, + n_rows_table_dot_products, private_memory.len(), vm_multilinear_evals.len(), ] @@ -178,106 +152,132 @@ pub fn prove_execution( prover_state.add_extension_scalar(vm_multilinear_eval.res); } - let mut private_memory_statements = vec![vec![]; n_private_memory_chunks]; + let mut memory_statements = vec![]; for multilinear_eval in &vm_multilinear_evals { let addr_point = multilinear_eval.addr_point; let addr_coeffs = multilinear_eval.addr_coeffs; let addr_res = multilinear_eval.addr_res; - // TODO only 1 statement for the evaluation point (instead of `n_vars` ones) - // TODO we can group together most of the equality constraints (by chuncks alligned on 
powers of 2) - for (ef_addr, ef_value) in multilinear_eval - .point - .iter() - .enumerate() - .map(|(i, p)| (addr_point + i * DIMENSION, *p)) - .chain(std::iter::once((addr_res, multilinear_eval.res))) - { - for (f_address, f_value) in - >>::as_basis_coefficients_slice(&ef_value) - .iter() - .enumerate() - .map(|(a, v)| (ef_addr + a, *v)) - { - let memory_chunk_index = - f_address >> (log_memory - log2_ceil_usize(n_private_memory_chunks + 1)); - let addr_bits = &to_big_endian_bits( - f_address, - log_memory - log2_ceil_usize(n_private_memory_chunks + 1), - ); - let memory_sparse_point = addr_bits - .iter() - .map(|&x| EF::from_bool(x)) - .collect::>(); - let statement = Evaluation { - point: MultilinearPoint(memory_sparse_point), - value: EF::from(f_value), // TODO avoid embedding - }; - if memory_chunk_index != 0 { - private_memory_statements[memory_chunk_index - 1].push(statement); - } - } - } + // point lookup into memory + let log_point_len = log2_ceil_usize(multilinear_eval.point.len() * DIMENSION); + let point_random_challenge = prover_state.sample_vec(log_point_len); + let point_random_value = { + let mut point_mle = multilinear_eval + .point + .iter() + .flat_map(|v| { + >>::as_basis_coefficients_slice(v).to_vec() + }) + .collect::>(); + point_mle.resize(point_mle.len().next_power_of_two(), F::ZERO); + point_mle.evaluate(&MultilinearPoint(point_random_challenge.clone())) + }; + memory_statements.push(Evaluation { + point: MultilinearPoint( + [ + to_big_endian_in_field(addr_point, log_memory - log_point_len), + point_random_challenge.clone(), + ] + .concat(), + ), + value: point_random_value, + }); + + // result lookup into memory + let res_random_challenge = prover_state.sample_vec(LOG_VECTOR_LEN); + let res_random_value = { + let mut res_mle = multilinear_eval.res.as_basis_coefficients_slice().to_vec(); + res_mle.resize(VECTOR_LEN, F::ZERO); + res_mle.evaluate(&MultilinearPoint(res_random_challenge.clone())) + }; + memory_statements.push(Evaluation { + point: MultilinearPoint( + [ + to_big_endian_in_field(addr_res, log_memory - LOG_VECTOR_LEN), + res_random_challenge.clone(), + ] + .concat(), + ), + value: res_random_value, + }); { let n_vars = multilinear_eval.n_vars; assert!(n_vars <= log_memory); assert!(addr_coeffs < 1 << (log_memory - n_vars)); - if n_vars >= log_public_memory { - todo!("vm multilinear eval accross multiple memory chunks") - } - let memory_chunk_index = - addr_coeffs >> (log_memory - n_vars - log2_ceil_usize(n_private_memory_chunks + 1)); - let addr_bits = &to_big_endian_bits( - addr_coeffs, - log_memory - n_vars - log2_ceil_usize(n_private_memory_chunks + 1), - ); - let mut memory_sparse_point = addr_bits - .iter() - .map(|&x| EF::from_bool(x)) - .collect::>(); - memory_sparse_point.extend(multilinear_eval.point.clone()); + let addr_bits = to_big_endian_in_field(addr_coeffs, log_memory - n_vars); let statement = Evaluation { - point: MultilinearPoint(memory_sparse_point), + point: MultilinearPoint([addr_bits, multilinear_eval.point.clone()].concat()), value: multilinear_eval.res, }; - if memory_chunk_index != 0 { - private_memory_statements[memory_chunk_index - 1].push(statement); - } + memory_statements.push(statement); } } - let p16_indexes = all_poseidon_16_indexes(&poseidons_16); let p24_indexes = all_poseidon_24_indexes(&poseidons_24); + let base_dims = get_base_dims( + n_cycles, + log_public_memory, + private_memory.len(), + bytecode.ending_pc, + n_poseidons_16, + n_poseidons_24, + p16_air.width(), + p24_air.width(), + n_rows_table_dot_products, + ); 
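// Editor's note: a minimal, hypothetical sketch (not the real `pcs::ColDims` API) of the
// semantics the sparse dimensions built by `get_base_dims` above encode: each committed
// column has some meaningful rows, followed by one repeated, publicly known default value
// up to the padded power-of-two length, so the verifier can account for the padded tail
// without extra openings.
fn sparse_column<F: Clone>(rows: Vec<F>, default: F) -> Vec<F> {
    let mut padded = rows;
    padded.resize(padded.len().next_power_of_two(), default); // tail = repeated default
    padded
}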
+ + let dot_product_col_index_a = field_slice_as_base(&dot_product_columns[2]).unwrap(); + let dot_product_col_index_b = field_slice_as_base(&dot_product_columns[3]).unwrap(); + let dot_product_col_index_res = field_slice_as_base(&dot_product_columns[4]).unwrap(); + + let base_pols = [ + vec![ + padded_memory.as_slice(), + full_trace[COL_INDEX_PC].as_slice(), + full_trace[COL_INDEX_FP].as_slice(), + full_trace[COL_INDEX_MEM_ADDRESS_A].as_slice(), + full_trace[COL_INDEX_MEM_ADDRESS_B].as_slice(), + full_trace[COL_INDEX_MEM_ADDRESS_C].as_slice(), + p16_indexes[0].as_slice(), + p16_indexes[1].as_slice(), + p16_indexes[2].as_slice(), + p24_indexes[0].as_slice(), + p24_indexes[1].as_slice(), + p24_indexes[2].as_slice(), + ], + p16_columns[16..p16_air.width() - 16] + .iter() + .map(Vec::as_slice) + .collect::>(), + p24_columns[24..p24_air.width() - 24] + .iter() + .map(Vec::as_slice) + .collect::>(), + vec![ + dot_product_flags.as_slice(), + dot_product_lengths.as_slice(), + dot_product_col_index_a.as_slice(), + dot_product_col_index_b.as_slice(), + dot_product_col_index_res.as_slice(), + ], + dot_product_computations_base + .iter() + .map(Vec::as_slice) + .collect(), + ] + .concat(); + // 1st Commitment let packed_pcs_witness_base = packed_pcs_commit( pcs.pcs_a(), - &[ - vec![ - full_trace[COL_INDEX_PC].as_slice(), - full_trace[COL_INDEX_FP].as_slice(), - full_trace[COL_INDEX_MEM_ADDRESS_A].as_slice(), - full_trace[COL_INDEX_MEM_ADDRESS_B].as_slice(), - full_trace[COL_INDEX_MEM_ADDRESS_C].as_slice(), - p16_indexes.as_slice(), - p24_indexes.as_slice(), - p16_commited.as_slice(), - p24_commited.as_slice(), - dot_product_flags.as_slice(), - dot_product_lengths.as_slice(), - dot_product_indexes.as_slice(), - ], - dot_product_computations_base - .iter() - .map(Vec::as_slice) - .collect(), - private_memory_commited_chunks.clone(), - ] - .concat(), + &base_pols, + &base_dims, &dft, &mut prover_state, + LOG_SMALLEST_DECOMPOSITION_CHUNK, ); // Grand Product for consistency with precompiles @@ -438,66 +438,54 @@ pub fn prove_execution( ); let p16_grand_product_evals_on_indexes_a = - (&p16_indexes[0..poseidons_16.len()]).evaluate(&grand_product_p16_statement.point); - let p16_grand_product_evals_on_indexes_b = (&p16_indexes - [poseidons_16.len()..2 * poseidons_16.len()]) - .evaluate(&grand_product_p16_statement.point); - let p16_grand_product_evals_on_indexes_res = (&p16_indexes - [poseidons_16.len() * 2..3 * poseidons_16.len()]) - .evaluate(&grand_product_p16_statement.point); + p16_indexes[0].evaluate(&grand_product_p16_statement.point); + let p16_grand_product_evals_on_indexes_b = + p16_indexes[1].evaluate(&grand_product_p16_statement.point); + let p16_grand_product_evals_on_indexes_res = + p16_indexes[2].evaluate(&grand_product_p16_statement.point); prover_state.add_extension_scalars(&[ p16_grand_product_evals_on_indexes_a, p16_grand_product_evals_on_indexes_b, p16_grand_product_evals_on_indexes_res, ]); - let p16_mixing_scalars_grand_product = MultilinearPoint(prover_state.sample_vec(2)); - let p16_final_statement_grand_product = Evaluation { - point: MultilinearPoint( - [ - p16_mixing_scalars_grand_product.0.clone(), - grand_product_p16_statement.point.0, - ] - .concat(), - ), - value: [ - p16_grand_product_evals_on_indexes_a, - p16_grand_product_evals_on_indexes_b, - p16_grand_product_evals_on_indexes_res, - EF::ZERO, - ] - .evaluate(&p16_mixing_scalars_grand_product), - }; + + let mut p16_indexes_a_statements = vec![Evaluation { + point: grand_product_p16_statement.point.clone(), + value: 
p16_grand_product_evals_on_indexes_a, + }]; + let mut p16_indexes_b_statements = vec![Evaluation { + point: grand_product_p16_statement.point.clone(), + value: p16_grand_product_evals_on_indexes_b, + }]; + let mut p16_indexes_res_statements = vec![Evaluation { + point: grand_product_p16_statement.point.clone(), + value: p16_grand_product_evals_on_indexes_res, + }]; let p24_grand_product_evals_on_indexes_a = - (&p24_indexes[0..poseidons_24.len()]).evaluate(&grand_product_p24_statement.point); - let p24_grand_product_evals_on_indexes_b = (&p24_indexes - [poseidons_24.len()..2 * poseidons_24.len()]) - .evaluate(&grand_product_p24_statement.point); - let p24_grand_product_evals_on_indexes_res = (&p24_indexes - [poseidons_24.len() * 2..3 * poseidons_24.len()]) - .evaluate(&grand_product_p24_statement.point); + p24_indexes[0].evaluate(&grand_product_p24_statement.point); + let p24_grand_product_evals_on_indexes_b = + p24_indexes[1].evaluate(&grand_product_p24_statement.point); + let p24_grand_product_evals_on_indexes_res = + p24_indexes[2].evaluate(&grand_product_p24_statement.point); prover_state.add_extension_scalars(&[ p24_grand_product_evals_on_indexes_a, p24_grand_product_evals_on_indexes_b, p24_grand_product_evals_on_indexes_res, ]); - let p24_mixing_scalars_grand_product = MultilinearPoint(prover_state.sample_vec(2)); - let p24_final_statement_grand_product = Evaluation { - point: MultilinearPoint( - [ - p24_mixing_scalars_grand_product.0.clone(), - grand_product_p24_statement.point.0, - ] - .concat(), - ), - value: [ - p24_grand_product_evals_on_indexes_a, - p24_grand_product_evals_on_indexes_b, - p24_grand_product_evals_on_indexes_res, - EF::ZERO, - ] - .evaluate(&p24_mixing_scalars_grand_product), - }; + + let mut p24_indexes_a_statements = vec![Evaluation { + point: grand_product_p24_statement.point.clone(), + value: p24_grand_product_evals_on_indexes_a, + }]; + let mut p24_indexes_b_statements = vec![Evaluation { + point: grand_product_p24_statement.point.clone(), + value: p24_grand_product_evals_on_indexes_b, + }]; + let mut p24_indexes_res_statements = vec![Evaluation { + point: grand_product_p24_statement.point.clone(), + value: p24_grand_product_evals_on_indexes_res, + }]; let dot_product_footprint_computation = DotProductFootprint { grand_product_challenge_global, @@ -540,25 +528,18 @@ pub fn prove_execution( point: grand_product_dot_product_sumcheck_point.clone(), value: grand_product_dot_product_sumcheck_inner_evals[1], }; - let grand_product_dot_product_table_indexes_mixing_challenges = - MultilinearPoint(prover_state.sample_vec(2)); - let grand_product_dot_product_table_indexes_statement = Evaluation { - point: MultilinearPoint( - [ - grand_product_dot_product_table_indexes_mixing_challenges - .0 - .clone(), - grand_product_dot_product_sumcheck_point.0, - ] - .concat(), - ), - value: [ - grand_product_dot_product_sumcheck_inner_evals[2], - grand_product_dot_product_sumcheck_inner_evals[3], - grand_product_dot_product_sumcheck_inner_evals[4], - EF::ZERO, - ] - .evaluate(&grand_product_dot_product_table_indexes_mixing_challenges), + + let grand_product_dot_product_table_indexes_statement_index_a = Evaluation { + point: grand_product_dot_product_sumcheck_point.clone(), + value: grand_product_dot_product_sumcheck_inner_evals[2], + }; + let grand_product_dot_product_table_indexes_statement_index_b = Evaluation { + point: grand_product_dot_product_sumcheck_point.clone(), + value: grand_product_dot_product_sumcheck_inner_evals[3], + }; + let 
grand_product_dot_product_table_indexes_statement_index_res = Evaluation { + point: grand_product_dot_product_sumcheck_point.clone(), + value: grand_product_dot_product_sumcheck_inner_evals[4], }; let precompile_foot_print_computation = PrecompileFootprint { @@ -624,10 +605,7 @@ pub fn prove_execution( .try_into() .unwrap(); - let padded_memory = padd_with_zero_to_next_power_of_two(&memory); // TODO avoid this padding - let max_n_poseidons = poseidons_16.len().max(poseidons_24.len()); - let min_n_poseidons = poseidons_16.len().min(poseidons_24.len()); let ( all_poseidon_indexes, folded_memory, @@ -638,23 +616,6 @@ pub fn prove_execution( ) = { // Poseidons 16/24 memory addresses lookup - assert_eq_many!( - &p16_evals_to_prove[0].point, - &p16_evals_to_prove[1].point, - &p16_evals_to_prove[3].point, - &p16_evals_to_prove[4].point, - ); - assert_eq_many!( - &p24_evals_to_prove[0].point, - &p24_evals_to_prove[1].point, - &p24_evals_to_prove[2].point, - &p24_evals_to_prove[5].point, - ); - assert_eq!( - &p16_evals_to_prove[0].point[..3 + log2_ceil_usize(min_n_poseidons)], - &p24_evals_to_prove[0].point[..3 + log2_ceil_usize(min_n_poseidons)] - ); - let memory_folding_challenges = MultilinearPoint(p16_evals_to_prove[0].point[..3].to_vec()); let poseidon_lookup_batching_chalenges = MultilinearPoint(prover_state.sample_vec(3)); let mut poseidon_lookup_point = poseidon_lookup_batching_chalenges.0.clone(); @@ -732,17 +693,6 @@ pub fn prove_execution( assert_eq!(all_poseidon_indexes.len(), all_poseidon_values.len(),); - // TODO remove these checks - { - for (index, value) in all_poseidon_indexes.iter().zip(&all_poseidon_values) { - assert_eq!(value, &poseidon_folded_memory[index.to_usize()]); - } - assert_eq!( - all_poseidon_values.evaluate(&poseidon_lookup_challenge.point), - poseidon_lookup_challenge.value - ); - } - let poseidon_poly_eq_point = eval_eq(&poseidon_lookup_challenge.point); let poseidon_pushforward = compute_pushforward( @@ -835,11 +785,11 @@ pub fn prove_execution( .collect::>(); let dot_product_evals_spread = dot_product_values_spread .iter() - .map(|slice| slice.evaluate(&dot_product_evals_to_prove[3].point)) + .map(|slice| slice.evaluate(&dot_product_evals_to_prove[5].point)) .collect::>(); assert_eq!( dot_product_with_base(&dot_product_evals_spread), - dot_product_evals_to_prove[3].value + dot_product_evals_to_prove[5].value ); prover_state.add_extension_scalars(&dot_product_evals_spread); let dot_product_values_batching_scalars = MultilinearPoint(prover_state.sample_vec(3)); @@ -847,7 +797,7 @@ pub fn prove_execution( let dot_product_values_batched_point = MultilinearPoint( [ dot_product_values_batching_scalars.0.clone(), - dot_product_evals_to_prove[3].point.0.clone(), + dot_product_evals_to_prove[5].point.0.clone(), ] .concat(), ); @@ -858,12 +808,6 @@ pub fn prove_execution( let concatenated_dot_product_values_spread = padd_with_zero_to_next_power_of_two(&dot_product_values_spread.concat()); - // TODO remove - assert_eq!( - concatenated_dot_product_values_spread.evaluate(&dot_product_values_batched_point), - dot_product_values_batched_eval - ); - let padded_dot_product_indexes_spread = padd_with_zero_to_next_power_of_two(&dot_product_indexes_spread.concat()); @@ -992,19 +936,28 @@ pub fn prove_execution( ); // 2nd Commitment - let commited_extension = [ + let extension_pols = vec![ base_memory_pushforward.as_slice(), poseidon_pushforward.as_slice(), bytecode_pushforward.as_slice(), ]; + + let extension_dims = vec![ + ColDims::sparse(memory.len(), EF::ZERO), // memory + 
ColDims::sparse(memory.len().div_ceil(VECTOR_LEN), EF::ZERO), // memory (folded) + ColDims::sparse(bytecode.instructions.len(), EF::ZERO), // bytecode + ]; + let packed_pcs_witness_extension = packed_pcs_commit( &pcs.pcs_b( log2_strict_usize(packed_pcs_witness_base.packed_polynomial.len()), - num_packed_vars_for_pols(&commited_extension), + num_packed_vars_for_dims::(&extension_dims, LOG_SMALLEST_DECOMPOSITION_CHUNK), ), - &commited_extension, + &extension_pols, + &extension_dims, &dft, &mut prover_state, + LOG_SMALLEST_DECOMPOSITION_CHUNK, ); let base_memory_logup_star_statements = prove_logup_star( @@ -1044,30 +997,11 @@ pub fn prove_execution( .concat(), ); - // open memory - let exec_lookup_chunk_point = MultilinearPoint( - base_memory_logup_star_statements.on_table.point[log_memory - log_public_memory..].to_vec(), - ); - let poseidon_lookup_chunk_point = - MultilinearPoint(poseidon_lookup_memory_point[log_memory - log_public_memory..].to_vec()); - - for (i, private_memory_chunk) in private_memory_commited_chunks.iter().enumerate() { - let chunk_eval_exec_lookup = private_memory_chunk.evaluate(&exec_lookup_chunk_point); - let chunk_eval_poseidon_lookup = - private_memory_chunk.evaluate(&poseidon_lookup_chunk_point); - prover_state.add_extension_scalar(chunk_eval_exec_lookup); - prover_state.add_extension_scalar(chunk_eval_poseidon_lookup); - private_memory_statements[i].extend(vec![ - Evaluation { - point: exec_lookup_chunk_point.clone(), - value: chunk_eval_exec_lookup, - }, - Evaluation { - point: poseidon_lookup_chunk_point.clone(), - value: chunk_eval_poseidon_lookup, - }, - ]); - } + memory_statements.push(base_memory_logup_star_statements.on_table.clone()); + memory_statements.push(Evaluation { + point: poseidon_lookup_memory_point.clone(), + value: poseidon_logup_star_statements.on_table.value, + }); // index opening for poseidon lookup let poseidon_index_evals = fold_multilinear( @@ -1076,33 +1010,36 @@ pub fn prove_execution( ); prover_state.add_extension_scalars(&poseidon_index_evals); - let (mut p16_indexes_statements, mut p24_indexes_statements) = - poseidon_lookup_index_statements( - &poseidon_index_evals, - n_poseidons_16, - n_poseidons_24, - &poseidon_logup_star_statements.on_indexes.point, - ) - .unwrap(); - p16_indexes_statements.push(p16_final_statement_grand_product); - p24_indexes_statements.push(p24_final_statement_grand_product); + add_poseidon_lookup_index_statements( + &poseidon_index_evals, + n_poseidons_16, + n_poseidons_24, + &poseidon_logup_star_statements.on_indexes.point, + &mut p16_indexes_a_statements, + &mut p16_indexes_b_statements, + &mut p16_indexes_res_statements, + &mut p24_indexes_a_statements, + &mut p24_indexes_b_statements, + &mut p24_indexes_res_statements, + ) + .unwrap(); let (initial_pc_statement, final_pc_statement) = intitial_and_final_pc_conditions(bytecode, log_n_cycles); let dot_product_computation_column_evals = dot_product_computations_base .par_iter() - .map(|slice| slice.evaluate(&dot_product_evals_to_prove[4].point)) + .map(|slice| slice.evaluate(&dot_product_evals_to_prove[6].point)) .collect::>(); assert_eq!( dot_product_with_base(&dot_product_computation_column_evals), - dot_product_evals_to_prove[4].value + dot_product_evals_to_prove[6].value ); prover_state.add_extension_scalars(&dot_product_computation_column_evals); let dot_product_computation_column_statements = (0..DIMENSION) .map(|i| { vec![Evaluation { - point: dot_product_evals_to_prove[4].point.clone(), + point: dot_product_evals_to_prove[6].point.clone(), value: 
dot_product_computation_column_evals[i], }] }) @@ -1150,51 +1087,42 @@ pub fn prove_execution( base_memory_logup_star_statements.on_indexes.value ); - // proving validity of mem_lookup_eval_spread_indexes_dot_product - let dot_product_logup_star_indexes_inner_eval = { - let dot_product_logup_star_indexes_inner_point = - MultilinearPoint(mem_lookup_eval_indexes_partial_point.0[3 + index_diff..].to_vec()); - let dot_product_logup_star_indexes_inner_value = - dot_product_indexes.evaluate(&dot_product_logup_star_indexes_inner_point); - prover_state.add_extension_scalar(dot_product_logup_star_indexes_inner_value); - let dot_product_logup_star_indexes_inner_eval = Evaluation { - point: dot_product_logup_star_indexes_inner_point, - value: dot_product_logup_star_indexes_inner_value, - }; + let dot_product_logup_star_indexes_inner_point = + MultilinearPoint(mem_lookup_eval_indexes_partial_point.0[5 + index_diff..].to_vec()); + let dot_product_logup_star_indexes_inner_value_a = + dot_product_columns[2].evaluate(&dot_product_logup_star_indexes_inner_point); + let dot_product_logup_star_indexes_inner_value_b = + dot_product_columns[3].evaluate(&dot_product_logup_star_indexes_inner_point); + let dot_product_logup_star_indexes_inner_value_res = + dot_product_columns[4].evaluate(&dot_product_logup_star_indexes_inner_point); - // TODO remove - { - assert_eq!( - padded_dot_product_indexes_spread.evaluate(&MultilinearPoint( - mem_lookup_eval_indexes_partial_point.0[index_diff..].to_vec() - )), - mem_lookup_eval_spread_indexes_dot_product - ); - - let mut dot_product_indexes_inner_evals_incr = vec![EF::ZERO; 8]; - for i in 0..DIMENSION { - dot_product_indexes_inner_evals_incr[i] = dot_product_logup_star_indexes_inner_value - + EF::from_usize(i) - * [F::ONE, F::ONE, F::ONE, F::ZERO].evaluate(&MultilinearPoint( - mem_lookup_eval_indexes_partial_point.0[3 + index_diff..5 + index_diff] - .to_vec(), - )); - } - assert_eq!( - dot_product_indexes_inner_evals_incr.evaluate(&MultilinearPoint( - mem_lookup_eval_indexes_partial_point.0[index_diff..3 + index_diff].to_vec(), - )), - mem_lookup_eval_spread_indexes_dot_product - ); - } - dot_product_logup_star_indexes_inner_eval + prover_state.add_extension_scalars(&[ + dot_product_logup_star_indexes_inner_value_a, + dot_product_logup_star_indexes_inner_value_b, + dot_product_logup_star_indexes_inner_value_res, + ]); + + let dot_product_logup_star_indexes_statement_a = Evaluation { + point: dot_product_logup_star_indexes_inner_point.clone(), + value: dot_product_logup_star_indexes_inner_value_a, + }; + let dot_product_logup_star_indexes_statement_b = Evaluation { + point: dot_product_logup_star_indexes_inner_point.clone(), + value: dot_product_logup_star_indexes_inner_value_b, + }; + let dot_product_logup_star_indexes_statement_res = Evaluation { + point: dot_product_logup_star_indexes_inner_point.clone(), + value: dot_product_logup_star_indexes_inner_value_res, }; // First Opening - let global_statements_base = packed_pcs_global_statements( - &packed_pcs_witness_base.tree, + let global_statements_base = packed_pcs_global_statements_for_prover( + &base_pols, + &base_dims, + LOG_SMALLEST_DECOMPOSITION_CHUNK, &[ vec![ + memory_statements, vec![ exec_evals_to_prove[COL_INDEX_PC.index_in_air()].clone(), bytecode_logup_star_statements.on_indexes.clone(), @@ -1226,10 +1154,22 @@ pub fn prove_execution( value: mem_lookup_eval_indexes_c, }, ], // exec memory address C - p16_indexes_statements, - p24_indexes_statements, - vec![p16_evals_to_prove[2].clone()], - 
vec![p24_evals_to_prove[3].clone()], + p16_indexes_a_statements, + p16_indexes_b_statements, + p16_indexes_res_statements, + p24_indexes_a_statements, + p24_indexes_b_statements, + p24_indexes_res_statements, + ], + p16_evals_to_prove[2..p16_air.width() + 2 - 16 * 2] + .iter() + .map(|e| vec![e.clone()]) + .collect(), + p24_evals_to_prove[3..p24_air.width() + 3 - 24 * 2] + .iter() + .map(|e| vec![e.clone()]) + .collect(), + vec![ vec![ dot_product_evals_to_prove[0].clone(), grand_product_dot_product_flag_statement, @@ -1239,25 +1179,38 @@ pub fn prove_execution( grand_product_dot_product_len_statement, ], // dot product: length vec![ - dot_product_evals_to_prove[2].clone(), - dot_product_logup_star_indexes_inner_eval, - grand_product_dot_product_table_indexes_statement, - ], // dot product: indexes + dot_product_evals_to_prove[2].clone(), // // dot product: indexe a + dot_product_logup_star_indexes_statement_a, + grand_product_dot_product_table_indexes_statement_index_a, + ], + vec![ + dot_product_evals_to_prove[3].clone(), // // dot product: indexe b + dot_product_logup_star_indexes_statement_b, + grand_product_dot_product_table_indexes_statement_index_b, + ], + vec![ + dot_product_evals_to_prove[4].clone(), // // dot product: indexe res + dot_product_logup_star_indexes_statement_res, + grand_product_dot_product_table_indexes_statement_index_res, + ], ], dot_product_computation_column_statements, - private_memory_statements, ] .concat(), + &mut prover_state, ); // Second Opening - let global_statements_extension = packed_pcs_global_statements( - &packed_pcs_witness_extension.tree, + let global_statements_extension = packed_pcs_global_statements_for_prover( + &extension_pols, + &extension_dims, + LOG_SMALLEST_DECOMPOSITION_CHUNK, &[ base_memory_logup_star_statements.on_pushforward, poseidon_logup_star_statements.on_pushforward, bytecode_logup_star_statements.on_pushforward, ], + &mut prover_state, ); pcs.batch_open( @@ -1271,5 +1224,8 @@ pub fn prove_execution( &packed_pcs_witness_extension.packed_polynomial, ); - prover_state.proof_data().to_vec() + ( + prover_state.proof_data().to_vec(), + prover_state.proof_size(), + ) } diff --git a/crates/zk_vm/src/verify_execution.rs b/crates/zk_vm/src/verify_execution.rs index bb00c340..e3b7579d 100644 --- a/crates/zk_vm/src/verify_execution.rs +++ b/crates/zk_vm/src/verify_execution.rs @@ -7,18 +7,20 @@ use lookup::verify_logup_star; use p3_air::BaseAir; use p3_field::BasedVectorSpace; use p3_field::PrimeCharacteristicRing; +use p3_field::dot_product; use p3_util::{log2_ceil_usize, log2_strict_usize}; -use pcs::num_packed_vars_for_vars; -use pcs::packed_pcs_global_statements; +use pcs::ColDims; +use pcs::num_packed_vars_for_dims; +use pcs::packed_pcs_global_statements_for_verifier; use pcs::{BatchPCS, NumVariables as _, packed_pcs_parse_commitment}; use sumcheck::SumcheckComputation; use utils::dot_product_with_base; -use utils::to_big_endian_bits; use utils::{Evaluation, PF, build_challenger, padd_with_zero_to_next_power_of_two}; use utils::{ToUsize, build_poseidon_16_air, build_poseidon_24_air}; use vm::*; use whir_p3::fiat_shamir::{errors::ProofError, verifier::VerifierState}; use whir_p3::poly::evals::EvaluationsList; +use whir_p3::poly::evals::eval_eq; use whir_p3::poly::multilinear::MultilinearPoint; use zk_vm_air::*; @@ -44,12 +46,11 @@ pub fn verify_execution( n_poseidons_16, n_poseidons_24, n_dot_products, - table_dot_products_log_n_rows, - dot_product_padding_len, + n_rows_table_dot_products, private_memory_len, n_vm_multilinear_evals, ] 
= verifier_state - .next_base_scalars_const::<8>()? + .next_base_scalars_const::<7>()? .into_iter() .map(|x| x.to_usize()) .collect::>() @@ -60,9 +61,8 @@ pub fn verify_execution( || n_poseidons_16 > 1 << 32 || n_poseidons_24 > 1 << 32 || n_dot_products > 1 << 32 - || table_dot_products_log_n_rows > 32 + || n_rows_table_dot_products > 1 << 32 || private_memory_len > 1 << 32 - || dot_product_padding_len > 1 << table_dot_products_log_n_rows || n_vm_multilinear_evals > 1 << 10 { // To avoid "DOS" attack @@ -72,22 +72,15 @@ pub fn verify_execution( let public_memory = build_public_memory(public_input); let public_memory_len = public_memory.len(); - if private_memory_len % public_memory_len != 0 { - return Err(ProofError::InvalidProof); - } - let n_private_memory_chunks = private_memory_len / public_memory_len; + let log_public_memory = log2_strict_usize(public_memory_len); let log_memory = log2_ceil_usize(public_memory_len + private_memory_len); let log_n_p16 = log2_ceil_usize(n_poseidons_16); let log_n_p24 = log2_ceil_usize(n_poseidons_24); - let vars_p16_indexes = log_n_p16 + 2; - let vars_p24_indexes = log_n_p24 + 2; - - let vars_p16_table = log_n_p16 + log2_ceil_usize(p16_air.width() - 16 * 2); - let vars_p24_table = log_n_p24 + log2_ceil_usize(p24_air.width() - 24 * 2); - - let vars_private_memory = vec![log_public_memory; n_private_memory_chunks]; + let table_dot_products_log_n_rows = log2_ceil_usize(n_rows_table_dot_products); + let dot_product_padding_len = + n_rows_table_dot_products.next_power_of_two() - n_rows_table_dot_products; struct RowMultilinearEval { addr_coeffs: F, @@ -114,51 +107,58 @@ pub fn verify_execution( }); } - let mut private_memory_statements = vec![vec![]; n_private_memory_chunks]; + let mut memory_statements = vec![]; for row_multilinear_eval in &vm_multilinear_evals { let addr_point = row_multilinear_eval.addr_point.to_usize(); let addr_coeffs = row_multilinear_eval.addr_coeffs.to_usize(); let addr_res = row_multilinear_eval.addr_res.to_usize(); let n_vars = row_multilinear_eval.n_vars.to_usize(); - // TODO only 1 statement for the evaluation point (instead of `n_vars` ones) - // TODO we can group together most of the equality constraints (by chuncks alligned on powers of 2) - for (ef_addr, ef_value) in row_multilinear_eval - .point - .iter() - .enumerate() - .map(|(i, p)| (addr_point + i * DIMENSION, *p)) - .chain(std::iter::once((addr_res, row_multilinear_eval.res))) - { - for (f_address, f_value) in - >>::as_basis_coefficients_slice(&ef_value) - .iter() - .enumerate() - .map(|(a, v)| (ef_addr + a, *v)) - { - let memory_chunk_index = - f_address >> (log_memory - log2_ceil_usize(n_private_memory_chunks + 1)); - let addr_bits = &to_big_endian_bits( - f_address, - log_memory - log2_ceil_usize(n_private_memory_chunks + 1), - ); - let memory_sparse_point = addr_bits - .iter() - .map(|&x| EF::from_bool(x)) - .collect::>(); - let statement = Evaluation { - point: MultilinearPoint(memory_sparse_point), - value: EF::from(f_value), // TODO avoid embedding - }; - if memory_chunk_index == 0 { - if public_memory.evaluate(&statement.point) != statement.value { - return Err(ProofError::InvalidProof); - } - } else { - private_memory_statements[memory_chunk_index - 1].push(statement); - } - } - } + // point lookup into memory + let log_point_len = log2_ceil_usize(row_multilinear_eval.point.len() * DIMENSION); + let point_random_challenge = verifier_state.sample_vec(log_point_len); + let point_random_value = { + let mut point_mle = row_multilinear_eval + .point + .iter() + 
.flat_map(|v| { + >>::as_basis_coefficients_slice(v).to_vec() + }) + .collect::>(); + point_mle.resize(point_mle.len().next_power_of_two(), F::ZERO); + point_mle.evaluate(&MultilinearPoint(point_random_challenge.clone())) + }; + memory_statements.push(Evaluation { + point: MultilinearPoint( + [ + to_big_endian_in_field(addr_point, log_memory - log_point_len), + point_random_challenge.clone(), + ] + .concat(), + ), + value: point_random_value, + }); + + // result lookup into memory + let random_challenge = verifier_state.sample_vec(LOG_VECTOR_LEN); + let res_random_value = { + let mut res_mle = row_multilinear_eval + .res + .as_basis_coefficients_slice() + .to_vec(); + res_mle.resize(VECTOR_LEN, F::ZERO); + res_mle.evaluate(&MultilinearPoint(random_challenge.clone())) + }; + memory_statements.push(Evaluation { + point: MultilinearPoint( + [ + to_big_endian_in_field(addr_res, log_memory - LOG_VECTOR_LEN), + random_challenge.clone(), + ] + .concat(), + ), + value: res_random_value, + }); { if n_vars > log_memory { @@ -170,53 +170,33 @@ pub fn verify_execution( if n_vars >= log_public_memory { todo!("vm multilinear eval accross multiple memory chunks") } - let memory_chunk_index = - addr_coeffs >> (log_memory - n_vars - log2_ceil_usize(n_private_memory_chunks + 1)); - let addr_bits = &to_big_endian_bits( - addr_coeffs, - log_memory - n_vars - log2_ceil_usize(n_private_memory_chunks + 1), - ); - let mut memory_sparse_point = addr_bits - .iter() - .map(|&x| EF::from_bool(x)) - .collect::>(); - memory_sparse_point.extend(row_multilinear_eval.point.clone()); + let addr_bits = to_big_endian_in_field(addr_coeffs, log_memory - n_vars); let statement = Evaluation { - point: MultilinearPoint(memory_sparse_point), + point: MultilinearPoint([addr_bits, row_multilinear_eval.point.clone()].concat()), value: row_multilinear_eval.res, }; - if memory_chunk_index == 0 { - if public_memory.evaluate(&statement.point) != statement.value { - return Err(ProofError::InvalidProof); - } - } else { - private_memory_statements[memory_chunk_index - 1].push(statement); - } + memory_statements.push(statement); } } - let vars_pcs_base = [ - vec![ - log_n_cycles, // pc - log_n_cycles, // fp - log_n_cycles, // index A - log_n_cycles, // index B - log_n_cycles, // index C - vars_p16_indexes, - vars_p24_indexes, - vars_p16_table, - vars_p24_table, - table_dot_products_log_n_rows, // dot product: (start) flag - table_dot_products_log_n_rows, // dot product: length - table_dot_products_log_n_rows + 2, // dot product: indexes - ], - vec![table_dot_products_log_n_rows; DIMENSION], // dot product: computation - vars_private_memory, - ] - .concat(); + let base_dims = get_base_dims( + n_cycles, + log_public_memory, + private_memory_len, + bytecode.ending_pc, + n_poseidons_16, + n_poseidons_24, + p16_air.width(), + p24_air.width(), + n_rows_table_dot_products, + ); - let parsed_commitment_base = - packed_pcs_parse_commitment(pcs.pcs_a(), &mut verifier_state, vars_pcs_base)?; + let parsed_commitment_base = packed_pcs_parse_commitment( + pcs.pcs_a(), + &mut verifier_state, + &base_dims, + LOG_SMALLEST_DECOMPOSITION_CHUNK, + )?; let grand_product_challenge_global = verifier_state.sample(); let grand_product_challenge_p16 = verifier_state.sample().powers().collect_n(5); @@ -296,23 +276,19 @@ pub fn verify_execution( { return Err(ProofError::InvalidProof); } - let p16_mixing_scalars_grand_product = MultilinearPoint(verifier_state.sample_vec(2)); - let p16_final_statement_grand_product = Evaluation { - point: MultilinearPoint( - [ - 
p16_mixing_scalars_grand_product.0.clone(), - grand_product_p16_statement.point.0, - ] - .concat(), - ), - value: [ - p16_grand_product_evals_on_indexes_a, - p16_grand_product_evals_on_indexes_b, - p16_grand_product_evals_on_indexes_res, - EF::ZERO, - ] - .evaluate(&p16_mixing_scalars_grand_product), - }; + + let mut p16_indexes_a_statements = vec![Evaluation { + point: grand_product_p16_statement.point.clone(), + value: p16_grand_product_evals_on_indexes_a, + }]; + let mut p16_indexes_b_statements = vec![Evaluation { + point: grand_product_p16_statement.point.clone(), + value: p16_grand_product_evals_on_indexes_b, + }]; + let mut p16_indexes_res_statements = vec![Evaluation { + point: grand_product_p16_statement.point.clone(), + value: p16_grand_product_evals_on_indexes_res, + }]; let [ p24_grand_product_evals_on_indexes_a, @@ -328,23 +304,19 @@ pub fn verify_execution( { return Err(ProofError::InvalidProof); } - let p24_mixing_scalars_grand_product = MultilinearPoint(verifier_state.sample_vec(2)); - let p24_final_statement_grand_product = Evaluation { - point: MultilinearPoint( - [ - p24_mixing_scalars_grand_product.0.clone(), - grand_product_p24_statement.point.0, - ] - .concat(), - ), - value: [ - p24_grand_product_evals_on_indexes_a, - p24_grand_product_evals_on_indexes_b, - p24_grand_product_evals_on_indexes_res, - EF::ZERO, - ] - .evaluate(&p24_mixing_scalars_grand_product), - }; + + let mut p24_indexes_a_statements = vec![Evaluation { + point: grand_product_p24_statement.point.clone(), + value: p24_grand_product_evals_on_indexes_a, + }]; + let mut p24_indexes_b_statements = vec![Evaluation { + point: grand_product_p24_statement.point.clone(), + value: p24_grand_product_evals_on_indexes_b, + }]; + let mut p24_indexes_res_statements = vec![Evaluation { + point: grand_product_p24_statement.point.clone(), + value: p24_grand_product_evals_on_indexes_res, + }]; // Grand product statements let (grand_product_final_dot_product_eval, grand_product_dot_product_sumcheck_claim) = @@ -381,25 +353,17 @@ pub fn verify_execution( point: grand_product_dot_product_sumcheck_claim.point.clone(), value: grand_product_dot_product_sumcheck_inner_evals[1], }; - let grand_product_dot_product_table_indexes_mixing_challenges = - MultilinearPoint(verifier_state.sample_vec(2)); - let grand_product_dot_product_table_indexes_statement = Evaluation { - point: MultilinearPoint( - [ - grand_product_dot_product_table_indexes_mixing_challenges - .0 - .clone(), - grand_product_dot_product_sumcheck_claim.point.0, - ] - .concat(), - ), - value: [ - grand_product_dot_product_sumcheck_inner_evals[2], - grand_product_dot_product_sumcheck_inner_evals[3], - grand_product_dot_product_sumcheck_inner_evals[4], - EF::ZERO, - ] - .evaluate(&grand_product_dot_product_table_indexes_mixing_challenges), + let grand_product_dot_product_table_indexes_statement_index_a = Evaluation { + point: grand_product_dot_product_sumcheck_claim.point.clone(), + value: grand_product_dot_product_sumcheck_inner_evals[2], + }; + let grand_product_dot_product_table_indexes_statement_index_b = Evaluation { + point: grand_product_dot_product_sumcheck_claim.point.clone(), + value: grand_product_dot_product_sumcheck_inner_evals[3], + }; + let grand_product_dot_product_table_indexes_statement_index_res = Evaluation { + point: grand_product_dot_product_sumcheck_claim.point.clone(), + value: grand_product_dot_product_sumcheck_inner_evals[4], }; let (grand_product_final_exec_eval, grand_product_exec_sumcheck_claim) = @@ -508,14 +472,14 @@ pub fn 
verify_execution( let alpha_bytecode_lookup = verifier_state.sample(); let dot_product_evals_spread = verifier_state.next_extension_scalars_vec(DIMENSION)?; - if dot_product_with_base(&dot_product_evals_spread) != dot_product_evals_to_verify[3].value { + if dot_product_with_base(&dot_product_evals_spread) != dot_product_evals_to_verify[5].value { return Err(ProofError::InvalidProof); } let dot_product_values_batching_scalars = MultilinearPoint(verifier_state.sample_vec(3)); let dot_product_values_batched_point = MultilinearPoint( [ dot_product_values_batching_scalars.0.clone(), - dot_product_evals_to_verify[3].point.0.clone(), + dot_product_evals_to_verify[5].point.0.clone(), ] .concat(), ); @@ -586,17 +550,23 @@ pub fn verify_execution( }; let memory_poly_eq_point_alpha = verifier_state.sample(); - let log_bytecode_len = log2_ceil_usize(bytecode.instructions.len()); - let vars_pcs_extension = vec![log_memory, log_memory - 3, log_bytecode_len]; + let extension_dims = vec![ + ColDims::sparse(public_memory.len() + private_memory_len, EF::ZERO), // memory + ColDims::sparse( + (public_memory.len() + private_memory_len).div_ceil(VECTOR_LEN), + EF::ZERO, + ), // memory (folded) + ColDims::sparse(bytecode.instructions.len(), EF::ZERO), + ]; + let parsed_commitment_extension = packed_pcs_parse_commitment( &pcs.pcs_b( - parsed_commitment_base - .inner_parsed_commitment - .num_variables(), - num_packed_vars_for_vars(&vars_pcs_extension), + parsed_commitment_base.num_variables(), + num_packed_vars_for_dims::(&extension_dims, LOG_SMALLEST_DECOMPOSITION_CHUNK), ), &mut verifier_state, - vars_pcs_extension, + &extension_dims, + LOG_SMALLEST_DECOMPOSITION_CHUNK, ) .unwrap(); @@ -644,7 +614,7 @@ pub fn verify_execution( let bytecode_logup_star_statements = verify_logup_star( &mut verifier_state, - log_bytecode_len, + log2_ceil_usize(bytecode.instructions.len()), log_n_cycles, &[bytecode_lookup_claim_1, bytecode_lookup_claim_2], alpha_bytecode_lookup, @@ -665,53 +635,11 @@ pub fn verify_execution( .concat(), ); - let exec_lookup_chunk_point = MultilinearPoint( - base_memory_logup_star_statements.on_table.point[log_memory - log_public_memory..].to_vec(), - ); - let poseidon_lookup_chunk_point = - MultilinearPoint(poseidon_lookup_memory_point[log_memory - log_public_memory..].to_vec()); - - let mut chunk_evals_exec_lookup = vec![public_memory.evaluate(&exec_lookup_chunk_point)]; - let mut chunk_evals_poseidon_lookup = - vec![public_memory.evaluate(&poseidon_lookup_chunk_point)]; - - for i in 0..n_private_memory_chunks { - let chunk_eval_exec_lookup = verifier_state.next_extension_scalar()?; - let chunk_eval_poseidon_lookup = verifier_state.next_extension_scalar()?; - chunk_evals_exec_lookup.push(chunk_eval_exec_lookup); - chunk_evals_poseidon_lookup.push(chunk_eval_poseidon_lookup); - private_memory_statements[i].extend(vec![ - Evaluation { - point: exec_lookup_chunk_point.clone(), - value: chunk_eval_exec_lookup, - }, - Evaluation { - point: poseidon_lookup_chunk_point.clone(), - value: chunk_eval_poseidon_lookup, - }, - ]); - } - - if base_memory_logup_star_statements.on_table.value - != padd_with_zero_to_next_power_of_two(&chunk_evals_exec_lookup).evaluate( - &MultilinearPoint( - base_memory_logup_star_statements.on_table.point[..log_memory - log_public_memory] - .to_vec(), - ), - ) - { - return Err(ProofError::InvalidProof); - } - if poseidon_logup_star_statements.on_table.value - != padd_with_zero_to_next_power_of_two(&chunk_evals_poseidon_lookup).evaluate( - &MultilinearPoint( - 
poseidon_logup_star_statements.on_table.point[..log_memory - log_public_memory] - .to_vec(), - ), - ) - { - return Err(ProofError::InvalidProof); - } + memory_statements.push(base_memory_logup_star_statements.on_table.clone()); + memory_statements.push(Evaluation { + point: poseidon_lookup_memory_point.clone(), + value: poseidon_logup_star_statements.on_table.value, + }); // index opening for poseidon lookup let poseidon_index_evals = verifier_state.next_extension_scalars_vec(8)?; @@ -722,15 +650,18 @@ pub fn verify_execution( return Err(ProofError::InvalidProof); } - let (mut p16_indexes_statements, mut p24_indexes_statements) = - poseidon_lookup_index_statements( - &poseidon_index_evals, - n_poseidons_16, - n_poseidons_24, - &poseidon_logup_star_statements.on_indexes.point, - )?; - p16_indexes_statements.push(p16_final_statement_grand_product); - p24_indexes_statements.push(p24_final_statement_grand_product); + add_poseidon_lookup_index_statements( + &poseidon_index_evals, + n_poseidons_16, + n_poseidons_24, + &poseidon_logup_star_statements.on_indexes.point, + &mut p16_indexes_a_statements, + &mut p16_indexes_b_statements, + &mut p16_indexes_res_statements, + &mut p24_indexes_a_statements, + &mut p24_indexes_b_statements, + &mut p24_indexes_res_statements, + )?; let (initial_pc_statement, final_pc_statement) = intitial_and_final_pc_conditions(bytecode, log_n_cycles); @@ -738,14 +669,14 @@ pub fn verify_execution( let dot_product_computation_column_evals = verifier_state.next_extension_scalars_const::()?; if dot_product_with_base(&dot_product_computation_column_evals) - != dot_product_evals_to_verify[4].value + != dot_product_evals_to_verify[6].value { return Err(ProofError::InvalidProof); } let dot_product_computation_column_statements = (0..DIMENSION) .map(|i| { vec![Evaluation { - point: dot_product_evals_to_verify[4].point.clone(), + point: dot_product_evals_to_verify[6].point.clone(), value: dot_product_computation_column_evals[i], }] }) @@ -780,13 +711,40 @@ pub fn verify_execution( ); let dot_product_logup_star_indexes_inner_point = - MultilinearPoint(mem_lookup_eval_indexes_partial_point.0[3 + index_diff..].to_vec()); - let dot_product_logup_star_indexes_inner_value = verifier_state.next_extension_scalar()?; - let dot_product_logup_star_indexes_inner_eval = Evaluation { - point: dot_product_logup_star_indexes_inner_point, - value: dot_product_logup_star_indexes_inner_value, + MultilinearPoint(mem_lookup_eval_indexes_partial_point.0[5 + index_diff..].to_vec()); + + let [ + dot_product_logup_star_indexes_inner_value_a, + dot_product_logup_star_indexes_inner_value_b, + dot_product_logup_star_indexes_inner_value_res, + ] = verifier_state.next_extension_scalars_const()?; + + let dot_product_logup_star_indexes_statement_a = Evaluation { + point: dot_product_logup_star_indexes_inner_point.clone(), + value: dot_product_logup_star_indexes_inner_value_a, + }; + let dot_product_logup_star_indexes_statement_b = Evaluation { + point: dot_product_logup_star_indexes_inner_point.clone(), + value: dot_product_logup_star_indexes_inner_value_b, }; + let dot_product_logup_star_indexes_statement_res = Evaluation { + point: dot_product_logup_star_indexes_inner_point.clone(), + value: dot_product_logup_star_indexes_inner_value_res, + }; + { + let dot_product_logup_star_indexes_inner_value: EF = dot_product( + eval_eq(&mem_lookup_eval_indexes_partial_point.0[3 + index_diff..5 + index_diff]) + .into_iter(), + [ + dot_product_logup_star_indexes_inner_value_a, + 
dot_product_logup_star_indexes_inner_value_b, + dot_product_logup_star_indexes_inner_value_res, + EF::ZERO, + ] + .into_iter(), + ); + let mut dot_product_indexes_inner_evals_incr = vec![EF::ZERO; 8]; for i in 0..DIMENSION { dot_product_indexes_inner_evals_incr[i] = dot_product_logup_star_indexes_inner_value @@ -804,10 +762,12 @@ pub fn verify_execution( } } - let global_statements_base = packed_pcs_global_statements( - &parsed_commitment_base.tree, + let global_statements_base = packed_pcs_global_statements_for_verifier( + &base_dims, + LOG_SMALLEST_DECOMPOSITION_CHUNK, &[ vec![ + memory_statements, vec![ exec_evals_to_verify[COL_INDEX_PC.index_in_air()].clone(), bytecode_logup_star_statements.on_indexes.clone(), @@ -839,10 +799,22 @@ pub fn verify_execution( value: mem_lookup_eval_indexes_c, }, ], // exec memory address C - p16_indexes_statements, - p24_indexes_statements, - vec![p16_evals_to_verify[2].clone()], - vec![p24_evals_to_verify[3].clone()], + p16_indexes_a_statements, + p16_indexes_b_statements, + p16_indexes_res_statements, + p24_indexes_a_statements, + p24_indexes_b_statements, + p24_indexes_res_statements, + ], + p16_evals_to_verify[2..p16_air.width() + 2 - 16 * 2] + .iter() + .map(|e| vec![e.clone()]) + .collect(), + p24_evals_to_verify[3..p24_air.width() + 3 - 24 * 2] + .iter() + .map(|e| vec![e.clone()]) + .collect(), + vec![ vec![ dot_product_evals_to_verify[0].clone(), grand_product_dot_product_flag_statement, @@ -852,31 +824,45 @@ pub fn verify_execution( grand_product_dot_product_len_statement, ], // dot product: length vec![ - dot_product_evals_to_verify[2].clone(), - dot_product_logup_star_indexes_inner_eval, - grand_product_dot_product_table_indexes_statement, - ], // dot product: indexes + dot_product_evals_to_verify[2].clone(), // // dot product: indexe a + dot_product_logup_star_indexes_statement_a, + grand_product_dot_product_table_indexes_statement_index_a, + ], + vec![ + dot_product_evals_to_verify[3].clone(), // // dot product: indexe b + dot_product_logup_star_indexes_statement_b, + grand_product_dot_product_table_indexes_statement_index_b, + ], + vec![ + dot_product_evals_to_verify[4].clone(), // // dot product: indexe res + dot_product_logup_star_indexes_statement_res, + grand_product_dot_product_table_indexes_statement_index_res, + ], ], dot_product_computation_column_statements, - private_memory_statements, ] .concat(), - ); + &mut verifier_state, + &[(0, public_memory.clone())].into_iter().collect(), + )?; - let global_statements_extension = packed_pcs_global_statements( - &parsed_commitment_extension.tree, + let global_statements_extension = packed_pcs_global_statements_for_verifier( + &extension_dims, + LOG_SMALLEST_DECOMPOSITION_CHUNK, &[ base_memory_logup_star_statements.on_pushforward, poseidon_logup_star_statements.on_pushforward, bytecode_logup_star_statements.on_pushforward, ], - ); + &mut verifier_state, + &Default::default(), + )?; pcs.batch_verify( &mut verifier_state, - &parsed_commitment_base.inner_parsed_commitment, + &parsed_commitment_base, &global_statements_base, - &parsed_commitment_extension.inner_parsed_commitment, + &parsed_commitment_extension, &global_statements_extension, )?; diff --git a/crates/zk_vm/tests/test_zkvm.rs b/crates/zk_vm/tests/test_zkvm.rs index 3122a2b0..085aea30 100644 --- a/crates/zk_vm/tests/test_zkvm.rs +++ b/crates/zk_vm/tests/test_zkvm.rs @@ -8,6 +8,9 @@ use zk_vm::{ #[test] fn test_zk_vm() { let program_str = r#" + const DIM = 5; + const SECOND_POINT = 2; + const SECOND_N_VARS = 7; fn main() { for i in 
0..1000 unroll { if 1 == 0 { return; } } // increase bytecode size artificially @@ -20,29 +23,31 @@ fn test_zk_vm() { dot_product(i*3, i + 7, (x + 4) * 8, 2); } - point_1 = malloc(10 * 5); + point_1 = malloc_vec(1, log2_ceil(10 * DIM)); + point_1_ptr = point_1 * (2 ** log2_ceil(10 * DIM)); for i in 0..10 { - point_1[i*5 + 0] = 785 + i; - point_1[i*5 + 1] = 4152 - i; - point_1[i*5 + 2] = 471*82 + i*i; - point_1[i*5 + 3] = 7577 + i; - point_1[i*5 + 4] = 676 - i; + point_1_ptr[i*5 + 0] = 785 + i; + point_1_ptr[i*5 + 1] = 4152 - i; + point_1_ptr[i*5 + 2] = 471*82 + i*i; + point_1_ptr[i*5 + 3] = 7577 + i; + point_1_ptr[i*5 + 4] = 676 - i; } - res1 = malloc(5); + res1 = malloc_vec(1); multilinear_eval(2**3, point_1, res1, 10); - point_2 = 785; - res2 = malloc(5); - multilinear_eval(1, point_2, res2, 7); + res2 = malloc_vec(1); + multilinear_eval(10, SECOND_POINT, res2, SECOND_N_VARS); - point_3 = 785; - res3 = malloc(5); - multilinear_eval(2, point_3, res3, 7); + res3 = malloc_vec(1); + multilinear_eval(11, SECOND_POINT, res3, SECOND_N_VARS); - print(res3[0], res2[0]); + res2_ptr = res2 * 8; + res3_ptr = res3 * 8; - assert res3[0] == res2[0] + 2**7; + print(res3_ptr[0], res2_ptr[0]); + + assert res3_ptr[0] == res2_ptr[0] + 2**SECOND_N_VARS; for i in 0..1000 { assert i != 1000; @@ -53,10 +58,21 @@ fn test_zk_vm() { "# .to_string(); - let public_input = (0..(1 << 13) - PUBLIC_INPUT_START) + const SECOND_POINT: usize = 2; + const SECOND_N_VARS: usize = 7; + + let mut public_input = (0..(1 << 13) - PUBLIC_INPUT_START) .map(F::from_usize) .collect::>(); + public_input[SECOND_POINT * (SECOND_N_VARS * DIMENSION).next_power_of_two() + + SECOND_N_VARS * DIMENSION + - PUBLIC_INPUT_START + ..(SECOND_POINT + 1) * (SECOND_N_VARS * DIMENSION).next_power_of_two() + - PUBLIC_INPUT_START] + .iter_mut() + .for_each(|x| *x = F::ZERO); + let private_input = (0..1 << 13) .map(|i| F::from_usize(i).square()) .collect::>(); @@ -72,6 +88,7 @@ fn test_zk_vm() { &private_input, &batch_pcs, false, - ); + ) + .0; verify_execution(&bytecode, &public_input, proof_data, &batch_pcs).unwrap(); } diff --git a/crates/zk_vm/zk_vm_air/src/dot_product_air.rs b/crates/zk_vm/zk_vm_air/src/dot_product_air.rs index 2a3ce4d2..02c40f4a 100644 --- a/crates/zk_vm/zk_vm_air/src/dot_product_air.rs +++ b/crates/zk_vm/zk_vm_air/src/dot_product_air.rs @@ -23,7 +23,8 @@ use zk_vm_trace::WitnessDotProduct; */ const DOT_PRODUCT_AIR_COLUMNS: usize = 9; -pub const DOT_PRODUCT_AIR_COLUMN_GROUPS: [Range; 5] = [0..1, 1..2, 2..5, 5..8, 8..9]; +pub const DOT_PRODUCT_AIR_COLUMN_GROUPS: [Range; 7] = + [0..1, 1..2, 2..3, 3..4, 4..5, 5..8, 8..9]; #[derive(Debug)] pub struct DotProductAir; diff --git a/crates/zk_vm/zk_vm_trace/src/execution_trace.rs b/crates/zk_vm/zk_vm_trace/src/execution_trace.rs index 22bca1ef..5d26c622 100644 --- a/crates/zk_vm/zk_vm_trace/src/execution_trace.rs +++ b/crates/zk_vm/zk_vm_trace/src/execution_trace.rs @@ -4,9 +4,10 @@ use crate::{ COL_INDEX_MEM_VALUE_A, COL_INDEX_MEM_VALUE_B, COL_INDEX_MEM_VALUE_C, COL_INDEX_PC, N_EXEC_COLUMNS, N_INSTRUCTION_COLUMNS, }; -use p3_field::Field; use p3_field::PrimeCharacteristicRing; +use p3_field::{BasedVectorSpace, Field}; use p3_symmetric::Permutation; +use p3_util::log2_ceil_usize; use rayon::prelude::*; use utils::{ToUsize, get_poseidon16, get_poseidon24}; use vm::*; @@ -44,6 +45,19 @@ pub struct WitnessPoseidon16 { pub output: [F; 16], } +impl WitnessPoseidon16 { + pub fn poseidon_of_zero() -> Self { + Self { + cycle: None, + addr_input_a: ZERO_VEC_PTR, + addr_input_b: ZERO_VEC_PTR, + 
diff --git a/crates/zk_vm/zk_vm_air/src/dot_product_air.rs b/crates/zk_vm/zk_vm_air/src/dot_product_air.rs
index 2a3ce4d2..02c40f4a 100644
--- a/crates/zk_vm/zk_vm_air/src/dot_product_air.rs
+++ b/crates/zk_vm/zk_vm_air/src/dot_product_air.rs
@@ -23,7 +23,8 @@ use zk_vm_trace::WitnessDotProduct;
 */

 const DOT_PRODUCT_AIR_COLUMNS: usize = 9;
-pub const DOT_PRODUCT_AIR_COLUMN_GROUPS: [Range<usize>; 5] = [0..1, 1..2, 2..5, 5..8, 8..9];
+pub const DOT_PRODUCT_AIR_COLUMN_GROUPS: [Range<usize>; 7] =
+    [0..1, 1..2, 2..3, 3..4, 4..5, 5..8, 8..9];

 #[derive(Debug)]
 pub struct DotProductAir;
diff --git a/crates/zk_vm/zk_vm_trace/src/execution_trace.rs b/crates/zk_vm/zk_vm_trace/src/execution_trace.rs
index 22bca1ef..5d26c622 100644
--- a/crates/zk_vm/zk_vm_trace/src/execution_trace.rs
+++ b/crates/zk_vm/zk_vm_trace/src/execution_trace.rs
@@ -4,9 +4,10 @@ use crate::{
     COL_INDEX_MEM_VALUE_A, COL_INDEX_MEM_VALUE_B, COL_INDEX_MEM_VALUE_C, COL_INDEX_PC,
     N_EXEC_COLUMNS, N_INSTRUCTION_COLUMNS,
 };
-use p3_field::Field;
 use p3_field::PrimeCharacteristicRing;
+use p3_field::{BasedVectorSpace, Field};
 use p3_symmetric::Permutation;
+use p3_util::log2_ceil_usize;
 use rayon::prelude::*;
 use utils::{ToUsize, get_poseidon16, get_poseidon24};
 use vm::*;
@@ -44,6 +45,19 @@ pub struct WitnessPoseidon16 {
     pub output: [F; 16],
 }

+impl WitnessPoseidon16 {
+    pub fn poseidon_of_zero() -> Self {
+        Self {
+            cycle: None,
+            addr_input_a: ZERO_VEC_PTR,
+            addr_input_b: ZERO_VEC_PTR,
+            addr_output: POSEIDON_16_NULL_HASH_PTR,
+            input: [F::ZERO; 16],
+            output: get_poseidon16().permute([F::ZERO; 16]),
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct WitnessPoseidon24 {
     pub cycle: Option<usize>,
@@ -54,6 +68,21 @@ pub struct WitnessPoseidon24 {
     pub output: [F; 8], // last 8 elements of the output
 }

+impl WitnessPoseidon24 {
+    pub fn poseidon_of_zero() -> Self {
+        Self {
+            cycle: None,
+            addr_input_a: ZERO_VEC_PTR,
+            addr_input_b: ZERO_VEC_PTR,
+            addr_output: POSEIDON_24_NULL_HASH_PTR,
+            input: [F::ZERO; 24],
+            output: get_poseidon24().permute([F::ZERO; 24])[16..24]
+                .try_into()
+                .unwrap(),
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct ExecutionTrace {
     pub full_trace: Vec<Vec<F>>,
@@ -203,11 +232,26 @@ pub fn get_execution_trace(
                     let addr_coeffs = coeffs.read_value(memory, fp).unwrap().to_usize();
                     let addr_point = point.read_value(memory, fp).unwrap().to_usize();
                     let addr_res = res.read_value(memory, fp).unwrap().to_usize();
-                    let point = (0..*n_vars)
-                        .map(|i| memory.get_ef_element(addr_point + i * DIMENSION))
-                        .collect::<Result<Vec<_>, _>>()
+
+                    let log_point_size = log2_ceil_usize(*n_vars * DIMENSION);
+                    let point_slice = memory
+                        .slice(addr_point << log_point_size, *n_vars * DIMENSION)
                         .unwrap();
-                    let res = memory.get_ef_element(addr_res).unwrap();
+                    for i in *n_vars * DIMENSION..(*n_vars * DIMENSION).next_power_of_two() {
+                        assert!(
+                            memory
+                                .get((addr_point << log_point_size) + i)
+                                .unwrap()
+                                .is_zero()
+                        ); // padding
+                    }
+                    let point = point_slice[..*n_vars * DIMENSION]
+                        .chunks_exact(DIMENSION)
+                        .map(|chunk| EF::from_basis_coefficients_slice(chunk).unwrap())
+                        .collect::<Vec<_>>();
+
+                    let res = memory.get_vector(addr_res).unwrap();
+                    assert!(res[DIMENSION..].iter().all(|&x| x.is_zero()));
                     vm_multilinear_evals.push(WitnessMultilinearEval {
                         cycle,
                         addr_coeffs,
@@ -215,7 +259,7 @@ pub fn get_execution_trace(
                         addr_res,
                         n_vars: *n_vars,
                         point,
-                        res,
+                        res: EF::from_basis_coefficients_slice(&res[..DIMENSION]).unwrap(),
                     });
                 }
                 _ => {}
@@ -230,17 +274,11 @@ pub fn get_execution_trace(
         }
     }

-    let mut memory = memory
+    let memory = memory
         .0
         .par_iter()
         .map(|&v| v.unwrap_or(F::ZERO))
         .collect::<Vec<_>>();
-    memory.resize(
-        memory
-            .len()
-            .next_multiple_of(execution_result.public_memory_size),
-        F::ZERO,
-    );

     let n_poseidons_16 = poseidons_16.len();
     let n_poseidons_24 = poseidons_24.len();
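Point coordinates and results are now read from power-of-two slots whose tails are asserted to be zero before decoding. A minimal sketch of that decoding step as a free function (hypothetical helper and bounds; the real code inlines this on `memory.get_vector(addr_res)`):

    use p3_field::{BasedVectorSpace, Field};

    // Hypothetical helper: decode an extension element from a zero-padded slot
    // of base-field limbs. Limbs past `dimension` are padding and must be zero.
    fn decode_padded_ef<F: Field, EF: BasedVectorSpace<F>>(slot: &[F], dimension: usize) -> EF {
        assert!(slot[dimension..].iter().all(|x| x.is_zero()));
        EF::from_basis_coefficients_slice(&slot[..dimension]).unwrap()
    }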
diff --git a/crates/zk_vm/zk_vm_trace/src/poseidon_tables.rs b/crates/zk_vm/zk_vm_trace/src/poseidon_tables.rs
index 3785f8d0..e2821342 100644
--- a/crates/zk_vm/zk_vm_trace/src/poseidon_tables.rs
+++ b/crates/zk_vm/zk_vm_trace/src/poseidon_tables.rs
@@ -21,48 +21,42 @@ pub fn build_poseidon_columns(
     (cols_16, cols_24)
 }

-pub fn all_poseidon_16_indexes(poseidons_16: &[WitnessPoseidon16]) -> Vec<F> {
-    padd_with_zero_to_next_power_of_two(
-        &[
-            poseidons_16
+pub fn all_poseidon_16_indexes(poseidons_16: &[WitnessPoseidon16]) -> [Vec<F>; 3] {
+    [
+        poseidons_16
+            .iter()
+            .map(|p| F::from_usize(p.addr_input_a))
+            .collect::<Vec<_>>(),
+        poseidons_16
+            .iter()
+            .map(|p| F::from_usize(p.addr_input_b))
+            .collect::<Vec<_>>(),
+        poseidons_16
+            .iter()
+            .map(|p| F::from_usize(p.addr_output))
+            .collect::<Vec<_>>(),
+    ]
+}
+
+pub fn all_poseidon_24_indexes(poseidons_24: &[WitnessPoseidon24]) -> [Vec<F>; 3] {
+    [
+        padd_with_zero_to_next_power_of_two(
+            &poseidons_24
                 .iter()
                 .map(|p| F::from_usize(p.addr_input_a))
                 .collect::<Vec<_>>(),
-            poseidons_16
+        ),
+        padd_with_zero_to_next_power_of_two(
+            &poseidons_24
                 .iter()
                 .map(|p| F::from_usize(p.addr_input_b))
                 .collect::<Vec<_>>(),
-            poseidons_16
+        ),
+        padd_with_zero_to_next_power_of_two(
+            &poseidons_24
                 .iter()
                 .map(|p| F::from_usize(p.addr_output))
                 .collect::<Vec<_>>(),
-        ]
-        .concat(),
-    )
-}
-
-pub fn all_poseidon_24_indexes(poseidons_24: &[WitnessPoseidon24]) -> Vec<F> {
-    padd_with_zero_to_next_power_of_two(
-        &[
-            padd_with_zero_to_next_power_of_two(
-                &poseidons_24
-                    .iter()
-                    .map(|p| F::from_usize(p.addr_input_a))
-                    .collect::<Vec<_>>(),
-            ),
-            padd_with_zero_to_next_power_of_two(
-                &poseidons_24
-                    .iter()
-                    .map(|p| F::from_usize(p.addr_input_b))
-                    .collect::<Vec<_>>(),
-            ),
-            padd_with_zero_to_next_power_of_two(
-                &poseidons_24
-                    .iter()
-                    .map(|p| F::from_usize(p.addr_output))
-                    .collect::<Vec<_>>(),
-            ),
-        ]
-        .concat(),
-    )
+        ),
+    ]
 }
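The Poseidon-16 variant now returns the three raw index columns, while the Poseidon-24 variant still zero-pads each column to a power-of-two length. For reference, a hypothetical stand-in for the padding helper (the real `padd_with_zero_to_next_power_of_two` is imported from the workspace; behavior inferred from its name and usage):

    use p3_field::PrimeCharacteristicRing;

    // Hypothetical stand-in: zero-pad a column up to the next power-of-two length.
    fn padd_with_zero_to_next_power_of_two<F: PrimeCharacteristicRing + Clone>(column: &[F]) -> Vec<F> {
        let mut padded = column.to_vec();
        padded.resize(padded.len().next_power_of_two(), F::ZERO);
        padded
    }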
diff --git a/docs/benchmark_graphs/graphs/raw_poseidons.svg b/docs/benchmark_graphs/graphs/raw_poseidons.svg
index eaaa7c58..3a358792 100644
[regenerated plot: timestamp bumped to 2025-09-14, one data point appended to the series; SVG coordinate and clip-path churn omitted]
diff --git a/docs/benchmark_graphs/graphs/recursive_whir_opening.svg b/docs/benchmark_graphs/graphs/recursive_whir_opening.svg
index 53969caf..369866c8 100644
[regenerated plot: same 2025-09-14 refresh; markup churn omitted]
diff --git a/docs/benchmark_graphs/graphs/xmss_aggregated_overhead.svg b/docs/benchmark_graphs/graphs/xmss_aggregated_overhead.svg
index 42539b0b..735acc67 100644
[regenerated plot: same 2025-09-14 refresh; markup churn omitted]
diff --git a/docs/benchmark_graphs/graphs/xmss_aggregated_time.svg b/docs/benchmark_graphs/graphs/xmss_aggregated_time.svg
index a1021308..bf5f6005 100644
[regenerated plot: same 2025-09-14 refresh; markup churn omitted]
diff --git a/docs/benchmark_graphs/main.py b/docs/benchmark_graphs/main.py
index 24d7ccf6..968f8c8a 100644
--- a/docs/benchmark_graphs/main.py
+++ b/docs/benchmark_graphs/main.py
@@ -51,13 +51,15 @@ def create_duration_graph(data, target, target_label, title, y_legend, file):
     ('2025-08-27', 85000),
     ('2025-08-30', 95000),
     ('2025-09-09', 108000),
+    ('2025-09-14', 108000),
 ], target=300_000, target_label="Target (300.000 Poseidon2 / s)", title="Raw Poseidon2", y_legend="Poseidons proven / s", file="raw_poseidons")

 create_duration_graph(data=[
     ('2025-08-27', 2.7),
     ('2025-09-07', 1.4),
     ('2025-09-09', 1.32),
-    ('2025-09-10', 0.970)
+    ('2025-09-10', 0.970),
+    ('2025-09-14', 0.825),
 ], target=0.25, target_label="Target (0.25 s)", title="Recursive WHIR opening", y_legend="Proving time (s)", file="recursive_whir_opening")

 create_duration_graph(data=[
@@ -65,7 +67,8 @@ def create_duration_graph(data, target, target_label, title, y_legend, file):
     ('2025-09-02', 13.5),
     ('2025-09-03', 9.4),
     ('2025-09-09', 8.02),
-    ('2025-09-10', 6.53)
+    ('2025-09-10', 6.53),
+    ('2025-09-14', 4.65)
 ], target=0.5, target_label="Target (0.5 s)", title="500 XMSS aggregated: proving time", y_legend="Proving time (s)", file="xmss_aggregated_time")

 create_duration_graph(data=[
@@ -73,5 +76,6 @@ def create_duration_graph(data, target, target_label, title, y_legend, file):
     ('2025-09-02', 13.5 / 0.82),
     ('2025-09-03', 9.4 / 0.82),
     ('2025-09-09', 8.02 / 0.72),
-    ('2025-09-10', 6.53 / 0.72)
+    ('2025-09-10', 6.53 / 0.72),
+    ('2025-09-14', 4.65 / 0.72)
 ], target=2.0, target_label="Target (2x)", title="500 XMSS aggregated: zkVM overhead vs raw Poseidons", y_legend="", file="xmss_aggregated_overhead")
diff --git a/src/examples/prove_poseidon2.rs b/src/examples/prove_poseidon2.rs
index 593f3a73..7306e232 100644
--- a/src/examples/prove_poseidon2.rs
+++ b/src/examples/prove_poseidon2.rs
@@ -5,7 +5,10 @@ use p3_field::PrimeField64;
 use p3_koala_bear::{KoalaBear, QuinticExtensionFieldKB};
 use p3_symmetric::Permutation;
 use p3_util::log2_ceil_usize;
-use pcs::{PCS, packed_pcs_commit, packed_pcs_global_statements, packed_pcs_parse_commitment};
+use pcs::{
+    ColDims, PCS, packed_pcs_commit, packed_pcs_global_statements_for_prover,
+    packed_pcs_global_statements_for_verifier, packed_pcs_parse_commitment,
+};
 use rand::{Rng, SeedableRng, rngs::StdRng};
 use std::fmt;
 use std::marker::PhantomData;
@@ -43,7 +46,11 @@ impl fmt::Display for Poseidon2Benchmark {
                 / self.prover_time.as_secs_f64())
             .round() as usize
         )?;
-        writeln!(f, "Proof size: {:.1} KiB", self.proof_size / 1024.0)?;
+        writeln!(
+            f,
+            "Proof size: {:.1} KiB (not optimized)",
+            self.proof_size / 1024.0
+        )?;
         writeln!(f, "Verification: {} ms", self.verifier_time.as_millis())
     }
 }
@@ -150,31 +157,40 @@ pub fn prove_poseidon2(
     let commited_trace_polynomial_24 =
         padd_with_zero_to_next_power_of_two(&witness_columns_24.concat());

-    let packed_commitment_witness = packed_pcs_commit(
-        &pcs,
-        &[&commited_trace_polynomial_16, &commited_trace_polynomial_24],
-        &dft,
-        &mut prover_state,
-    );
+    let dims = [
+        ColDims::dense(log_table_area_16),
+        ColDims::dense(log_table_area_24),
+    ];
+    let log_smallest_decomposition_chunk = 0; // unused: every column length here is a power of two
+
+    let commited_data = [
+        commited_trace_polynomial_16.as_slice(),
+        commited_trace_polynomial_24.as_slice(),
+    ];
+    let commitment_witness =
+        packed_pcs_commit(&pcs, &commited_data, &dims, &dft, &mut prover_state, 0);

     let evaluations_remaining_to_prove_16 =
         table_16.prove_base(&mut prover_state, univariate_skips, witness_16);
     let evaluations_remaining_to_prove_24 =
         table_24.prove_base(&mut prover_state, univariate_skips, witness_24);

-    let global_statements_to_prove = packed_pcs_global_statements(
-        &packed_commitment_witness.tree,
+    let global_statements_to_prove = packed_pcs_global_statements_for_prover(
+        &commited_data,
+        &dims,
+        log_smallest_decomposition_chunk,
         &[
             evaluations_remaining_to_prove_16,
             evaluations_remaining_to_prove_24,
         ],
+        &mut prover_state,
     );

     pcs.open(
         &dft,
         &mut prover_state,
         &global_statements_to_prove,
-        packed_commitment_witness.inner_witness,
-        &packed_commitment_witness.packed_polynomial,
+        commitment_witness.inner_witness,
+        &commitment_witness.packed_polynomial,
     );

     let prover_time = t.elapsed();
@@ -185,7 +201,8 @@ pub fn prove_poseidon2(
     let packed_parsed_commitment = packed_pcs_parse_commitment(
         &pcs,
         &mut verifier_state,
-        vec![log_table_area_16, log_table_area_24],
+        &dims,
+        log_smallest_decomposition_chunk,
     )
     .unwrap();
@@ -206,16 +223,20 @@ pub fn prove_poseidon2(
     )
     .unwrap();

-    let global_statements_to_verify = packed_pcs_global_statements(
-        &packed_parsed_commitment.tree,
+    let global_statements_to_verify = packed_pcs_global_statements_for_verifier(
+        &dims,
+        log_smallest_decomposition_chunk,
         &[
             evaluations_remaining_to_verify_16,
             evaluations_remaining_to_verify_24,
         ],
-    );
+        &mut verifier_state,
+        &Default::default(),
+    )
+    .unwrap();

     pcs.verify(
         &mut verifier_state,
-        &packed_parsed_commitment.inner_parsed_commitment,
+        &packed_parsed_commitment,
         &global_statements_to_verify,
     )
     .unwrap();
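Both here and in execution_trace.rs, slot sizes are powers of two derived via `p3_util::log2_ceil_usize`. An illustrative std-only mirror of that computation (hypothetical reimplementation, not the library's code):

    // log2_ceil(n) = smallest k with 2^k >= n.
    fn log2_ceil(n: usize) -> usize {
        n.next_power_of_two().trailing_zeros() as usize
    }

    fn main() {
        assert_eq!(log2_ceil(1), 0);
        assert_eq!(log2_ceil(35), 6); // e.g. a 35-limb point occupies a 2^6 = 64 slot
        assert_eq!(log2_ceil(64), 6);
    }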