Skip to content

Commit

Permalink
Optimize computation of DEEP queries in recursive verifier (#1594)
Browse files Browse the repository at this point in the history
* feat: optimize computation of DEEP queries

chore: update changelog

chore: address feedback
Signed-off-by: Al-Kindi-0 <[email protected]>

* refactor: remove file

* feat: add paramter check to recursive verifier

chore: changelog update

chore: add back file and update it

Revert "refactor: remove file"

This reverts commit 384a5af.

---------

Signed-off-by: Al-Kindi-0 <[email protected]>
  • Loading branch information
Al-Kindi-0 authored Dec 19, 2024
1 parent 21da03d commit 5944710
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 72 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#### Enhancements
- Added options `--kernel`, `--debug` and `--output` to `miden bundle` (#1447).
- Added `miden_core::mast::MastForest::advice_map` to load it into the advice provider before the `MastForest` execution (#1574).
- Optimized the computation of the DEEP queries in the recursive verifier (#1594).
- Added validity checks for the inputs to the recursive verifier (#1596).

## 0.11.0 (2024-11-04)

Expand Down
12 changes: 11 additions & 1 deletion stdlib/asm/crypto/stark/constants.masm
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,15 @@ const.COMPOSITION_COEF_PTR=4294900200

# We need 2 Felt for each trace column and each of the 8 constraint composition columns. We thus need
# (70 + 7 + 8) * 2 Felt i.e. 43 memory slots.
# Note that there is a cap on the number of such coefficients so that the memory region allocated for
# these coefficients does not overlap with the memory region storing the FRI queries.
# This cap is of a 100 coefficients which is equivalent to 50 memory slots. This gives 150 memory
# slots for all of the FRI queries i.e., 150 FRI queries.
const.DEEP_RAND_CC_PTR=4294903000

# FRI
#
# (FRI_COM_PTR - 100) ---|
# (FRI_COM_PTR - 150) ---|
# .
# . | <- FRI queries
# .
Expand Down Expand Up @@ -97,6 +101,7 @@ const.TMP5=4294903319
const.TMP6=4294903320
const.TMP7=4294903321
const.TMP8=4294903322
const.TMP9=4294903323



Expand Down Expand Up @@ -135,6 +140,7 @@ const.TMP8=4294903322
# | TMP6 | 4294903320 |
# | TMP7 | 4294903321 |
# | TMP8 | 4294903322 |
# | TMP9 | 4294903323 |
# +------------------------------------------+-------------------------+

# ACCESSORS
Expand Down Expand Up @@ -305,3 +311,7 @@ end
export.tmp8
push.TMP8
end

export.tmp9
push.TMP9
end
139 changes: 82 additions & 57 deletions stdlib/asm/crypto/stark/deep_queries.masm
Original file line number Diff line number Diff line change
Expand Up @@ -127,22 +127,23 @@ end
#! Loads the next query rows in the main, auxiliary and constraint composition polynomials traces.
#! It takes a pointer to the current random query index and returns that index.
#!
#! Input: [query_ptr, ...]
#! Output: [index, query_ptr, ...]
#! Input: [Y, query_ptr, ...]
#! Output: [Y, Y, index, query_ptr, ...]
#!
#! where:
#! - Y is a "garbage" word.
#!
#! Cycles: 219
#! Cycles: 206
proc.load_query_row
# Main trace portion of the query

## Get the next query index
padw
dup.4
mem_loadw
swap
#=> [depth, index, y, y, query_ptr, ...]
#=> [index, depth, y, y, query_ptr, ...] where y are "garbage" values

## Get main trace commitment and use it to get the leaf
movup.3 movup.3
movdn.3 movdn.2
push.0.0
exec.constants::main_trace_com_ptr mem_loadw
#=>[R, depth, index, query_ptr, ...]
Expand Down Expand Up @@ -273,50 +274,48 @@ proc.load_query_row
assert_eq
#=> [Y, ptr, y, y, y, depth, index, query_ptr, ...]

dropw dropw drop
#=> [index, query_ptr, ...]
drop
#=> [Y, Y, index, query_ptr, ...]
end

#! Takes a query index and computes x := offset * domain_gen^index. It also computes the denominators
#! (x - z) and (x - gz).
#!
#! Input: [index, ...]
#! Output: [Z, x, index, ...] where Z := [-gz1, x -gz0, -z1, x - z0]
#! Input: [Y, Y, index, ...]
#! Output: [Z, Y, x, index, ...]
#!
#! where:
#! - Z := [-gz1, x -gz0, -z1, x - z0]
#! - Y is a "garbage" word
#!
#! Cycles: 68
#! Cycles: 58
proc.compute_denominators
# Compute x = offset * domain_gen^index
padw
exec.constants::lde_size_ptr mem_loadw
#=> [lde_size, depth, domain_gen, 0, index, ...]
#=> [lde_size, depth, domain_gen, 0, Y, index, ...]
movup.2
dup.4
dup.8
exp.u32
exec.constants::domain_offset mul
#=> [x, lde_size, depth, 0, index, ...]
#=> [x, lde_size, depth, 0, Y, index, ...]

# Get z and gz from memory
movdn.3
#=> [lde_size, depth, 0, x, index, ...]
#=> [lde_size, depth, 0, x, Y, index, ...]
push.0
exec.constants::tmp1 mem_loadw
#=> [gz1, gz0, z1, z0, x, index, ...]
#=> [-z0, -gz0, -gz1, -z1, x, Y, index, ...]

# Compute Z := [-z1, x - z0, -gz1, x -gz0]
neg
dup.4
movup.2
sub
#=> [x-gz0, -gz1, z1, z0, x, index, ...]
dup.4 add
#=> [x-z0, -gz0, -gz1, -z1, x, Y, index, ...]
movdn.3
movdn.2
#=> [z1, z0, -gz1, x-gz0, x, index, ...]
neg
movdn.3
dup.4 swap
sub
movdn.3
#=> [Z, x, index, ...] where Z := [-gz1, x -gz0, -z1, x - z0]
#=> [-gz0, -gz1, -z1, x-z0, x, Y, index, ...]

movup.4 dup movdn.9
#=> [x, -gz0, -gz1, -z1, x-z0, Y, x, index, ...]

add swap
#=> [-gz1, x - gz0, -z1, x-z0, Y, x, index, ...]
end

#! Computes the random linear combination involving the main trace columns and accumulates
Expand Down Expand Up @@ -429,7 +428,7 @@ proc.combine_constraint_poly_columns
dropw
swapw
exec.constants::tmp3 mem_loadw
#=> [Acc3, Acc2, y, y, y, y, Acc1`, Acc0`, ...]
#=> [Acc3, Acc2, y, y, y, y, Acc1`, Acc0`, ...] where y are "garbage" values
movdn.5 movdn.5
#=> [y, y, y, y, Acc3, Acc2, Acc1`, Acc0`, ...]
dropw
Expand Down Expand Up @@ -466,12 +465,43 @@ end
#! Compute the DEEP composition polynomial FRI queries.
#!
#! Input: [query_ptr, ...]
#! Output: [...]
#! Cycles: 6 + num_queries * 473
#! Output: [query_ptr, ...]
#! Cycles: 24 + num_queries * 445
export.compute_deep_composition_polynomial_queries

# Get pointer to help test for the last query to be processed
exec.constants::fri_com_ptr
dup.1
#=>[query_ptr, query_end_ptr, ...]
# =>[query_ptr, query_end_ptr, query_ptr...]
# Store the pointers to:
# 1. random values for computing DEEP polynomial
# 2. OOD evaluation frame
# 3. trace row values for the current query
push.0
exec.constants::deep_rand_coef_ptr
exec.constants::ood_trace_ptr
exec.constants::current_trace_row_ptr
exec.constants::tmp9 mem_storew
# =>[Y, query_ptr, query_end_ptr, query_ptr, ...]

# Compute the negations of z and gz where z is the OOD point
# We do it here as this computation is common to all queries.
exec.constants::z_ptr mem_loadw
# => [zN_1, zN_0, z1, z0, query_ptr, query_end_ptr, query_ptr, ...]
drop drop
neg swap neg
# => [-z0, -z1, query_ptr, query_end_ptr, query_ptr, ...]
dup.1 exec.constants::trace_domain_generator_ptr mem_load mul
# => [-gz1, -z0, -z1, query_ptr, query_end_ptr, query_ptr, ...]
swap
# => [-z0, -gz1, -z1, query_ptr, query_end_ptr, query_ptr, ...]
dup exec.constants::trace_domain_generator_ptr mem_load mul
# => [-gz0, -z0, -gz1, -z1, query_ptr, query_end_ptr, query_ptr, ...]
swap
# => [-z0, -gz0, -gz1, -z1, query_ptr, query_end_ptr, query_ptr, ...]
# Save to temporary location `tmp1` for later use when computing the denominators
exec.constants::tmp1 mem_storew
# => [Y, query_ptr, query_end_ptr, query_ptr, ...]

push.1
while.true
Expand All @@ -480,19 +510,18 @@ export.compute_deep_composition_polynomial_queries
# Load the (main, aux, constraint)-traces rows associated with the current query and get
# the index of the query.
#
# Cycles: 219
# Cycles: 206
exec.load_query_row
#=>[index, query_ptr, query_end_ptr, ...]
#=>[Y, Y, index, query_ptr, query_end_ptr, query_ptr, ...]


# II)
#
# Compute x := offset * domain_gen^index and denominators (x - z) and (x - gz)
#
# Cycles: 68
# Cycles: 58
exec.compute_denominators
#=> [Z, x, index, query_ptr, query_end_ptr, ...] where Z := [-gz1, x - gz0, -z1, x - z0]

#=> [Z, Y, x, index, query_ptr, query_end_ptr, query_ptr, ...] where Z := [-gz1, x - gz0, -z1, x - z0]

# III)
#
Expand All @@ -503,40 +532,37 @@ export.compute_deep_composition_polynomial_queries

## a) Push pointers
##
## Cycles: 4
push.0
exec.constants::deep_rand_coef_ptr
exec.constants::ood_trace_ptr
exec.constants::current_trace_row_ptr
#=> [P, Z, x, index, query_ptr, query_end_ptr, ...]
## Cycles: 3
swapw exec.constants::tmp9 mem_loadw
#=> [P, Z, x, index, query_ptr, query_end_ptr, query_ptr, ...]
# where P := [CURRENT_TRACE_ROW_PTR, OOD_TRACE_PTR, DEEP_RAND_CC_PTR, 0]

## b) Push the accumulators
##
## Cycles: 4
padw
#=> [Acc, P, Z, x, index, query_ptr, query_end_ptr, ...]
#=> [Acc, P, Z, x, index, query_ptr, query_end_ptr, query_ptr, ...]
#=> where Acc =: [Acc3, Acc2, Acc1, Acc0]

## c) This will be used to mstream the elements T_i(x)
##
## Cycles: 8
padw padw
#=> [Y, Y, Acc, P, Z, x, index, query_ptr, query_end_ptr, ...]
#=> [Y, Y, Acc, P, Z, x, index, query_ptr, query_end_ptr, query_ptr, ...]

## d) Compute the random linear combination
##
## Cycles: 81 + 9 + 25 = 115
exec.combine_main_trace_columns
exec.combine_aux_trace_columns
exec.combine_constraint_poly_columns
#=> [Acc, Z, x, index, query_ptr, query_end_ptr, ...]
#=> [Acc, Z, x, index, query_ptr, query_end_ptr, query_ptr, ...]

## e) Divide by denominators and sum to get final result
##
## Cycles: 38
exec.divide_by_denominators_and_sum
#=> [eval1, eval0, x, index, query_ptr, query_end_ptr, ...]
#=> [eval1, eval0, x, index, query_ptr, query_end_ptr, query_ptr, ...]


# IV)
Expand All @@ -549,22 +575,21 @@ export.compute_deep_composition_polynomial_queries
## Cycles: 4
movup.3 movup.3
exec.constants::domain_offset_inv mul
#=> [poe, index, eval1, eval0, query_ptr, query_end_ptr, ...]
#=> [poe, index, eval1, eval0, query_ptr, query_end_ptr, query_ptr, ...]

## b) Store [eval0, eval1, index, poe]
##
## Cycles: 5
dup.4 add.1 swap.5
mem_storew
#=> [poe, index, eval1, eval0, query_ptr+1, query_end_ptr, ...]
#=> [poe, index, eval1, eval0, query_ptr+1, query_end_ptr, query_ptr, ...]

## c) Prepare stack for next iteration
##
## Cycles: 8
dropw
dup.1 dup.1
## Cycles: 4
dup.5 dup.5
neq
#=> [?, query_ptr+1, query_end_ptr, ...]
end
drop drop
dropw drop drop
end
19 changes: 10 additions & 9 deletions stdlib/asm/crypto/stark/mod.masm
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@ use.std::crypto::stark::verifier
#! The following simplifying assumptions are currently made:
#! - The blowup is set to 8.
#! - The maximal allowed degree of the remainder polynomial is 7.
#! - Only the input and output stacks, assumed of fixed size equal to 16, are handled in regards
#! to public inputs.
#! - The public inputs are composed of the input and output stacks, of fixed size equal to 16.
#! - There are two trace segments, main and auxiliary. It is assumed that the main trace segment
#! is 73 columns wide while the auxiliary trace segment is 9 columns wide.
#! is 70 columns wide while the auxiliary trace segment is 7 columns wide.
#! - The OOD evaluation frame is composed of two interleaved rows, current and next, each composed
#! of 73 elements representing the main trace portion and 9 elements for the auxiliary trace one.
#! of 70 elements representing the main trace portion and 7 elements for the auxiliary trace one.
#! - To boost soundness, the protocol is run on a quadratic extension field and this means that
#! the OOD evaluation frame is composed of elements in a quadratic extension field i.e. tuples.
#! Similarly, elements of the auxiliary trace are quadratic extension field elements.
#! Similarly, elements of the auxiliary trace are quadratic extension field elements. The random
#! values for computing random linear combinations are also in this extension field.
#! - The following procedure makes use of global memory address beyond 3 * 2^30 and these are
#! defined in `constants.masm`.
#!
#! Input: [log(trace_length), num_queries, log(blowup), grinding]
#! Output: []
#! Input: [log(trace_length), num_queries, log(blowup), grinding, ...]
#! Output: [...]
#!
#! Cycles:
#! 1- Remainder codeword size 32:
#! 5000 + num_queries * (40 + num_fri_layers * 76 + 26 + 463) + 83 * num_fri_layers + 10 * log(trace_length) + 1633
#! 5013 + num_queries * (40 + num_fri_layers * 76 + 26 + 445) + 83 * num_fri_layers + 10 * log(trace_length) + 1633
#! 2- Remainder codeword size 64:
#! 5000 + num_queries * (40 + num_fri_layers * 76 + 26 + 463) + 83 * num_fri_layers + 10 * log(trace_length) + 3109
#! 5013 + num_queries * (40 + num_fri_layers * 76 + 26 + 445) + 83 * num_fri_layers + 10 * log(trace_length) + 3109
export.verifier::verify
32 changes: 32 additions & 0 deletions stdlib/asm/crypto/stark/utils.masm
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,35 @@ export.compute_lde_generator
exp.u32
# => [domain_gen, ..]
end

#! Validates the inputs to the recursive verifier.
#!
#! Input: [log(trace_length), num_queries, log(blowup), grinding, ...]
#! Output: [log(trace_length), num_queries, log(blowup), grinding, ...]
#!
#! Cycles: 28
export.validate_inputs
# 1) Assert that all inputs are u32 so that we can use u32 operations in what follows
dupw
u32assertw
# => [log(trace_length), num_queries, log(blowup), grinding, ...]

# 2) Assert that the trace length is at most 29. The 2-adicity of our field is 32 and since
# the blowup factor is 8, we need to make sure that the LDE size is at most 2^32.
# We also check that the trace length is greater than the minimal length supported i.e., 2^6.
dup u32lt.30 assert
u32gt.5 assert

# 3) Assert that the number of FRI queries is at most 150. This restriction is a soft one
# and is due to the memory layout in the `constants.masm` files but can be updated
# therein.
# We also make sure that there is at least one FRI query.
dup u32lt.151 assert
u32gt.0 assert

# 4) Assert that the the log(blowup) is 3
eq.3 assert

# 5) Assert that the grinding factor is at most 31
u32lt.32 assert
end
Loading

0 comments on commit 5944710

Please sign in to comment.