From efd46148b0d764137a96c7f454dbd6df6c490c97 Mon Sep 17 00:00:00 2001
From: Jaremy Creechley <creechley@gmail.com>
Date: Thu, 23 May 2024 19:28:17 +0300
Subject: [PATCH] Pr add prover benchmark tool (#790)

* initial setup

* reorg

* figuring out basic shell commands

* benchmarks

* benchmarks

* Sets up environment for running benchmarks

* updates

* integrate setup and proving

* updates

* adding outputs

* cleanup

* check failure

* benchmarks

* benchmarks

* benchmarks

* benchmarks

* benchmarks

* benchmarks

* formatting

* fix running larger sizes

* use larger ceremony file size

* use larger ceremony file size

* use larger ceremony file size

* restore benchmarks

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* refactor env

* refactor env

* refactor env

* refactor env

* refactor env

* rename

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* readme

* readme

* merge

* initial splitout of codex ark prover cli

* opts

* copying nimcli opts

* copying nimcli opts

* copying nimcli opts

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* docs

* remove file

* add param

* add benchmarkLoops param

* update benchmark formatting

* update benchmark formatting

* update benchmark formatting

* update benchmark formatting

* fix naming

* fix serde version

* Apply suggestions from code review

cleanup wording

Signed-off-by: Dmitriy Ryajov <dryajov@gmail.com>

---------

Signed-off-by: Dmitriy Ryajov <dryajov@gmail.com>
Co-authored-by: Dmitriy Ryajov <dryajov@gmail.com>
---
 benchmarks/.gitignore          |   2 +
 benchmarks/README.md           |  33 ++++++
 benchmarks/config.nims         |  15 +++
 benchmarks/create_circuits.nim | 187 +++++++++++++++++++++++++++++++++
 benchmarks/run_benchmarks.nim  | 105 ++++++++++++++++++
 benchmarks/utils.nim           |  76 ++++++++++++++
 6 files changed, 418 insertions(+)
 create mode 100644 benchmarks/.gitignore
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/config.nims
 create mode 100644 benchmarks/create_circuits.nim
 create mode 100644 benchmarks/run_benchmarks.nim
 create mode 100644 benchmarks/utils.nim

diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
new file mode 100644
index 000000000..6f6971524
--- /dev/null
+++ b/benchmarks/.gitignore
@@ -0,0 +1,2 @@
+ceremony
+circuit_bench_*
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 000000000..0cff64e93
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,33 @@
+
+## Benchmark Runner
+
+Modify the `runAllBenchmarks` proc in `run_benchmarks.nim` to set the desired parameters and variations.
+
+Then run it:
+
+```sh
+nim c -r run_benchmarks
+```
+
+By default, the circuit files for each combination of circuit args are generated in a unique folder named like:
+    nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3
+
+Generating the circuit files often takes longer than running benchmarks, so caching the results allows re-running the benchmark as needed.
+
+You can modify the `CircuitArgs` and `CircuitEnv` objects in `runAllBenchmarks` to suit your needs. See `create_circuits.nim` for their definitions.
+
+The runner executes all commands relative to the `nim-codex` repo. This simplifies finding the correct circuit include paths, etc. `CircuitEnv` sets all of this.
+
+## Codex Ark Circom CLI
+
+Runs Codex's prover setup with Ark / Circom.
+
+Compile:
+```sh
+nim c codex_ark_prover_cli.nim
+```
+
+Run to see usage:
+```sh
+./codex_ark_prover_cli -h
+```
diff --git a/benchmarks/config.nims b/benchmarks/config.nims
new file mode 100644
index 000000000..c5c2c5dc4
--- /dev/null
+++ b/benchmarks/config.nims
@@ -0,0 +1,15 @@
+--path:
+  ".."
+--path:
+  "../tests"
+--threads:
+  on
+--tlsEmulation:
+  off
+--d:
+  release
+
+# when not defined(chronicles_log_level):
+#   --define:"chronicles_log_level:NONE" # compile all log statements
+#   --define:"chronicles_sinks:textlines[dynamic]" # allow logs to be filtered at runtime
+#   --"import":"logging" # ensure that logging is ignored at runtime
diff --git a/benchmarks/create_circuits.nim b/benchmarks/create_circuits.nim
new file mode 100644
index 000000000..911dcd515
--- /dev/null
+++ b/benchmarks/create_circuits.nim
@@ -0,0 +1,187 @@
+import std/[hashes, json, strutils, strformat, os, osproc, uri]
+
+import ./utils
+
+type
+  CircuitEnv* = object
+    nimCircuitCli*: string
+    circuitDirIncludes*: string
+    ptauPath*: string
+    ptauUrl*: Uri
+    codexProjDir*: string
+
+  CircuitArgs* = object
+    depth*: int
+    maxslots*: int
+    cellsize*: int
+    blocksize*: int
+    nsamples*: int
+    entropy*: int
+    seed*: int
+    nslots*: int
+    ncells*: int
+    index*: int
+
+proc findCodexProjectDir(): string =
+  ## find codex proj dir -- assumes this script is in codex/benchmarks
+  result = currentSourcePath().parentDir.parentDir
+
+func default*(tp: typedesc[CircuitEnv]): CircuitEnv =
+  let codexDir = findCodexProjectDir()
+  result.nimCircuitCli =
+    codexDir / "vendor" / "codex-storage-proofs-circuits" / "reference" / "nim" /
+    "proof_input" / "cli"
+  result.circuitDirIncludes =
+    codexDir / "vendor" / "codex-storage-proofs-circuits" / "circuit"
+  result.ptauPath =
+    codexDir / "benchmarks" / "ceremony" / "powersOfTau28_hez_final_23.ptau"
+  result.ptauUrl = "https://storage.googleapis.com/zkevm/ptau".parseUri
+  result.codexProjDir = codexDir
+
+proc check*(env: var CircuitEnv) =
+  ## check that snarkjs and circom are installed (quits with code 2 if not) and build the Nim circuit cli if it is missing
+  let codexProjDir = findCodexProjectDir()
+  echo "\n\nFound project dir: ", codexProjDir
+
+  let snarkjs = findExe("snarkjs")
+  if snarkjs == "":
+    echo dedent"""
+    ERROR: must install snarkjs first
+
+      npm install -g snarkjs@latest
+    """
+
+  let circom = findExe("circom")
+  if circom == "":
+    echo dedent"""
+    ERROR: must install circom first
+
+      git clone https://github.com/iden3/circom.git
+      cargo install --path circom
+    """
+
+  if snarkjs == "" or circom == "":
+    quit 2
+
+  echo "Found SnarkJS: ", snarkjs
+  echo "Found Circom: ", circom
+
+  if not env.nimCircuitCli.fileExists:
+    echo "Nim Circuit reference cli not found: ", env.nimCircuitCli
+    echo "Building Circuit reference cli...\n"
+    withDir env.nimCircuitCli.parentDir:
+      runit "nimble build -d:release --styleCheck:off cli"
+    echo "CWD: ", getCurrentDir()
+    doAssert env.nimCircuitCli.fileExists(), "cli build did not produce: " & env.nimCircuitCli
+
+  echo "Found NimCircuitCli: ", env.nimCircuitCli
+  echo "Found Circuit Path: ", env.circuitDirIncludes
+  echo "Found PTAU file: ", env.ptauPath
+
+proc downloadPtau*(ptauPath: string, ptauUrl: Uri) =
+  ## download the ptau file with curl if it is missing; `--fail` keeps an HTTP error page from being saved (and later reused) as the ptau file
+  if not ptauPath.fileExists:
+    echo "Ceremony file not found, downloading..."
+    createDir ptauPath.parentDir
+    withDir ptauPath.parentDir:
+      runit fmt"curl --fail --location --output '{ptauPath}' '{$ptauUrl}/{ptauPath.splitPath().tail}'"
+  else:
+    echo "Found PTAU file at: ", ptauPath
+
+proc getCircuitBenchStr*(args: CircuitArgs): string =
+  for f, v in fieldPairs(args):
+    result &= "_" & f & $v
+
+proc getCircuitBenchPath*(args: CircuitArgs, env: CircuitEnv): string =
+  ## generate folder name for unique circuit args
+  result = env.codexProjDir / "benchmarks/circuit_bench" & getCircuitBenchStr(args)
+
+proc generateCircomAndSamples*(args: CircuitArgs, env: CircuitEnv, name: string) =
+  ## run nim circuit and sample generator 
+  var cliCmd = env.nimCircuitCli
+  for f, v in fieldPairs(args):
+    cliCmd &= " --" & f & "=" & $v
+
+  if not "input.json".fileExists:
+    echo "Generating Circom Files..."
+    runit fmt"{cliCmd} -v --circom={name}.circom --output=input.json"
+
+proc createCircuit*(
+    args: CircuitArgs,
+    env: CircuitEnv,
+    name = "proof_main",
+    circBenchDir = getCircuitBenchPath(args, env),
+    someEntropy = "some_entropy_75289v3b7rcawcsyiur",
+    doGenerateWitness = false,
+): tuple[dir: string, name: string] =
+  ## Generates all the files needed to run a proof circuit. Downloads the PTAU file if needed.
+  ##
+  ## All needed circuit files will be generated as needed; files that already exist are reused.
+  ## They will be located in `circBenchDir` which defaults to a folder like:
+  ##    `nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3`
+  ## with all the given CircuitArgs.
+  ## If `doGenerateWitness` is set, the witness is computed with the `generate_witness.js` that circom emits into `{name}_js`.
+  let circdir = circBenchDir
+
+  downloadPtau env.ptauPath, env.ptauUrl
+
+  echo "Creating circuit dir: ", circdir
+  createDir circdir
+  withDir circdir:
+    writeFile("circuit_params.json", pretty(%*args))
+    let
+      inputs = circdir / "input.json"
+      zkey = circdir / fmt"{name}.zkey"
+      wasm = circdir / fmt"{name}.wasm"
+      r1cs = circdir / fmt"{name}.r1cs"
+      wtns = circdir / fmt"{name}.wtns"
+
+    generateCircomAndSamples(args, env, name)
+
+    if not wasm.fileExists or not r1cs.fileExists:
+      runit fmt"circom --r1cs --wasm --O2 -l{env.circuitDirIncludes} {name}.circom"
+      moveFile fmt"{name}_js" / fmt"{name}.wasm", fmt"{name}.wasm"
+    echo "Found wasm: ", wasm
+    echo "Found r1cs: ", r1cs
+
+    if not zkey.fileExists:
+      echo "ZKey not found, generating..."
+      putEnv "NODE_OPTIONS", "--max-old-space-size=8192"
+      if not fmt"{name}_0000.zkey".fileExists:
+        runit fmt"snarkjs groth16 setup {r1cs} {env.ptauPath} {name}_0000.zkey"
+        echo fmt"Generated {name}_0000.zkey"
+
+      let cmd =
+        fmt"snarkjs zkey contribute {name}_0000.zkey {name}_0001.zkey --name='1st Contributor Name'"
+      echo "CMD: ", cmd
+      let cmdRes = execCmdEx(cmd, options = {}, input = someEntropy & "\n")
+      doAssert cmdRes.exitCode == 0, "snarkjs zkey contribute failed:\n" & cmdRes.output
+
+      moveFile fmt"{name}_0001.zkey", fmt"{name}.zkey"
+      removeFile fmt"{name}_0000.zkey"
+
+    if not wtns.fileExists and doGenerateWitness:
+      runit fmt"node {name}_js/generate_witness.js {wasm} {inputs} {wtns}"
+
+  return (circdir, name)
+
+when isMainModule:
+  echo "findCodexProjectDir: ", findCodexProjectDir()
+  ## test run creating a circuit
+  var env = CircuitEnv.default()
+  env.check()
+
+  let args = CircuitArgs(
+    depth: 32, # maximum depth of the slot tree 
+    maxslots: 256, # maximum number of slots
+    cellsize: 2048, # cell size in bytes 
+    blocksize: 65536, # block size in bytes 
+    nsamples: 5, # number of samples to prove
+    entropy: 1234567, # external randomness
+    seed: 12345, # seed for creating fake data
+    nslots: 11, # number of slots in the dataset
+    index: 3, # which slot we prove (0..NSLOTS-1)
+    ncells: 512, # number of cells in this slot
+  )
+  let benchenv = createCircuit(args, env)
+  echo "\nBench dir:\n", benchenv
diff --git a/benchmarks/run_benchmarks.nim b/benchmarks/run_benchmarks.nim
new file mode 100644
index 000000000..f69c13e06
--- /dev/null
+++ b/benchmarks/run_benchmarks.nim
@@ -0,0 +1,105 @@
+import std/[sequtils, strformat, os, options, importutils]
+import std/[times, os, strutils, terminal]
+
+import pkg/questionable
+import pkg/questionable/results
+import pkg/datastore
+
+import pkg/codex/[rng, stores, merkletree, codextypes, slots]
+import pkg/codex/utils/[json, poseidon2digest]
+import pkg/codex/slots/[builder, sampler/utils, backends/helpers]
+import pkg/constantine/math/[arithmetic, io/io_bigints, io/io_fields]
+
+import ./utils
+import ./create_circuits
+
+type CircuitFiles* = object
+  r1cs*: string
+  wasm*: string
+  zkey*: string
+  inputs*: string
+
+proc runArkCircom(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
+  echo "Loading sample proof..."
+  var
+    inputData = files.inputs.readFile()
+    inputJson = !JsonNode.parse(inputData)
+    proofInputs = Poseidon2Hash.jsonToProofInput(inputJson)
+    circom = CircomCompat.init(
+      files.r1cs,
+      files.wasm,
+      files.zkey,
+      slotDepth = args.depth,
+      numSamples = args.nsamples,
+    )
+  defer:
+    circom.release() # this comes from the rust FFI
+
+  echo "Sample proof loaded..."
+  echo "Proving..."
+
+  let nameArgs = getCircuitBenchStr(args)
+  var proof: CircomProof
+  benchmark fmt"prover-{nameArgs}", benchmarkLoops:
+    proof = circom.prove(proofInputs).tryGet
+
+  var verRes: bool
+  benchmark fmt"verify-{nameArgs}", benchmarkLoops:
+    verRes = circom.verify(proof, proofInputs).tryGet
+  echo "verify result: ", verRes
+
+proc runRapidSnark(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
+  # time rapidsnark ${CIRCUIT_MAIN}.zkey witness.wtns proof.json public.json
+
+  echo "generating the witness..."
+  ## TODO
+
+proc runBenchmark(args: CircuitArgs, env: CircuitEnv, benchmarkLoops: int) =
+  ## create the circuit files for `args` via `createCircuit`, then benchmark them
+  ## the files live in a folder like `benchmarks/circuit_bench_$(args)`
+  ##
+
+  let (circuitDir, circuitName) = createCircuit(args, env)
+
+  ## TODO: copy over testcircomcompat proving
+  let files = CircuitFiles(
+    r1cs: circuitDir / (circuitName & ".r1cs"),
+    wasm: circuitDir / (circuitName & ".wasm"),
+    zkey: circuitDir / (circuitName & ".zkey"),
+    inputs: circuitDir / "input.json",
+  )
+
+  runArkCircom(args, files, benchmarkLoops)
+
+proc runAllBenchmarks*() =
+  echo "Running benchmark"
+  # setup()
+  var env = CircuitEnv.default()
+  env.check()
+
+  var args = CircuitArgs(
+    depth: 32, # maximum depth of the slot tree 
+    maxslots: 256, # maximum number of slots  
+    cellsize: 2048, # cell size in bytes 
+    blocksize: 65536, # block size in bytes 
+    nsamples: 1, # number of samples to prove
+    entropy: 1234567, # external randomness
+    seed: 12345, # seed for creating fake data
+    nslots: 11, # number of slots in the dataset
+    index: 3, # which slot we prove (0..NSLOTS-1)
+    ncells: 512, # number of cells in this slot
+  )
+
+  let
+    numberSamples = 3
+    benchmarkLoops = 5
+
+  for i in 1 .. numberSamples:
+    args.nsamples = i
+    stdout.styledWriteLine(fgYellow, "\nbenchmarking args: ", $args)
+    runBenchmark(args, env, benchmarkLoops)
+
+  printBenchMarkSummaries()
+
+when isMainModule:
+  runAllBenchmarks()
diff --git a/benchmarks/utils.nim b/benchmarks/utils.nim
new file mode 100644
index 000000000..af5cdc254
--- /dev/null
+++ b/benchmarks/utils.nim
@@ -0,0 +1,76 @@
+import std/tables
+
+template withDir*(dir: string, blk: untyped) =
+  ## set working dir for duration of blk
+  let prev = getCurrentDir()
+  try:
+    setCurrentDir(dir)
+    `blk`
+  finally:
+    setCurrentDir(prev)
+
+template runit*(cmd: string) =
+  ## run a shell command and abort on a non-zero exit code; uses `doAssert` so the check also runs when assertions are disabled
+  echo "RUNNING: ", cmd
+  let cmdRes = execShellCmd(cmd)
+  echo "STATUS: ", cmdRes
+  doAssert cmdRes == 0, "command failed: " & cmd
+
+var benchRuns* = newTable[string, tuple[avgTimeSec: float, count: int]]()
+
+func avg(vals: openArray[float]): float =
+  for v in vals:
+    result += v / vals.len().toFloat()
+
+template benchmark*(name: untyped, count: int, blk: untyped) =
+  ## run `blk` `count` times, print each run's wall-clock time (`epochTime`) and record the average in `benchRuns`
+  let benchmarkName: string = name
+  var runs = newSeqOfCap[float](count)
+  for i in 1 .. count:
+    block:
+      let t0 = epochTime()
+      `blk`
+      runs.add(epochTime() - t0)
+
+  var elapsedStr = ""
+  for v in runs:
+    if elapsedStr.len > 0: elapsedStr &= ", " # separator only between entries
+    elapsedStr &= v.formatFloat(format = ffDecimal, precision = 3)
+  stdout.styledWriteLine(
+    fgGreen, "Wall time [", benchmarkName, "] ", "runs(", $count, "): ", elapsedStr, " s"
+  )
+  benchRuns[benchmarkName] = (runs.avg(), count)
+
+template printBenchMarkSummaries*(printRegular=true, printTsv=true) =
+  if printRegular:
+    echo ""
+    for k, v in benchRuns:
+      echo "Benchmark average run ", v.avgTimeSec, " for ", v.count, " runs ", "for ", k
+    
+  if printTsv:
+    echo ""
+    echo "name", "\t", "avgTimeSec", "\t", "count"
+    for k, v in benchRuns:
+      echo k, "\t", v.avgTimeSec, "\t", v.count
+
+
+import std/math
+
+func floorLog2*(x: int): int =
+  var k = -1
+  var y = x
+  while (y > 0):
+    k += 1
+    y = y shr 1
+  return k
+
+func ceilingLog2*(x: int): int =
+  if (x == 0):
+    return -1
+  else:
+    return (floorLog2(x - 1) + 1)
+
+func checkPowerOfTwo*(x: int, what: string): int =
+  ## returns `x` after asserting it is a positive power of 2; the `x > 0` guard runs first so `2 ^ -1` is never evaluated for x == 0
+  assert(x > 0 and x == 2 ^ ceilingLog2(x), ("`" & what & "` is expected to be a power of 2"))
+  return x