From efd46148b0d764137a96c7f454dbd6df6c490c97 Mon Sep 17 00:00:00 2001 From: Jaremy Creechley Date: Thu, 23 May 2024 19:28:17 +0300 Subject: [PATCH] Pr add prover benchmark tool (#790) * initial setup * reorg * figuring out basic shell commands * benchmarks * benchmarks * Sets up environment for running benchmarks * updates * integrate setup and proving * updates * adding outputs * cleanup * check failure * benchmarks * benchmarks * benchmarks * benchmarks * benchmarks * benchmarks * formatting * fix running larger sizes * use larger ceremony file size * use larger ceremony file size * use larger ceremony file size * restore benchmarks * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * refactor env * refactor env * refactor env * refactor env * refactor env * rename * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * readme * readme * merge * initial splitout of codex ark prover cli * opts * copying nimcli opts * copying nimcli opts * copying nimcli opts * updating ark cli * updating ark cli * updating ark cli * updating ark cli * updating ark cli * updating ark cli * updating ark cli * updating ark cli * docs * remove file * add param * add benchmarkLoops param * update benchmark formatting * update benchmark formatting * update benchmark formatting * update benchmark formatting * fix naming * fix serde version * Apply suggestions from code review cleanup wording Signed-off-by: Dmitriy Ryajov --------- Signed-off-by: Dmitriy Ryajov Co-authored-by: Dmitriy Ryajov --- benchmarks/.gitignore | 2 + benchmarks/README.md | 33 ++++++ benchmarks/config.nims | 15 +++ benchmarks/create_circuits.nim | 187 +++++++++++++++++++++++++++++++++ benchmarks/run_benchmarks.nim | 105 ++++++++++++++++++ benchmarks/utils.nim | 76 ++++++++++++++ 6 files changed, 418 insertions(+) create mode 100644 benchmarks/.gitignore create mode 100644 benchmarks/README.md create mode 100644 benchmarks/config.nims create mode 100644 
benchmarks/create_circuits.nim create mode 100644 benchmarks/run_benchmarks.nim create mode 100644 benchmarks/utils.nim diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore new file mode 100644 index 000000000..6f6971524 --- /dev/null +++ b/benchmarks/.gitignore @@ -0,0 +1,2 @@ +ceremony +circuit_bench_* diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 000000000..0cff64e93 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,33 @@ + +## Benchmark Runner + +Modify the `runAllBenchmarks` proc in `run_benchmarks.nim` to use the desired parameters and variations. + +Then run it: + +```sh +nim c -r run_benchmarks +``` + +By default all circuit files for each combination of circuit args will be generated in a unique folder named like: + nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3 + +Generating the circuit files often takes longer than running benchmarks, so caching the results allows re-running the benchmark as needed. + +You can modify the `CircuitArgs` and `CircuitEnv` objects in `runAllBenchmarks` to suit your needs. See `create_circuits.nim` for their definition. + +The runner executes all commands relative to the `nim-codex` repo. This simplifies finding the correct circuit include paths, etc. `CircuitEnv` sets all of this. + +## Codex Ark Circom CLI + +Runs Codex's prover setup with Ark / Circom. + +Compile: +```sh +nim c codex_ark_prover_cli.nim +``` + +Run to see usage: +```sh +./codex_ark_prover_cli -h +``` diff --git a/benchmarks/config.nims b/benchmarks/config.nims new file mode 100644 index 000000000..c5c2c5dc4 --- /dev/null +++ b/benchmarks/config.nims @@ -0,0 +1,15 @@ +--path: + ".." 
+--path:
+  "../tests"
+--threads:
+  on
+--tlsEmulation:
+  off
+--d:
+  release
+
+# when not defined(chronicles_log_level):
+#   --define:"chronicles_log_level:NONE" # compile all log statements
+#   --define:"chronicles_sinks:textlines[dynamic]" # allow logs to be filtered at runtime
+#   --"import":"logging" # ensure that logging is ignored at runtime
diff --git a/benchmarks/create_circuits.nim b/benchmarks/create_circuits.nim
new file mode 100644
index 000000000..911dcd515
--- /dev/null
+++ b/benchmarks/create_circuits.nim
@@ -0,0 +1,208 @@
+import std/[hashes, json, strutils, strformat, os, osproc, uri]
+
+import ./utils
+
+type
+  CircuitEnv* = object
+    ## Locations of the external tools and files used to build a circuit.
+    nimCircuitCli*: string ## path of the nim reference cli binary
+    circuitDirIncludes*: string ## include dir passed to circom with `-l`
+    ptauPath*: string ## local path of the powers-of-tau ceremony file
+    ptauUrl*: Uri ## base url the ceremony file is downloaded from
+    codexProjDir*: string ## root dir of the codex project
+
+  CircuitArgs* = object
+    ## Circuit parameters. Every field is passed to the reference cli as a
+    ## `--name=value` option and, in declaration order, becomes part of the
+    ## benchmark folder name.
+    depth*: int ## maximum depth of the slot tree
+    maxslots*: int ## maximum number of slots
+    cellsize*: int ## cell size in bytes
+    blocksize*: int ## block size in bytes
+    nsamples*: int ## number of samples to prove
+    entropy*: int ## external randomness
+    seed*: int ## seed for creating fake data
+    nslots*: int ## number of slots in the dataset
+    ncells*: int ## number of cells in this slot
+    index*: int ## which slot we prove (0..NSLOTS-1)
+
+proc findCodexProjectDir(): string =
+  ## find codex proj dir -- assumes this script is in codex/benchmarks
+  result = currentSourcePath().parentDir.parentDir
+
+func default*(tp: typedesc[CircuitEnv]): CircuitEnv =
+  ## Default environment with every path derived from the codex project dir.
+  let codexDir = findCodexProjectDir()
+  result.nimCircuitCli =
+    codexDir / "vendor" / "codex-storage-proofs-circuits" / "reference" / "nim" /
+    "proof_input" / "cli"
+  result.circuitDirIncludes =
+    codexDir / "vendor" / "codex-storage-proofs-circuits" / "circuit"
+  result.ptauPath =
+    codexDir / "benchmarks" / "ceremony" / "powersOfTau28_hez_final_23.ptau"
+  result.ptauUrl = "https://storage.googleapis.com/zkevm/ptau".parseUri
+  result.codexProjDir = codexDir
+
+proc check*(env: var CircuitEnv) =
+  ## Check that `snarkjs` and `circom` are installed (quits with code 2 if
+  ## not) and build the nim reference circuit cli when it is missing.
+  let codexProjDir = findCodexProjectDir()
+  echo "\n\nFound project dir: ", codexProjDir
+
+  let snarkjs = findExe("snarkjs")
+  if snarkjs == "":
+    echo dedent"""
+    ERROR: must install snarkjs first
+
+      npm install -g snarkjs@latest
+    """
+
+  let circom = findExe("circom")
+  if circom == "":
+    echo dedent"""
+    ERROR: must install circom first
+
+      git clone https://github.com/iden3/circom.git
+      cargo install --path circom
+    """
+
+  if snarkjs == "" or circom == "":
+    quit 2
+
+  echo "Found SnarkJS: ", snarkjs
+  echo "Found Circom: ", circom
+
+  if not env.nimCircuitCli.fileExists:
+    echo "Nim Circuit reference cli not found: ", env.nimCircuitCli
+    echo "Building Circuit reference cli...\n"
+    withDir env.nimCircuitCli.parentDir:
+      runit "nimble build -d:release --styleCheck:off cli"
+    echo "CWD: ", getCurrentDir()
+    # doAssert: `assert` is compiled out with -d:danger
+    doAssert env.nimCircuitCli.fileExists(), "cli build produced no binary"
+
+  echo "Found NimCircuitCli: ", env.nimCircuitCli
+  echo "Found Circuit Path: ", env.circuitDirIncludes
+  echo "Found PTAU file: ", env.ptauPath
+
+proc downloadPtau*(ptauPath: string, ptauUrl: Uri) =
+  ## Download the ptau file using curl unless `ptauPath` already exists.
+  ## The remote file is `ptauUrl` joined with the file name of `ptauPath`.
+  if not ptauPath.fileExists:
+    echo "Ceremony file not found, downloading..."
+    createDir ptauPath.parentDir
+    withDir ptauPath.parentDir:
+      # Download to a temporary name and rename on success, so an aborted or
+      # failed download is never mistaken for a complete ceremony file.
+      # --fail: non-zero exit on HTTP errors, --location: follow redirects.
+      let partPath = ptauPath & ".part"
+      runit fmt"curl --fail --location --output '{partPath}' '{$ptauUrl}/{ptauPath.splitPath().tail}'"
+      moveFile partPath, ptauPath
+  else:
+    echo "Found PTAU file at: ", ptauPath
+
+proc getCircuitBenchStr*(args: CircuitArgs): string =
+  ## Concatenate `_`, the field name and the field value for every field of
+  ## `args`.
+  for f, v in fieldPairs(args):
+    result &= "_" & f & $v
+
+proc getCircuitBenchPath*(args: CircuitArgs, env: CircuitEnv): string =
+  ## generate folder name for unique circuit args
+  result = env.codexProjDir / "benchmarks/circuit_bench" & getCircuitBenchStr(args)
+
+proc generateCircomAndSamples*(args: CircuitArgs, env: CircuitEnv, name: string) =
+  ## Run the nim circuit and sample generator in the current working dir.
+  ## Nothing is generated when `input.json` already exists there.
+  var cliCmd = env.nimCircuitCli
+  for f, v in fieldPairs(args):
+    cliCmd &= " --" & f & "=" & $v
+
+  if not "input.json".fileExists:
+    echo "Generating Circom Files..."
+    runit fmt"{cliCmd} -v --circom={name}.circom --output=input.json"
+
+proc createCircuit*(
+    args: CircuitArgs,
+    env: CircuitEnv,
+    name = "proof_main",
+    circBenchDir = getCircuitBenchPath(args, env),
+    someEntropy = "some_entropy_75289v3b7rcawcsyiur",
+    doGenerateWitness = false,
+): tuple[dir: string, name: string] =
+  ## Generates all the files needed to run a proof circuit. Downloads the PTAU file if needed.
+  ##
+  ## All needed circuit files will be generated as needed.
+  ## They will be located in `circBenchDir` which defaults to a folder like:
+  ##    `nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3`
+  ## with all the given CircuitArgs.
+  ##
+  ## Returns the (absolute) circuit dir and `name`.
+  # make the dir absolute: the paths below are used after changing the CWD
+  let circdir = circBenchDir.absolutePath
+
+  downloadPtau env.ptauPath, env.ptauUrl
+
+  echo "Creating circuit dir: ", circdir
+  createDir circdir
+  withDir circdir:
+    writeFile("circuit_params.json", pretty(%*args))
+    let
+      inputs = circdir / "input.json"
+      zkey = circdir / fmt"{name}.zkey"
+      wasm = circdir / fmt"{name}.wasm"
+      r1cs = circdir / fmt"{name}.r1cs"
+      wtns = circdir / fmt"{name}.wtns"
+
+    generateCircomAndSamples(args, env, name)
+
+    if not wasm.fileExists or not r1cs.fileExists:
+      runit fmt"circom --r1cs --wasm --O2 -l{env.circuitDirIncludes} {name}.circom"
+      moveFile fmt"{name}_js" / fmt"{name}.wasm", fmt"{name}.wasm"
+    echo "Found wasm: ", wasm
+    echo "Found r1cs: ", r1cs
+
+    if not zkey.fileExists:
+      echo "ZKey not found, generating..."
+      putEnv "NODE_OPTIONS", "--max-old-space-size=8192"
+      if not fmt"{name}_0000.zkey".fileExists:
+        runit fmt"snarkjs groth16 setup {r1cs} {env.ptauPath} {name}_0000.zkey"
+        echo fmt"Generated {name}_0000.zkey"
+
+      let cmd =
+        fmt"snarkjs zkey contribute {name}_0000.zkey {name}_0001.zkey --name='1st Contributor Name'"
+      echo "CMD: ", cmd
+      # the entropy text is fed to the command on stdin
+      let cmdRes = execCmdEx(cmd, options = {}, input = someEntropy & "\n")
+      doAssert cmdRes.exitCode == 0, "snarkjs zkey contribute failed: " & cmdRes.output
+
+      moveFile fmt"{name}_0001.zkey", fmt"{name}.zkey"
+      removeFile fmt"{name}_0000.zkey"
+
+    if not wtns.fileExists and doGenerateWitness:
+      # generate_witness.js stays in `{name}_js` (only the wasm is moved out)
+      # and takes, in order: the wasm file, the input json, the output wtns
+      runit fmt"node {name}_js/generate_witness.js {wasm} {inputs} {wtns}"
+
+  return (circdir, name)
+
+when isMainModule:
+  echo "findCodexProjectDir: ", findCodexProjectDir()
+  ## test run creating a circuit
+  var env = CircuitEnv.default()
+  env.check()
+
+  let args = CircuitArgs(
+    depth: 32, # maximum depth of the slot tree
+    maxslots: 256, # maximum number of slots
+    cellsize: 2048, # cell size in bytes
+    blocksize: 65536, # block size in bytes
+    nsamples: 5, # number of samples to prove
+    entropy: 1234567, # external randomness
+    seed: 12345, # seed for creating fake data
+    nslots: 11, # number of slots in the dataset
+    index: 3, # which slot we prove (0..NSLOTS-1)
+    ncells: 512, # number of cells in this slot
+  )
+  let benchenv = createCircuit(args, env)
+  echo "\nBench dir:\n", benchenv
diff --git a/benchmarks/run_benchmarks.nim b/benchmarks/run_benchmarks.nim
new file mode 100644
index 000000000..f69c13e06
--- /dev/null
+++ b/benchmarks/run_benchmarks.nim
@@ -0,0 +1,111 @@
+import std/[sequtils, strformat, os, options, importutils]
+import std/[times, os, strutils, terminal]
+
+import pkg/questionable
+import pkg/questionable/results
+import pkg/datastore
+
+import pkg/codex/[rng, stores, merkletree, codextypes, slots]
+import pkg/codex/utils/[json, poseidon2digest]
+import pkg/codex/slots/[builder, sampler/utils, backends/helpers]
+import pkg/constantine/math/[arithmetic, io/io_bigints, io/io_fields]
+
+import ./utils
+import ./create_circuits
+
+type CircuitFiles* = object
+  ## Paths of the circuit files handed to the prover.
+  r1cs*: string
+  wasm*: string
+  zkey*: string
+  inputs*: string
+
+proc runArkCircom(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
+  ## Load the proof inputs from `files.inputs`, then benchmark proving and
+  ## verifying, `benchmarkLoops` runs each.
+  echo "Loading sample proof..."
+  var
+    inputData = files.inputs.readFile()
+    inputJson = !JsonNode.parse(inputData)
+    proofInputs = Poseidon2Hash.jsonToProofInput(inputJson)
+    circom = CircomCompat.init(
+      files.r1cs,
+      files.wasm,
+      files.zkey,
+      slotDepth = args.depth,
+      numSamples = args.nsamples,
+    )
+  defer:
+    circom.release() # this comes from the rust FFI
+
+  echo "Sample proof loaded..."
+  echo "Proving..."
+
+  let nameArgs = getCircuitBenchStr(args)
+  var proof: CircomProof
+  benchmark fmt"prover-{nameArgs}", benchmarkLoops:
+    proof = circom.prove(proofInputs).tryGet
+
+  var verRes: bool
+  benchmark fmt"verify-{nameArgs}", benchmarkLoops:
+    verRes = circom.verify(proof, proofInputs).tryGet
+  echo "verify result: ", verRes
+
+proc runRapidSnark(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
+  # time rapidsnark ${CIRCUIT_MAIN}.zkey witness.wtns proof.json public.json
+
+  echo "generating the witness..."
+  ## TODO
+
+proc runBenchmark(args: CircuitArgs, env: CircuitEnv, benchmarkLoops: int) =
+  ## execute benchmarks given a set of args
+  ## will create a folder in `benchmarks/circuit_bench_$(args)`
+  ##
+
+  # not named `env`: that would shadow the CircuitEnv parameter
+  let circuit = createCircuit(args, env)
+
+  ## TODO: copy over testcircomcompat proving
+  let files = CircuitFiles(
+    r1cs: circuit.dir / fmt"{circuit.name}.r1cs",
+    wasm: circuit.dir / fmt"{circuit.name}.wasm",
+    zkey: circuit.dir / fmt"{circuit.name}.zkey",
+    inputs: circuit.dir / fmt"input.json",
+  )
+
+  runArkCircom(args, files, benchmarkLoops)
+
+proc runAllBenchmarks*() =
+  ## Check the environment, then benchmark circuits with `nsamples` from 1 to
+  ## `numberSamples` and print the summaries.
+  echo "Running benchmark"
+  # setup()
+  var env = CircuitEnv.default()
+  env.check()
+
+  var args = CircuitArgs(
+    depth: 32, # maximum depth of the slot tree
+    maxslots: 256, # maximum number of slots
+    cellsize: 2048, # cell size in bytes
+    blocksize: 65536, # block size in bytes
+    nsamples: 1, # number of samples to prove
+    entropy: 1234567, # external randomness
+    seed: 12345, # seed for creating fake data
+    nslots: 11, # number of slots in the dataset
+    index: 3, # which slot we prove (0..NSLOTS-1)
+    ncells: 512, # number of cells in this slot
+  )
+
+  let
+    numberSamples = 3
+    benchmarkLoops = 5
+
+  for i in 1 .. numberSamples:
+    args.nsamples = i
+    stdout.styledWriteLine(fgYellow, "\nbenchmarking args: ", $args)
+    runBenchmark(args, env, benchmarkLoops)
+
+  printBenchMarkSummaries()
+
+when isMainModule:
+  runAllBenchmarks()
diff --git a/benchmarks/utils.nim b/benchmarks/utils.nim
new file mode 100644
index 000000000..af5cdc254
--- /dev/null
+++ b/benchmarks/utils.nim
@@ -0,0 +1,93 @@
+import std/[math, os, strutils, tables, terminal, times]
+
+template withDir*(dir: string, blk: untyped) =
+  ## set working dir for duration of blk
+  let prev = getCurrentDir()
+  try:
+    setCurrentDir(dir)
+    `blk`
+  finally:
+    setCurrentDir(prev)
+
+template runit*(cmd: string) =
+  ## run shell commands and verify it runs without an error code
+  # evaluate `cmd` once; a template parameter is re-evaluated on every use
+  let cmdStr: string = cmd
+  echo "RUNNING: ", cmdStr
+  let cmdRes = execShellCmd(cmdStr)
+  echo "STATUS: ", cmdRes
+  # doAssert: `assert` is compiled out with -d:danger
+  doAssert cmdRes == 0, "command failed with status " & $cmdRes & ": " & cmdStr
+
+var benchRuns* = newTable[string, tuple[avgTimeSec: float, count: int]]()
+
+func avg(vals: openArray[float]): float =
+  ## arithmetic mean of `vals`, 0.0 when `vals` is empty
+  if vals.len == 0:
+    return 0.0
+  for v in vals:
+    result += v
+  result /= vals.len.toFloat
+
+template benchmark*(name: untyped, count: int, blk: untyped) =
+  ## simple benchmarking of a block of code
+  ##
+  ## Runs `blk` `count` times, prints the wall-clock time of every run and
+  ## stores the average in `benchRuns` under `name`.
+  let
+    benchmarkName: string = name
+    benchmarkCount: int = count
+  var runs = newSeqOfCap[float](benchmarkCount)
+  for i in 1 .. benchmarkCount:
+    block:
+      let t0 = epochTime()
+      `blk`
+      let elapsed = epochTime() - t0
+      runs.add elapsed
+
+  var elapsedStr = ""
+  for v in runs:
+    if elapsedStr.len > 0:
+      elapsedStr &= ", "
+    elapsedStr &= v.formatFloat(format = ffDecimal, precision = 3)
+  # `epochTime` is wall-clock time, not CPU time
+  stdout.styledWriteLine(
+    fgGreen, "Wall Time [", benchmarkName, "] ", "runs(", $benchmarkCount, "): ", elapsedStr, " s"
+  )
+  benchRuns[benchmarkName] = (avg(runs), benchmarkCount)
+
+template printBenchMarkSummaries*(printRegular=true, printTsv=true) =
+  ## print the averages recorded in `benchRuns`, human readable and/or as TSV
+  if printRegular:
+    echo ""
+    for k, v in benchRuns:
+      echo "Benchmark average run ", v.avgTimeSec, " for ", v.count, " runs ", "for ", k
+
+  if printTsv:
+    echo ""
+    echo "name", "\t", "avgTimeSec", "\t", "count"
+    for k, v in benchRuns:
+      echo k, "\t", v.avgTimeSec, "\t", v.count
+
+func floorLog2*(x: int): int =
+  ## floor(log2(x)) for `x > 0`, -1 otherwise
+  var k = -1
+  var y = x
+  while (y > 0):
+    k += 1
+    y = y shr 1
+  return k
+
+func ceilingLog2*(x: int): int =
+  ## ceil(log2(x)) for `x > 0`, -1 otherwise
+  if x <= 0:
+    return -1
+  else:
+    return (floorLog2(x - 1) + 1)
+
+func checkPowerOfTwo*(x: int, what: string): int =
+  ## return `x` after asserting that it is a power of two
+  # `isPowerOfTwo` is false for zero and negative numbers, where computing
+  # `2 ^ ceilingLog2(x)` raised a RangeDefect (x == 0) instead of this message
+  doAssert(x.isPowerOfTwo, ("`" & what & "` is expected to be a power of 2"))
+  return x