Merge branch 'master' into feature/ceremony-files

# Conflicts: # .gitmodules # docker/codex.Dockerfile
codex-storage · Jun 2, 2024 · 9d146c8 · 9d146c8
2 parents c3fb06b + 3246c43
commit 9d146c8
Show file tree

Hide file tree

Showing 59 changed files with 1,591 additions and 522 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -212,6 +212,6 @@
 [submodule "vendor/nim-serde"]
 	path = vendor/nim-serde
 	url = https://github.com/codex-storage/nim-serde.git
-[submodule "vendor/zip"]
-	path = vendor/zip
-	url = https://github.com/nim-lang/zip.git
+[submodule "vendor/nim-leveldbstatic"]
+	path = vendor/nim-leveldbstatic
+	url = https://github.com/codex-storage/nim-leveldb.git
diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
@@ -0,0 +1,2 @@
+ceremony
+circuit_bench_*
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,33 @@
+
+## Benchmark Runner
+
+Modify `runAllBenchmarks` proc in `run_benchmarks.nim` to the desired parameters and variations.
+
+Then run it:
+
+```sh
+nim c -r run_benchmarks
+```
+
+By default, all circuit files for each combination of circuit args will be generated in a unique folder named like:
+    nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3
+
+Generating the circuit files often takes longer than running benchmarks, so caching the results allows re-running the benchmark as needed.
+
+You can modify the `CircuitArgs` and `CircuitEnv` objects in `runAllBenchmarks` to suit your needs. See `create_circuits.nim` for their definitions.
+
+The runner executes all commands relative to the `nim-codex` repo. This simplifies finding the correct circuit include paths, etc. `CircuitEnv` sets all of this.
+
+## Codex Ark Circom CLI
+
+Runs Codex's prover setup with Ark / Circom.
+
+Compile:
+```sh
+nim c codex_ark_prover_cli.nim
+```
+
+Run to see usage:
+```sh
+./codex_ark_prover_cli -h
+```
diff --git a/benchmarks/config.nims b/benchmarks/config.nims
@@ -0,0 +1,15 @@
+--path:
+  ".."
+--path:
+  "../tests"
+--threads:
+  on
+--tlsEmulation:
+  off
+--d:
+  release
+
+# when not defined(chronicles_log_level):
+#   --define:"chronicles_log_level:NONE" # compile all log statements
+#   --define:"chronicles_sinks:textlines[dynamic]" # allow logs to be filtered at runtime
+#   --"import":"logging" # ensure that logging is ignored at runtime
diff --git a/benchmarks/create_circuits.nim b/benchmarks/create_circuits.nim
@@ -0,0 +1,187 @@
+import std/[hashes, json, strutils, strformat, os, osproc, uri]
+
+import ./utils
+
+type
+  CircuitEnv* = object
+    ## Paths and URLs the circuit tooling is driven with.
+    ##
+    ## * `nimCircuitCli`: the reference circuit CLI executable
+    ## * `circuitDirIncludes`: include directory handed to `circom -l`
+    ## * `ptauPath`: local location of the ceremony (PTAU) file
+    ## * `ptauUrl`: base URL the ceremony file is downloaded from
+    ## * `codexProjDir`: root of the codex project checkout
+    nimCircuitCli*, circuitDirIncludes*, ptauPath*: string
+    ptauUrl*: Uri
+    codexProjDir*: string
+
+  CircuitArgs* = object
+    ## Parameters of one circuit variation. The field order is significant:
+    ## it determines the folder name and the CLI flags derived from it.
+    ##
+    ## * `depth`: maximum depth of the slot tree
+    ## * `maxslots`: maximum number of slots
+    ## * `cellsize`: cell size in bytes
+    ## * `blocksize`: block size in bytes
+    ## * `nsamples`: number of samples to prove
+    ## * `entropy`: external randomness
+    ## * `seed`: seed for creating fake data
+    ## * `nslots`: number of slots in the dataset
+    ## * `ncells`: number of cells in this slot
+    ## * `index`: which slot we prove (0..NSLOTS-1)
+    depth*, maxslots*, cellsize*, blocksize*, nsamples*: int
+    entropy*, seed*, nslots*, ncells*, index*: int
+
+proc findCodexProjectDir(): string =
+  ## Locate the codex project dir as the grandparent of this source file;
+  ## assumes this script lives in codex/benchmarks.
+  let benchmarksDir = parentDir(currentSourcePath())
+  parentDir(benchmarksDir)
+
+func default*(tp: typedesc[CircuitEnv]): CircuitEnv =
+  ## Build a `CircuitEnv` whose paths are all derived from the codex project
+  ## directory this source file belongs to.
+  let
+    root = findCodexProjectDir()
+    circuitsRepo = root / "vendor" / "codex-storage-proofs-circuits"
+  CircuitEnv(
+    nimCircuitCli: circuitsRepo / "reference" / "nim" / "proof_input" / "cli",
+    circuitDirIncludes: circuitsRepo / "circuit",
+    ptauPath:
+      root / "benchmarks" / "ceremony" / "powersOfTau28_hez_final_23.ptau",
+    ptauUrl: parseUri("https://storage.googleapis.com/zkevm/ptau"),
+    codexProjDir: root,
+  )
+
+proc check*(env: var CircuitEnv) =
+  ## Verify that the external tooling needed to build circuits is available.
+  ##
+  ## * Looks up `snarkjs` and `circom` on the PATH; prints install hints and
+  ##   quits with exit code 2 when either one is missing.
+  ## * Builds the Nim circuit reference cli with nimble when
+  ##   `env.nimCircuitCli` does not exist yet.
+  ##
+  ## The configured circuit include path and PTAU path are only echoed here;
+  ## their existence is not checked by this proc.
+  let codexProjDir = findCodexProjectDir()
+  echo "\n\nFound project dir: ", codexProjDir
+
+  let snarkjs = findExe("snarkjs")
+  if snarkjs == "":
+    echo dedent"""
+    ERROR: must install snarkjs first
+
+      npm install -g snarkjs@latest
+    """
+
+  let circom = findExe("circom")
+  if circom == "":
+    echo dedent"""
+    ERROR: must install circom first
+
+      git clone https://github.com/iden3/circom.git
+      cargo install --path circom
+    """
+
+  # report both missing tools before bailing out
+  if snarkjs == "" or circom == "":
+    quit 2
+
+  echo "Found SnarkJS: ", snarkjs
+  echo "Found Circom: ", circom
+
+  if not env.nimCircuitCli.fileExists:
+    echo "Nim Circuit reference cli not found: ", env.nimCircuitCli
+    echo "Building Circuit reference cli...\n"
+    withDir env.nimCircuitCli.parentDir:
+      runit "nimble build -d:release --styleCheck:off cli"
+    echo "CWD: ", getCurrentDir()
+    # doAssert (not assert): this must still fire when assertions are
+    # compiled out, otherwise a failed build goes unnoticed until later
+    doAssert env.nimCircuitCli.fileExists(),
+      "Nim Circuit reference cli was not built: " & env.nimCircuitCli
+
+  echo "Found NimCircuitCli: ", env.nimCircuitCli
+  echo "Found Circuit Path: ", env.circuitDirIncludes
+  echo "Found PTAU file: ", env.ptauPath
+
+proc downloadPtau*(ptauPath: string, ptauUrl: Uri) =
+  ## Download the ceremony (PTAU) file with curl unless `ptauPath` exists.
+  ##
+  ## The file is fetched from `ptauUrl` joined with the file name of
+  ## `ptauPath` and stored in the parent directory of `ptauPath`, which is
+  ## created when missing.
+  if ptauPath.fileExists:
+    echo "Found PTAU file at: ", ptauPath
+    return
+
+  echo "Ceremony file not found, downloading..."
+  let
+    ptauDir = ptauPath.parentDir
+    ptauFile = ptauPath.splitPath().tail
+    source = $ptauUrl & "/" & ptauFile
+  createDir ptauDir
+  withDir ptauDir:
+    # --fail: without it an HTTP error page would be saved as the ptau file
+    #         and be mistaken for a valid download on the next run
+    # --location: follow redirects issued by the storage host
+    # Output goes to the bare file name because the working directory is
+    # already `ptauDir`; this also keeps a relative `ptauPath` working.
+    runit "curl --fail --location --output " & quoteShell(ptauFile) & " " &
+      quoteShell(source)
+
+proc getCircuitBenchStr*(args: CircuitArgs): string =
+  ## Render every field of `args` as `_<name><value>`, concatenated in field
+  ## declaration order.
+  for fieldName, fieldValue in fieldPairs(args):
+    result.add "_"
+    result.add fieldName
+    result.add $fieldValue
+
+proc getCircuitBenchPath*(args: CircuitArgs, env: CircuitEnv): string =
+  ## Folder path that is unique for the given circuit args: a fixed prefix
+  ## inside the project dir followed by the rendered args.
+  let prefix = env.codexProjDir / "benchmarks/circuit_bench"
+  prefix & getCircuitBenchStr(args)
+
+proc generateCircomAndSamples*(args: CircuitArgs, env: CircuitEnv, name: string) =
+  ## Run the Nim circuit cli to produce `<name>.circom` and `input.json` in
+  ## the current directory. Does nothing when `input.json` already exists.
+  if fileExists("input.json"):
+    return
+
+  # every CircuitArgs field becomes a `--<field>=<value>` flag
+  var cliCmd = env.nimCircuitCli
+  for flag, value in fieldPairs(args):
+    cliCmd.add " --" & flag & "=" & $value
+
+  echo "Generating Circom Files..."
+  runit fmt"{cliCmd} -v --circom={name}.circom --output=input.json"
+
+proc createCircuit*(
+    args: CircuitArgs,
+    env: CircuitEnv,
+    name = "proof_main",
+    circBenchDir = getCircuitBenchPath(args, env),
+    someEntropy = "some_entropy_75289v3b7rcawcsyiur",
+    doGenerateWitness = false,
+): tuple[dir: string, name: string] =
+  ## Generates all the files needed to run a proof circuit. Downloads the
+  ## PTAU file if needed.
+  ##
+  ## Files are only generated when missing, so an existing directory is
+  ## reused. They are located in `circBenchDir`, which defaults to a folder
+  ## like:
+  ##    `nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3`
+  ## with all the given CircuitArgs.
+  ##
+  ## * `name`: base name of the circom/wasm/r1cs/zkey/wtns files
+  ## * `someEntropy`: text fed to `snarkjs zkey contribute` on stdin
+  ## * `doGenerateWitness`: also compute `<name>.wtns` from `input.json`
+  ##
+  ## Returns the (absolute) circuit directory and `name`.
+  # Absolute, because the paths derived below are used after the working
+  # directory has been switched to the circuit dir.
+  let circdir = circBenchDir.absolutePath
+
+  downloadPtau env.ptauPath, env.ptauUrl
+
+  echo "Creating circuit dir: ", circdir
+  createDir circdir
+  withDir circdir:
+    writeFile("circuit_params.json", pretty(%*args))
+    let
+      inputs = circdir / "input.json"
+      zkey = circdir / fmt"{name}.zkey"
+      wasm = circdir / fmt"{name}.wasm"
+      r1cs = circdir / fmt"{name}.r1cs"
+      wtns = circdir / fmt"{name}.wtns"
+
+    generateCircomAndSamples(args, env, name)
+
+    if not wasm.fileExists or not r1cs.fileExists:
+      let includes = quoteShell(env.circuitDirIncludes)
+      runit fmt"circom --r1cs --wasm --O2 -l{includes} {name}.circom"
+      # circom emits the wasm into `<name>_js/`; keep it next to the r1cs
+      moveFile fmt"{name}_js" / fmt"{name}.wasm", fmt"{name}.wasm"
+    echo "Found wasm: ", wasm
+    echo "Found r1cs: ", r1cs
+
+    if not zkey.fileExists:
+      echo "ZKey not found, generating..."
+      putEnv "NODE_OPTIONS", "--max-old-space-size=8192"
+      if not fmt"{name}_0000.zkey".fileExists:
+        let
+          r1csArg = quoteShell(r1cs)
+          ptauArg = quoteShell(env.ptauPath)
+        runit fmt"snarkjs groth16 setup {r1csArg} {ptauArg} {name}_0000.zkey"
+        echo fmt"Generated {name}_0000.zkey"
+
+      let cmd =
+        fmt"snarkjs zkey contribute {name}_0000.zkey {name}_0001.zkey --name='1st Contributor Name'"
+      echo "CMD: ", cmd
+      # snarkjs prompts for entropy; answer it through stdin
+      let cmdRes = execCmdEx(cmd, options = {}, input = someEntropy & "\n")
+      # doAssert (not assert): must still fire when assertions are disabled
+      doAssert cmdRes.exitCode == 0,
+        "snarkjs zkey contribute failed:\n" & cmdRes.output
+
+      moveFile fmt"{name}_0001.zkey", fmt"{name}.zkey"
+      removeFile fmt"{name}_0000.zkey"
+
+    if not wtns.fileExists and doGenerateWitness:
+      # circom's generator script stays in `<name>_js/` and expects
+      # `<wasm> <input.json> <output.wtns>`
+      let generator = fmt"{name}_js" / "generate_witness.js"
+      runit "node " & quoteShell(generator) & " " & quoteShell(wasm) & " " &
+        quoteShell(inputs) & " " & quoteShell(wtns)
+
+  return (circdir, name)
+
+when isMainModule:
+  ## test run creating a circuit
+  echo "findCodexProjectDir: ", findCodexProjectDir()
+
+  var circuitEnv = CircuitEnv.default()
+  circuitEnv.check()
+
+  let sampleArgs = CircuitArgs(
+    depth: 32, # maximum depth of the slot tree
+    maxslots: 256, # maximum number of slots
+    cellsize: 2048, # cell size in bytes
+    blocksize: 65536, # block size in bytes
+    nsamples: 5, # number of samples to prove
+    entropy: 1234567, # external randomness
+    seed: 12345, # seed for creating fake data
+    nslots: 11, # number of slots in the dataset
+    index: 3, # which slot we prove (0..NSLOTS-1)
+    ncells: 512, # number of cells in this slot
+  )
+  let created = createCircuit(sampleArgs, circuitEnv)
+  echo "\nBench dir:\n", created
diff --git a/benchmarks/run_benchmarks.nim b/benchmarks/run_benchmarks.nim
@@ -0,0 +1,105 @@
+import std/[sequtils, strformat, os, options, importutils]
+import std/[times, os, strutils, terminal]
+
+import pkg/questionable
+import pkg/questionable/results
+import pkg/datastore
+
+import pkg/codex/[rng, stores, merkletree, codextypes, slots]
+import pkg/codex/utils/[json, poseidon2digest]
+import pkg/codex/slots/[builder, sampler/utils, backends/helpers]
+import pkg/constantine/math/[arithmetic, io/io_bigints, io/io_fields]
+
+import ./utils
+import ./create_circuits
+
+type
+  CircuitFiles* = object
+    ## Paths of the generated circuit artifacts a benchmark run loads:
+    ## the `.r1cs`, `.wasm` and `.zkey` files plus the `input.json` sample.
+    r1cs*, wasm*, zkey*, inputs*: string
+
+proc runArkCircom(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
+  ## Benchmark proving and verifying with the Ark/Circom backend.
+  ##
+  ## Reads the sample inputs from `files.inputs`, initialises the backend
+  ## from the r1cs/wasm/zkey files and runs the `prove` and `verify` steps
+  ## through the `benchmark` helper with `benchmarkLoops`.
+  echo "Loading sample proof..."
+  var
+    rawInputs = readFile(files.inputs)
+    parsedInputs = !JsonNode.parse(rawInputs)
+    sampleInputs = Poseidon2Hash.jsonToProofInput(parsedInputs)
+    backend = CircomCompat.init(
+      files.r1cs,
+      files.wasm,
+      files.zkey,
+      slotDepth = args.depth,
+      numSamples = args.nsamples,
+    )
+  defer:
+    backend.release() # this comes from the rust FFI
+
+  echo "Sample proof loaded..."
+  echo "Proving..."
+
+  # the rendered args tag each benchmark entry
+  let benchTag = getCircuitBenchStr(args)
+
+  var proof: CircomProof
+  benchmark fmt"prover-{benchTag}", benchmarkLoops:
+    proof = backend.prove(sampleInputs).tryGet
+
+  var verified: bool
+  benchmark fmt"verify-{benchTag}", benchmarkLoops:
+    verified = backend.verify(proof, sampleInputs).tryGet
+  echo "verify result: ", verified
+
+proc runRapidSnark(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
+  ## Placeholder for a rapidsnark benchmark; currently only prints a message.
+  ##
+  ## Intended command:
+  ##   time rapidsnark ${CIRCUIT_MAIN}.zkey witness.wtns proof.json public.json
+  ##
+  ## TODO
+  echo "generating the witness..."
+
+proc runBenchmark(args: CircuitArgs, env: CircuitEnv, benchmarkLoops: int) =
+  ## Execute the benchmarks for one set of args.
+  ## The circuit files are created (or reused) by `createCircuit`, by
+  ## default in a folder `benchmarks/circuit_bench_$(args)`.
+  let circuit = createCircuit(args, env)
+
+  ## TODO: copy over testcircomcompat proving
+  var files: CircuitFiles
+  files.r1cs = circuit.dir / (circuit.name & ".r1cs")
+  files.wasm = circuit.dir / (circuit.name & ".wasm")
+  files.zkey = circuit.dir / (circuit.name & ".zkey")
+  files.inputs = circuit.dir / "input.json"
+
+  runArkCircom(args, files, benchmarkLoops)
+
+proc runAllBenchmarks*() =
+  ## Entry point of the benchmark runner: checks the environment, then runs
+  ## the benchmark once per `nsamples` value from 1 up to `numberSamples`
+  ## and finally prints the collected summaries.
+  const
+    numberSamples = 3
+    benchmarkLoops = 5
+
+  echo "Running benchmark"
+  # setup()
+  var circuitEnv = CircuitEnv.default()
+  circuitEnv.check()
+
+  var circuitArgs = CircuitArgs(
+    depth: 32, # maximum depth of the slot tree
+    maxslots: 256, # maximum number of slots
+    cellsize: 2048, # cell size in bytes
+    blocksize: 65536, # block size in bytes
+    nsamples: 1, # number of samples to prove
+    entropy: 1234567, # external randomness
+    seed: 12345, # seed for creating fake data
+    nslots: 11, # number of slots in the dataset
+    index: 3, # which slot we prove (0..NSLOTS-1)
+    ncells: 512, # number of cells in this slot
+  )
+
+  for sampleCount in 1 .. numberSamples:
+    # only the number of samples varies between runs
+    circuitArgs.nsamples = sampleCount
+    stdout.styledWriteLine(fgYellow, "\nbenchmarking args: ", $circuitArgs)
+    runBenchmark(circuitArgs, circuitEnv, benchmarkLoops)
+
+  printBenchMarkSummaries()
+
+when isMainModule:
+  # run the full benchmark suite when compiled as the main program
+  runAllBenchmarks()