Pr add prover benchmark tool (#790)

* initial setup * reorg * figuring out basic shell commands * benchmarks * benchmarks * Sets up environment for running benchmarks * updates * integrate setup and proving * updates * adding outputs * cleanup * check failure * benchmarks * benchmarks * benchmarks * benchmarks * benchmarks * benchmarks * formatting * fix running larger sizes * use larger ceremony file size * use larger ceremony file size * use larger ceremony file size * restore benchmarks * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * refactor env * refactor env * refactor env * refactor env * refactor env * rename * cleanup * cleanup * cleanup * cleanup * cleanup * cleanup * readme * readme * merge * initial splitout of codex ark prover cli * opts * copying nimcli opts * copying nimcli opts * copying nimcli opts * updating ark cli * updating ark cli * updating ark cli * updating ark cli * updating ark cli * updating ark cli * updating ark cli * updating ark cli * docs * remove file * add param * add benchmarkLoops param * update benchmark formatting * update benchmark formatting * update benchmark formatting * update benchmark formatting * fix naming * fix serde version * Apply suggestions from code review cleanup wording Signed-off-by: Dmitriy Ryajov <dryajov@gmail.com> --------- Signed-off-by: Dmitriy Ryajov <dryajov@gmail.com> Co-authored-by: Dmitriy Ryajov <dryajov@gmail.com>
2024-05-23 19:28:17 +03:00 · 2024-05-23 19:28:17 +03:00 · efd46148b0
parent 3046b7636c
commit efd46148b0
6 changed files with 418 additions and 0 deletions
--- a/benchmarks/.gitignore
+++ b/benchmarks/.gitignore
@ -0,0 +1,2 @@
 ceremony
 circuit_bench_*
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@ -0,0 +1,33 @@
 ## Benchmark Runner
 Modify `runAllBenchmarks` proc in `run_benchmarks.nim` to the desired parameters and variations.
 Then run it:
 ```sh
 nim c -r run_benchmarks
 ```
 By default, all circuit files for each combination of circuit args are generated in a unique folder named like:
    nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3
 Generating the circuit files often takes longer than running benchmarks, so caching the results allows re-running the benchmark as needed.
 You can modify the `CircuitArgs` and `CircuitEnv` objects in `runAllBenchmarks` to suit your needs. See `create_circuits.nim` for their definitions.
 The runner executes all commands relative to the `nim-codex` repo. This simplifies finding the correct circuit includes paths, etc. `CircuitEnv` sets all of this.
 ## Codex Ark Circom CLI
 Runs Codex's prover setup with Ark / Circom.
 Compile:
 ```sh
 nim c codex_ark_prover_cli.nim
 ```
 Run to see usage:
 ```sh
 ./codex_ark_prover_cli -h
 ```
--- a/benchmarks/config.nims
+++ b/benchmarks/config.nims
@ -0,0 +1,15 @@
 # Add the parent directory to the module search path
 --path:
  ".."
 # Add the parent directory's `tests` folder to the module search path
 --path:
  "../tests"
 # Compile with thread support
 --threads:
  on
 # Turn off emulation of thread local storage
 --tlsEmulation:
  off
 # Define the `release` symbol, i.e. make a release build
 --d:
  release
 # when not defined(chronicles_log_level):
 #   --define:"chronicles_log_level:NONE" # compile all log statements
 #   --define:"chronicles_sinks:textlines[dynamic]" # allow logs to be filtered at runtime
 #   --"import":"logging" # ensure that logging is ignored at runtime
--- a/benchmarks/create_circuits.nim
+++ b/benchmarks/create_circuits.nim
@ -0,0 +1,187 @@
 import std/[hashes, json, strutils, strformat, os, osproc, uri]
 import ./utils
 type
  CircuitEnv* = object
    ## Locations of the tools and files used to generate a circuit.
    nimCircuitCli*: string ## path of the Nim circuit reference cli executable
    circuitDirIncludes*: string ## directory passed to `circom` via `-l`
    ptauPath*: string ## local path of the ceremony (PTAU) file
    ptauUrl*: Uri ## base url the ceremony file is downloaded from
    codexProjDir*: string ## codex project directory
  CircuitArgs* = object
    ## Circuit parameters. Every field is passed to the circuit cli as
    ## `--<field>=<value>` and becomes part of the benchmark folder name.
    depth*: int ## maximum depth of the slot tree
    maxslots*: int ## maximum number of slots
    cellsize*: int ## cell size in bytes
    blocksize*: int ## block size in bytes
    nsamples*: int ## number of samples to prove
    entropy*: int ## external randomness
    seed*: int ## seed for creating fake data
    nslots*: int ## number of slots in the dataset
    ncells*: int ## number of cells in this slot
    index*: int ## which slot we prove (0..NSLOTS-1)
 proc findCodexProjectDir(): string =
  ## Returns the parent of the directory that contains this source file.
  ## This is the codex project dir as long as this script lives in
  ## `codex/benchmarks`.
  let scriptDir = currentSourcePath().parentDir
  scriptDir.parentDir
 func default*(tp: typedesc[CircuitEnv]): CircuitEnv =
  ## Builds the default environment; every path is derived from the codex
  ## project directory.
  let
    codexDir = findCodexProjectDir()
    circuitsDir = codexDir / "vendor" / "codex-storage-proofs-circuits"
  CircuitEnv(
    nimCircuitCli: circuitsDir / "reference" / "nim" / "proof_input" / "cli",
    circuitDirIncludes: circuitsDir / "circuit",
    ptauPath:
      codexDir / "benchmarks" / "ceremony" / "powersOfTau28_hez_final_23.ptau",
    ptauUrl: "https://storage.googleapis.com/zkevm/ptau".parseUri,
    codexProjDir: codexDir,
  )
 proc check*(env: var CircuitEnv) =
  ## Verifies that the tooling needed to generate circuits is available.
  ##
  ## Quits with exit code 2 when `snarkjs` or `circom` cannot be found with
  ## `findExe`. When the Nim circuit reference cli does not exist yet it is
  ## built with `nimble`, and the build result is verified.
  let codexProjDir = findCodexProjectDir()
  echo "\n\nFound project dir: ", codexProjDir
  let snarkjs = findExe("snarkjs")
  if snarkjs == "":
    echo dedent"""
    ERROR: must install snarkjs first
      npm install -g snarkjs@latest
    """
  let circom = findExe("circom")
  if circom == "":
    echo dedent"""
    ERROR: must install circom first
      git clone https://github.com/iden3/circom.git
      cargo install --path circom
    """
  # report every missing tool before quitting
  if snarkjs == "" or circom == "":
    quit 2
  echo "Found SnarkJS: ", snarkjs
  echo "Found Circom: ", circom
  if not env.nimCircuitCli.fileExists:
    echo "Nim Circuit reference cli not found: ", env.nimCircuitCli
    echo "Building Circuit reference cli...\n"
    withDir env.nimCircuitCli.parentDir:
      runit "nimble build -d:release --styleCheck:off cli"
    echo "CWD: ", getCurrentDir()
    # `doAssert` is kept in `-d:danger` builds, plain `assert` is not
    doAssert env.nimCircuitCli.fileExists(),
      "building the Nim circuit reference cli did not produce: " &
      env.nimCircuitCli
  echo "Found NimCircuitCli: ", env.nimCircuitCli
  echo "Found Circuit Path: ", env.circuitDirIncludes
  echo "Found PTAU file: ", env.ptauPath
 proc downloadPtau*(ptauPath: string, ptauUrl: Uri) =
  ## Downloads the ptau (ceremony) file with curl, unless `ptauPath` exists.
  ##
  ## The file is fetched from `<ptauUrl>/<file name of ptauPath>`. It is first
  ## written to `<ptauPath>.part` and only renamed once curl succeeded, so a
  ## failed or interrupted download is never mistaken for a valid file later.
  if not ptauPath.fileExists:
    echo "Ceremony file not found, downloading..."
    createDir ptauPath.parentDir
    let
      url = $ptauUrl & "/" & ptauPath.splitPath().tail
      partialPath = ptauPath & ".part"
    # --fail: exit with an error on HTTP errors instead of saving the error page
    # --location: follow redirects
    runit "curl --fail --location --output " & quoteShell(partialPath) & " " &
      quoteShell(url)
    moveFile partialPath, ptauPath
  else:
    echo "Found PTAU file at: ", ptauPath
 proc getCircuitBenchStr*(args: CircuitArgs): string =
  ## Renders the args as a sequence of `_<field><value>` pieces, in field
  ## declaration order.
  for fieldName, fieldValue in args.fieldPairs:
    result.add "_"
    result.add fieldName
    result.add $fieldValue
 proc getCircuitBenchPath*(args: CircuitArgs, env: CircuitEnv): string =
  ## Returns the folder path that is unique for the given circuit args.
  let folderPrefix = env.codexProjDir / "benchmarks/circuit_bench"
  folderPrefix & getCircuitBenchStr(args)
 proc generateCircomAndSamples*(args: CircuitArgs, env: CircuitEnv, name: string) =
  ## Runs the Nim circuit cli so that it writes `<name>.circom` and
  ## `input.json` into the current directory.
  ## Does nothing when `input.json` already exists.
  if "input.json".fileExists:
    return
  # every circuit arg becomes a `--<field>=<value>` flag
  var cliCmd = env.nimCircuitCli
  for flag, value in fieldPairs(args):
    cliCmd.add " --" & flag & "=" & $value
  echo "Generating Circom Files..."
  runit fmt"{cliCmd} -v --circom={name}.circom --output=input.json"
 proc createCircuit*(
    args: CircuitArgs,
    env: CircuitEnv,
    name = "proof_main",
    circBenchDir = getCircuitBenchPath(args, env),
    someEntropy = "some_entropy_75289v3b7rcawcsyiur",
    doGenerateWitness = false,
 ): tuple[dir: string, name: string] =
  ## Generates all the files needed to run a proof circuit. Downloads the PTAU
  ## file if needed.
  ##
  ## Circuit files that already exist are reused instead of being regenerated.
  ## They are located in `circBenchDir` which defaults to a folder like:
  ##    `nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3`
  ## with all the given CircuitArgs.
  ##
  ## * `name` - base name of the `.circom`, `.r1cs`, `.wasm`, `.zkey` and
  ##   `.wtns` files
  ## * `someEntropy` - text sent to `snarkjs zkey contribute` on stdin
  ## * `doGenerateWitness` - also generate `<name>.wtns`
  ##
  ## Returns the circuit directory together with `name`.
  let circdir = circBenchDir
  downloadPtau env.ptauPath, env.ptauUrl
  echo "Creating circuit dir: ", circdir
  createDir circdir
  withDir circdir:
    writeFile("circuit_params.json", pretty(%*args))
    let
      inputs = circdir / "input.json"
      zkey = circdir / fmt"{name}.zkey"
      wasm = circdir / fmt"{name}.wasm"
      r1cs = circdir / fmt"{name}.r1cs"
      wtns = circdir / fmt"{name}.wtns"
    generateCircomAndSamples(args, env, name)
    if not wasm.fileExists or not r1cs.fileExists:
      runit fmt"circom --r1cs --wasm --O2 -l{env.circuitDirIncludes} {name}.circom"
      # circom writes the wasm into `<name>_js`, keep it next to the r1cs
      moveFile fmt"{name}_js" / fmt"{name}.wasm", fmt"{name}.wasm"
    echo "Found wasm: ", wasm
    echo "Found r1cs: ", r1cs
    if not zkey.fileExists:
      echo "ZKey not found, generating..."
      putEnv "NODE_OPTIONS", "--max-old-space-size=8192"
      if not fmt"{name}_0000.zkey".fileExists:
        runit fmt"snarkjs groth16 setup {r1cs} {env.ptauPath} {name}_0000.zkey"
        echo fmt"Generated {name}_0000.zkey"
      let cmd =
        fmt"snarkjs zkey contribute {name}_0000.zkey {name}_0001.zkey --name='1st Contributor Name'"
      echo "CMD: ", cmd
      let cmdRes = execCmdEx(cmd, options = {}, input = someEntropy & "\n")
      # `doAssert` is kept in `-d:danger` builds, plain `assert` is not
      doAssert cmdRes.exitCode == 0,
        "snarkjs zkey contribute failed with exit code " & $cmdRes.exitCode &
        ":\n" & cmdRes.output
      moveFile fmt"{name}_0001.zkey", fmt"{name}.zkey"
      removeFile fmt"{name}_0000.zkey"
    if not wtns.fileExists and doGenerateWitness:
      # generate_witness.js is written by circom into `<name>_js`; its
      # arguments are the wasm file, the input json and the output witness
      runit fmt"node {name}_js/generate_witness.js {wasm} {inputs} {wtns}"
  return (circdir, name)
 when isMainModule:
  # Manual test run: check the environment, then create a single circuit.
  echo "findCodexProjectDir: ", findCodexProjectDir()

  var env = CircuitEnv.default()
  env.check()

  let sampleArgs = CircuitArgs(
    depth: 32, # maximum depth of the slot tree
    maxslots: 256, # maximum number of slots
    cellsize: 2048, # cell size in bytes
    blocksize: 65536, # block size in bytes
    nsamples: 5, # number of samples to prove
    entropy: 1234567, # external randomness
    seed: 12345, # seed for creating fake data
    nslots: 11, # number of slots in the dataset
    ncells: 512, # number of cells in this slot
    index: 3, # which slot we prove (0..NSLOTS-1)
  )
  let createdCircuit = createCircuit(sampleArgs, env)
  echo "\nBench dir:\n", createdCircuit
--- a/benchmarks/run_benchmarks.nim
+++ b/benchmarks/run_benchmarks.nim
@ -0,0 +1,105 @@
 import std/[sequtils, strformat, os, options, importutils]
 import std/[times, os, strutils, terminal]
 import pkg/questionable
 import pkg/questionable/results
 import pkg/datastore
 import pkg/codex/[rng, stores, merkletree, codextypes, slots]
 import pkg/codex/utils/[json, poseidon2digest]
 import pkg/codex/slots/[builder, sampler/utils, backends/helpers]
 import pkg/constantine/math/[arithmetic, io/io_bigints, io/io_fields]
 import ./utils
 import ./create_circuits
 type CircuitFiles* = object
  ## Paths of the circuit files used to run a benchmark.
  r1cs*: string ## path of the `.r1cs` file
  wasm*: string ## path of the `.wasm` file
  zkey*: string ## path of the `.zkey` file
  inputs*: string ## path of the json file holding the proof inputs
 proc runArkCircom(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
  ## Benchmarks proving and verifying through `CircomCompat`.
  ## Each of the two steps is run `benchmarkLoops` times; the last
  ## verification result is printed.
  echo "Loading sample proof..."
  var inputData = files.inputs.readFile()
  var inputJson = !JsonNode.parse(inputData)
  var proofInputs = Poseidon2Hash.jsonToProofInput(inputJson)
  var circom = CircomCompat.init(
    files.r1cs,
    files.wasm,
    files.zkey,
    slotDepth = args.depth,
    numSamples = args.nsamples,
  )
  defer:
    circom.release() # this comes from the rust FFI
  echo "Sample proof loaded..."

  echo "Proving..."
  # the args are part of the benchmark names, so every variation is
  # recorded separately
  let benchSuffix = getCircuitBenchStr(args)
  var proof: CircomProof
  benchmark fmt"prover-{benchSuffix}", benchmarkLoops:
    proof = circom.prove(proofInputs).tryGet

  var verified: bool
  benchmark fmt"verify-{benchSuffix}", benchmarkLoops:
    verified = circom.verify(proof, proofInputs).tryGet
  echo "verify result: ", verified
 proc runRapidSnark(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
  ## Placeholder for a rapidsnark benchmark. It only prints a message for now;
  ## none of its parameters are used yet.
  # command to benchmark:
  # time rapidsnark ${CIRCUIT_MAIN}.zkey witness.wtns proof.json public.json
  echo "generating the witness..."
  ## TODO: not implemented yet
 proc runBenchmark(args: CircuitArgs, env: CircuitEnv, benchmarkLoops: int) =
  ## Runs the benchmarks for one set of args.
  ## The circuit files are created in a folder `benchmarks/circuit_bench_$(args)`.
  let (circuitDir, circuitName) = createCircuit(args, env)
  ## TODO: copy over testcircomcompat proving
  let files = CircuitFiles(
    r1cs: circuitDir / (circuitName & ".r1cs"),
    wasm: circuitDir / (circuitName & ".wasm"),
    zkey: circuitDir / (circuitName & ".zkey"),
    inputs: circuitDir / "input.json",
  )
  runArkCircom(args, files, benchmarkLoops)
 proc runAllBenchmarks*() =
  ## Benchmarks the circuit once per `nsamples` value in 1..`numberSamples`
  ## and prints the collected summaries afterwards.
  const
    numberSamples = 3
    benchmarkLoops = 5

  echo "Running benchmark"
  # setup()
  var env = CircuitEnv.default()
  env.check()

  var args = CircuitArgs(
    depth: 32, # maximum depth of the slot tree
    maxslots: 256, # maximum number of slots
    cellsize: 2048, # cell size in bytes
    blocksize: 65536, # block size in bytes
    nsamples: 1, # number of samples to prove
    entropy: 1234567, # external randomness
    seed: 12345, # seed for creating fake data
    nslots: 11, # number of slots in the dataset
    ncells: 512, # number of cells in this slot
    index: 3, # which slot we prove (0..NSLOTS-1)
  )

  for sampleCount in 1 .. numberSamples:
    args.nsamples = sampleCount
    stdout.styledWriteLine(fgYellow, "\nbenchmarking args: ", $args)
    runBenchmark(args, env, benchmarkLoops)

  printBenchMarkSummaries()
 # Run all benchmarks when this file is compiled as the main module
 when isMainModule:
  runAllBenchmarks()
--- a/benchmarks/utils.nim
+++ b/benchmarks/utils.nim
@ -0,0 +1,76 @@
 import std/tables
 template withDir*(dir: string, blk: untyped) =
  ## Runs `blk` with `dir` as the working directory. The previous working
  ## directory is restored afterwards, also when `blk` raises.
  let originalDir = getCurrentDir()
  try:
    setCurrentDir(dir)
    blk
  finally:
    setCurrentDir(originalDir)
 template runit*(cmd: string) =
  ## Runs the shell command `cmd`, printing the command and its exit status.
  ## Fails with an `AssertionDefect` when the exit status is not 0.
  let command: string = cmd # evaluate the argument only once
  echo "RUNNING: ", command
  let cmdRes = execShellCmd(command)
  echo "STATUS: ", cmdRes
  # `doAssert` is kept in `-d:danger` builds; with a plain `assert` failed
  # commands would be ignored there
  doAssert cmdRes == 0, "command failed with status " & $cmdRes & ": " & command
 # Benchmark results by benchmark name: average run time in seconds and number
 # of runs. Filled by `benchmark`, printed by `printBenchMarkSummaries`.
 var benchRuns* = newTable[string, tuple[avgTimeSec: float, count: int]]()
 func avg(vals: openArray[float]): float =
  ## Arithmetic mean of `vals`; `0.0` when `vals` is empty.
  let n = vals.len.toFloat
  for i in 0 ..< vals.len:
    result += vals[i] / n
 template benchmark*(name: untyped, count: int, blk: untyped) =
  ## Simple benchmarking of a block of code.
  ##
  ## Runs `blk` `count` times, prints the duration of every run in seconds and
  ## stores the average duration together with `count` in `benchRuns[name]`.
  ##
  ## NOTE: durations are differences of `epochTime()` readings, i.e. elapsed
  ## wall-clock time, even though the printed label says "CPU Time".
  let benchmarkName: string = name
  var runs = newSeqOfCap[float](count)
  for i in 1 .. count:
    block:
      let t0 = epochTime()
      `blk`
      let elapsed = epochTime() - t0
      runs.add elapsed
  var elapsedStr = ""
  for v in runs:
    # separator only between two entries, so the list has no leading ", "
    if elapsedStr.len > 0:
      elapsedStr &= ", "
    elapsedStr &= v.formatFloat(format = ffDecimal, precision = 3)
  stdout.styledWriteLine(
    fgGreen, "CPU Time [", benchmarkName, "] ", "avg(", $count, "): ", elapsedStr, " s"
  )
  benchRuns[benchmarkName] = (runs.avg(), count)
 template printBenchMarkSummaries*(printRegular=true, printTsv=true) =
  ## Prints all results stored in `benchRuns`: one sentence per benchmark
  ## when `printRegular` is set, and tab separated rows below a header line
  ## when `printTsv` is set.
  if printRegular:
    echo ""
    for benchName, stats in benchRuns:
      echo "Benchmark average run ", stats.avgTimeSec, " for ", stats.count,
        " runs for ", benchName
  if printTsv:
    echo ""
    echo "name\tavgTimeSec\tcount"
    for benchName, stats in benchRuns:
      echo benchName, "\t", stats.avgTimeSec, "\t", stats.count
 import std/math
 func floorLog2*(x: int): int =
  ## Base 2 logarithm of `x`, rounded down.
  ## Returns -1 when `x` is zero or negative.
  result = -1
  var remaining = x
  while remaining > 0:
    inc result
    remaining = remaining shr 1
 func ceilingLog2*(x: int): int =
  ## Base 2 logarithm of `x`, rounded up, for positive `x`.
  ## Returns -1 when `x` is zero.
  if x == 0: -1 else: floorLog2(x - 1) + 1
 func checkPowerOfTwo*(x: int, what: string): int =
  ## Returns `x` when it is a power of 2 and fails with an `AssertionDefect`
  ## otherwise. `what` names the checked value in the failure message.
  ##
  ## Zero and negative values are rejected with the same message.
  # a power of 2 has exactly one bit set, so clearing its lowest set bit
  # (`x and (x - 1)`) leaves nothing
  # `doAssert` is kept in `-d:danger` builds, plain `assert` is not
  doAssert x > 0 and (x and (x - 1)) == 0,
    "`" & what & "` is expected to be a power of 2"
  x