Pr add prover benchmark tool (#790)

* initial setup

* reorg

* figuring out basic shell commands

* benchmarks

* benchmarks

* Sets up environment for running benchmarks

* updates

* integrate setup and proving

* updates

* adding outputs

* cleanup

* check failure

* benchmarks

* benchmarks

* benchmarks

* benchmarks

* benchmarks

* benchmarks

* formatting

* fix running larger sizes

* use larger ceremony file size

* use larger ceremony file size

* use larger ceremony file size

* restore benchmarks

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* refactor env

* refactor env

* refactor env

* refactor env

* refactor env

* rename

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* cleanup

* readme

* readme

* merge

* initial splitout of codex ark prover cli

* opts

* copying nimcli opts

* copying nimcli opts

* copying nimcli opts

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* updating ark cli

* docs

* remove file

* add param

* add benchmarkLoops param

* update benchmark formatting

* update benchmark formatting

* update benchmark formatting

* update benchmark formatting

* fix naming

* fix serde version

* Apply suggestions from code review

cleanup wording

Signed-off-by: Dmitriy Ryajov <dryajov@gmail.com>

---------

Signed-off-by: Dmitriy Ryajov <dryajov@gmail.com>
Co-authored-by: Dmitriy Ryajov <dryajov@gmail.com>
This commit is contained in:
Jaremy Creechley 2024-05-23 19:28:17 +03:00 committed by GitHub
parent 3046b7636c
commit efd46148b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 418 additions and 0 deletions

2
benchmarks/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
ceremony
circuit_bench_*

33
benchmarks/README.md Normal file
View File

@ -0,0 +1,33 @@
## Benchmark Runner
Modify `runAllBenchmarks` proc in `run_benchmarks.nim` to the desired parameters and variations.
Then run it:
```sh
nim c -r run_benchmarks
```
By default all circuit files for each combinations of circuit args will be generated in a unique folder named like:
nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3
Generating the circuit files often takes longer than running benchmarks, so caching the results allows re-running the benchmark as needed.
You can modify the `CircuitArgs` and `CircuitEnv` objects in `runAllBenchMarks` to suite your needs. See `create_circuits.nim` for their definition.
The runner executes all commands relative to the `nim-codex` repo. This simplifies finding the correct circuit includes paths, etc. `CircuitEnv` sets all of this.
## Codex Ark Circom CLI
Runs Codex's prover setup with Ark / Circom.
Compile:
```sh
nim c codex_ark_prover_cli.nim
```
Run to see usage:
```sh
./codex_ark_prover_cli.nim -h
```

15
benchmarks/config.nims Normal file
View File

@ -0,0 +1,15 @@
--path:
".."
--path:
"../tests"
--threads:
on
--tlsEmulation:
off
--d:
release
# when not defined(chronicles_log_level):
# --define:"chronicles_log_level:NONE" # compile all log statements
# --define:"chronicles_sinks:textlines[dynamic]" # allow logs to be filtered at runtime
# --"import":"logging" # ensure that logging is ignored at runtime

View File

@ -0,0 +1,187 @@
import std/[hashes, json, strutils, strformat, os, osproc, uri]
import ./utils
type
CircuitEnv* = object
nimCircuitCli*: string
circuitDirIncludes*: string
ptauPath*: string
ptauUrl*: Uri
codexProjDir*: string
CircuitArgs* = object
depth*: int
maxslots*: int
cellsize*: int
blocksize*: int
nsamples*: int
entropy*: int
seed*: int
nslots*: int
ncells*: int
index*: int
proc findCodexProjectDir(): string =
## find codex proj dir -- assumes this script is in codex/benchmarks
result = currentSourcePath().parentDir.parentDir
func default*(tp: typedesc[CircuitEnv]): CircuitEnv =
let codexDir = findCodexProjectDir()
result.nimCircuitCli =
codexDir / "vendor" / "codex-storage-proofs-circuits" / "reference" / "nim" /
"proof_input" / "cli"
result.circuitDirIncludes =
codexDir / "vendor" / "codex-storage-proofs-circuits" / "circuit"
result.ptauPath =
codexDir / "benchmarks" / "ceremony" / "powersOfTau28_hez_final_23.ptau"
result.ptauUrl = "https://storage.googleapis.com/zkevm/ptau".parseUri
result.codexProjDir = codexDir
proc check*(env: var CircuitEnv) =
## check that the CWD of script is in the codex parent
let codexProjDir = findCodexProjectDir()
echo "\n\nFound project dir: ", codexProjDir
let snarkjs = findExe("snarkjs")
if snarkjs == "":
echo dedent"""
ERROR: must install snarkjs first
npm install -g snarkjs@latest
"""
let circom = findExe("circom")
if circom == "":
echo dedent"""
ERROR: must install circom first
git clone https://github.com/iden3/circom.git
cargo install --path circom
"""
if snarkjs == "" or circom == "":
quit 2
echo "Found SnarkJS: ", snarkjs
echo "Found Circom: ", circom
if not env.nimCircuitCli.fileExists:
echo "Nim Circuit reference cli not found: ", env.nimCircuitCli
echo "Building Circuit reference cli...\n"
withDir env.nimCircuitCli.parentDir:
runit "nimble build -d:release --styleCheck:off cli"
echo "CWD: ", getCurrentDir()
assert env.nimCircuitCli.fileExists()
echo "Found NimCircuitCli: ", env.nimCircuitCli
echo "Found Circuit Path: ", env.circuitDirIncludes
echo "Found PTAU file: ", env.ptauPath
proc downloadPtau*(ptauPath: string, ptauUrl: Uri) =
## download ptau file using curl if needed
if not ptauPath.fileExists:
echo "Ceremony file not found, downloading..."
createDir ptauPath.parentDir
withDir ptauPath.parentDir:
runit fmt"curl --output '{ptauPath}' '{$ptauUrl}/{ptauPath.splitPath().tail}'"
else:
echo "Found PTAU file at: ", ptauPath
proc getCircuitBenchStr*(args: CircuitArgs): string =
for f, v in fieldPairs(args):
result &= "_" & f & $v
proc getCircuitBenchPath*(args: CircuitArgs, env: CircuitEnv): string =
## generate folder name for unique circuit args
result = env.codexProjDir / "benchmarks/circuit_bench" & getCircuitBenchStr(args)
proc generateCircomAndSamples*(args: CircuitArgs, env: CircuitEnv, name: string) =
## run nim circuit and sample generator
var cliCmd = env.nimCircuitCli
for f, v in fieldPairs(args):
cliCmd &= " --" & f & "=" & $v
if not "input.json".fileExists:
echo "Generating Circom Files..."
runit fmt"{cliCmd} -v --circom={name}.circom --output=input.json"
proc createCircuit*(
args: CircuitArgs,
env: CircuitEnv,
name = "proof_main",
circBenchDir = getCircuitBenchPath(args, env),
someEntropy = "some_entropy_75289v3b7rcawcsyiur",
doGenerateWitness = false,
): tuple[dir: string, name: string] =
## Generates all the files needed for to run a proof circuit. Downloads the PTAU file if needed.
##
## All needed circuit files will be generated as needed.
## They will be located in `circBenchDir` which defaults to a folder like:
## `nim-codex/benchmarks/circuit_bench_depth32_maxslots256_cellsize2048_blocksize65536_nsamples9_entropy1234567_seed12345_nslots11_ncells512_index3`
## with all the given CircuitArgs.
##
let circdir = circBenchDir
downloadPtau env.ptauPath, env.ptauUrl
echo "Creating circuit dir: ", circdir
createDir circdir
withDir circdir:
writeFile("circuit_params.json", pretty(%*args))
let
inputs = circdir / "input.json"
zkey = circdir / fmt"{name}.zkey"
wasm = circdir / fmt"{name}.wasm"
r1cs = circdir / fmt"{name}.r1cs"
wtns = circdir / fmt"{name}.wtns"
generateCircomAndSamples(args, env, name)
if not wasm.fileExists or not r1cs.fileExists:
runit fmt"circom --r1cs --wasm --O2 -l{env.circuitDirIncludes} {name}.circom"
moveFile fmt"{name}_js" / fmt"{name}.wasm", fmt"{name}.wasm"
echo "Found wasm: ", wasm
echo "Found r1cs: ", r1cs
if not zkey.fileExists:
echo "ZKey not found, generating..."
putEnv "NODE_OPTIONS", "--max-old-space-size=8192"
if not fmt"{name}_0000.zkey".fileExists:
runit fmt"snarkjs groth16 setup {r1cs} {env.ptauPath} {name}_0000.zkey"
echo fmt"Generated {name}_0000.zkey"
let cmd =
fmt"snarkjs zkey contribute {name}_0000.zkey {name}_0001.zkey --name='1st Contributor Name'"
echo "CMD: ", cmd
let cmdRes = execCmdEx(cmd, options = {}, input = someEntropy & "\n")
assert cmdRes.exitCode == 0
moveFile fmt"{name}_0001.zkey", fmt"{name}.zkey"
removeFile fmt"{name}_0000.zkey"
if not wtns.fileExists and doGenerateWitness:
runit fmt"node generate_witness.js {wtns} ../input.json ../witness.wtns"
return (circdir, name)
when isMainModule:
echo "findCodexProjectDir: ", findCodexProjectDir()
## test run creating a circuit
var env = CircuitEnv.default()
env.check()
let args = CircuitArgs(
depth: 32, # maximum depth of the slot tree
maxslots: 256, # maximum number of slots
cellsize: 2048, # cell size in bytes
blocksize: 65536, # block size in bytes
nsamples: 5, # number of samples to prove
entropy: 1234567, # external randomness
seed: 12345, # seed for creating fake data
nslots: 11, # number of slots in the dataset
index: 3, # which slot we prove (0..NSLOTS-1)
ncells: 512, # number of cells in this slot
)
let benchenv = createCircuit(args, env)
echo "\nBench dir:\n", benchenv

View File

@ -0,0 +1,105 @@
import std/[sequtils, strformat, os, options, importutils]
import std/[times, os, strutils, terminal]
import pkg/questionable
import pkg/questionable/results
import pkg/datastore
import pkg/codex/[rng, stores, merkletree, codextypes, slots]
import pkg/codex/utils/[json, poseidon2digest]
import pkg/codex/slots/[builder, sampler/utils, backends/helpers]
import pkg/constantine/math/[arithmetic, io/io_bigints, io/io_fields]
import ./utils
import ./create_circuits
type CircuitFiles* = object
r1cs*: string
wasm*: string
zkey*: string
inputs*: string
proc runArkCircom(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
echo "Loading sample proof..."
var
inputData = files.inputs.readFile()
inputJson = !JsonNode.parse(inputData)
proofInputs = Poseidon2Hash.jsonToProofInput(inputJson)
circom = CircomCompat.init(
files.r1cs,
files.wasm,
files.zkey,
slotDepth = args.depth,
numSamples = args.nsamples,
)
defer:
circom.release() # this comes from the rust FFI
echo "Sample proof loaded..."
echo "Proving..."
let nameArgs = getCircuitBenchStr(args)
var proof: CircomProof
benchmark fmt"prover-{nameArgs}", benchmarkLoops:
proof = circom.prove(proofInputs).tryGet
var verRes: bool
benchmark fmt"verify-{nameArgs}", benchmarkLoops:
verRes = circom.verify(proof, proofInputs).tryGet
echo "verify result: ", verRes
proc runRapidSnark(args: CircuitArgs, files: CircuitFiles, benchmarkLoops: int) =
# time rapidsnark ${CIRCUIT_MAIN}.zkey witness.wtns proof.json public.json
echo "generating the witness..."
## TODO
proc runBenchmark(args: CircuitArgs, env: CircuitEnv, benchmarkLoops: int) =
## execute benchmarks given a set of args
## will create a folder in `benchmarks/circuit_bench_$(args)`
##
let env = createCircuit(args, env)
## TODO: copy over testcircomcompat proving
let files = CircuitFiles(
r1cs: env.dir / fmt"{env.name}.r1cs",
wasm: env.dir / fmt"{env.name}.wasm",
zkey: env.dir / fmt"{env.name}.zkey",
inputs: env.dir / fmt"input.json",
)
runArkCircom(args, files, benchmarkLoops)
proc runAllBenchmarks*() =
echo "Running benchmark"
# setup()
var env = CircuitEnv.default()
env.check()
var args = CircuitArgs(
depth: 32, # maximum depth of the slot tree
maxslots: 256, # maximum number of slots
cellsize: 2048, # cell size in bytes
blocksize: 65536, # block size in bytes
nsamples: 1, # number of samples to prove
entropy: 1234567, # external randomness
seed: 12345, # seed for creating fake data
nslots: 11, # number of slots in the dataset
index: 3, # which slot we prove (0..NSLOTS-1)
ncells: 512, # number of cells in this slot
)
let
numberSamples = 3
benchmarkLoops = 5
for i in 1 .. numberSamples:
args.nsamples = i
stdout.styledWriteLine(fgYellow, "\nbenchmarking args: ", $args)
runBenchmark(args, env, benchmarkLoops)
printBenchMarkSummaries()
when isMainModule:
runAllBenchmarks()

76
benchmarks/utils.nim Normal file
View File

@ -0,0 +1,76 @@
import std/tables
template withDir*(dir: string, blk: untyped) =
## set working dir for duration of blk
let prev = getCurrentDir()
try:
setCurrentDir(dir)
`blk`
finally:
setCurrentDir(prev)
template runit*(cmd: string) =
## run shell commands and verify it runs without an error code
echo "RUNNING: ", cmd
let cmdRes = execShellCmd(cmd)
echo "STATUS: ", cmdRes
assert cmdRes == 0
var benchRuns* = newTable[string, tuple[avgTimeSec: float, count: int]]()
func avg(vals: openArray[float]): float =
for v in vals:
result += v / vals.len().toFloat()
template benchmark*(name: untyped, count: int, blk: untyped) =
let benchmarkName: string = name
## simple benchmarking of a block of code
var runs = newSeqOfCap[float](count)
for i in 1 .. count:
block:
let t0 = epochTime()
`blk`
let elapsed = epochTime() - t0
runs.add elapsed
var elapsedStr = ""
for v in runs:
elapsedStr &= ", " & v.formatFloat(format = ffDecimal, precision = 3)
stdout.styledWriteLine(
fgGreen, "CPU Time [", benchmarkName, "] ", "avg(", $count, "): ", elapsedStr, " s"
)
benchRuns[benchmarkName] = (runs.avg(), count)
template printBenchMarkSummaries*(printRegular=true, printTsv=true) =
if printRegular:
echo ""
for k, v in benchRuns:
echo "Benchmark average run ", v.avgTimeSec, " for ", v.count, " runs ", "for ", k
if printTsv:
echo ""
echo "name", "\t", "avgTimeSec", "\t", "count"
for k, v in benchRuns:
echo k, "\t", v.avgTimeSec, "\t", v.count
import std/math
func floorLog2*(x: int): int =
var k = -1
var y = x
while (y > 0):
k += 1
y = y shr 1
return k
func ceilingLog2*(x: int): int =
if (x == 0):
return -1
else:
return (floorLog2(x - 1) + 1)
func checkPowerOfTwo*(x: int, what: string): int =
let k = ceilingLog2(x)
assert(x == 2 ^ k, ("`" & what & "` is expected to be a power of 2"))
return x