diff --git a/Cargo.lock b/Cargo.lock index 4236cd7e..7255dbee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2075,9 +2075,11 @@ dependencies = [ "amm_core", "anyhow", "ata_core", + "authenticated_transfer_core", "borsh", "clap", "clock_core", + "criterion", "nssa", "nssa_core", "risc0-zkvm", diff --git a/Justfile b/Justfile index ac003a15..7af964a6 100644 --- a/Justfile +++ b/Justfile @@ -23,6 +23,12 @@ test: @echo "🧪 Running tests" RISC0_DEV_MODE=1 cargo nextest run --no-fail-fast +# Run criterion benches: fast crypto primitives, then the slow PPE verify (real proving setup). +bench: + @echo "📊 Running criterion benches" + cargo bench -p crypto_primitives_bench --bench primitives + cargo bench -p cycle_bench --features ppe --bench verify + # Run Bedrock node in docker [working-directory: 'bedrock'] run-bedrock: diff --git a/docs/benchmarks/cycle_bench.md b/docs/benchmarks/cycle_bench.md index fca9f12c..0e880070 100644 --- a/docs/benchmarks/cycle_bench.md +++ b/docs/benchmarks/cycle_bench.md @@ -63,23 +63,24 @@ Same `auth_transfer Transfer` instruction, standalone vs wrapped in the privacy Linear fit depth=1..9: ≈ 53 s per additional chained call, intercept ≈ 73 s. Composition tax (single program PPE − standalone): ≈ 48 s. `proof_bytes` is constant: the outer succinct proof has fixed size; the journal carried alongside it scales with public state and is reported separately by `--verify`. -## Verifier (`--verify`) +## Verifier (criterion bench) -One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` measured over 1000 iterations. +One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` measured under criterion's statistical sampler. Bench file: `tools/cycle_bench/benches/verify.rs`. Setup (one full PPE prove) is outside the timed `iter` loop.
-| Field | Value | -|---|---| -| case | auth_transfer Transfer in PPE | -| proof_bytes (S_agg) | 223,551 | -| journal_bytes | 412 | -| verify_ms (best / mean ± stdev, n=1000) | 11.71 / 12.06 ± 1.99 | +Numbers from the most recent local run on the machine listed above. Criterion sample_size = 100, measurement_time = 15 s, warm_up_time = 2 s. Slope-regression point estimate in the middle column; 95% CI bounds on either side. Run `cargo bench -p cycle_bench --features ppe --bench verify` to refresh. + +| Bench | low | point | high | outliers (mild + severe) | +|---|---:|---:|---:|---:| +| ppe/verify_auth_transfer | 12.016 ms | 12.215 ms | 12.469 ms | 1 + 10 | + +The corresponding `proof_bytes` (S_agg) for the bench receipt is captured by `--ppe` above; the verify bench itself only times the verify call. ## Findings - Proving cost scales with po2-bucketed `total_cycles`, not raw `user_cycles`. Trimming user_cycles only helps if it crosses a 2^N boundary. - Single-program PPE composition tax on M2 Pro CPU: ≈ 48 s (61.5 − 13.7). - Chained-call cost is linear at ≈ 53 s per call. A max-depth chain (10) would take ≈ 600 s standalone on this CPU. -- `G_verify` is ≈ 12 ms and roughly constant per outer receipt (1000-iter stdev ≈ 2 ms). The succinct outer proof is fixed at 223,551 bytes (S_agg); verify is not on the latency critical path. +- `G_verify` is ≈ 12 ms (criterion CI: 12.0–12.5 ms over 100 samples) and roughly constant per outer receipt. The succinct outer proof is fixed at 223,551 bytes (S_agg); verify is not on the latency critical path.
## Reproduce @@ -87,10 +88,12 @@ One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::v cargo run --release -p cycle_bench cargo run --release -p cycle_bench --features prove -- --prove cargo run --release -p cycle_bench --features ppe -- --prove --ppe -cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000 + +# Verifier microbench via criterion: +cargo bench -p cycle_bench --features ppe --bench verify ``` -JSON output: `target/cycle_bench.json`. +JSON output: `target/cycle_bench.json` (bin), `target/criterion/ppe/verify_auth_transfer/` (verify bench). ## Caveats diff --git a/tools/cycle_bench/Cargo.toml b/tools/cycle_bench/Cargo.toml index 1a4a9db3..aa30ebc6 100644 --- a/tools/cycle_bench/Cargo.toml +++ b/tools/cycle_bench/Cargo.toml @@ -28,3 +28,11 @@ serde.workspace = true serde_json.workspace = true anyhow.workspace = true clap = { workspace = true } + +[dev-dependencies] +criterion = { workspace = true, features = ["html_reports"] } + +[[bench]] +name = "verify" +harness = false +required-features = ["ppe"] diff --git a/tools/cycle_bench/README.md b/tools/cycle_bench/README.md index 3b416dc8..7f3b3d95 100644 --- a/tools/cycle_bench/README.md +++ b/tools/cycle_bench/README.md @@ -4,6 +4,8 @@ Per-program Risc0 cycle counts, prover wall time, PPE composition cost, and veri ## Run +The binary handles executor cycles, prover wall time, and PPE composition cost: + ```sh # Executor cycles only (fast, ~seconds) cargo run --release -p cycle_bench @@ -13,16 +15,30 @@ cargo run --release -p cycle_bench --features prove -- --prove # + PPE composition cases (very slow, ~hour) cargo run --release -p cycle_bench --features ppe -- --prove --ppe - -# + verifier microbench (G_verify): generates one PPE receipt, times verify x1000 -cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000 ``` -`RISC0_DEV_MODE=1` skips proving entirely and is only useful for the executor path. 
Combine flags freely; output is printed to stdout and written to `target/cycle_bench.json` for regression diffs. +The verifier microbenchmark (`G_verify`) lives in a criterion bench under `benches/verify.rs`: + +```sh +# Generates one PPE receipt for auth_transfer Transfer (~minutes of setup), +# then times Receipt::verify under criterion's statistical sampler. +cargo bench -p cycle_bench --features ppe --bench verify +``` + +`RISC0_DEV_MODE=1` skips proving entirely and is only useful for the executor path. The bin writes to `target/cycle_bench.json`; criterion writes per-bench estimates under `target/criterion/`. ## What you'll see - Per-program executor cycles and segments, plus exec wall time as `best / mean ± stdev (n=N)`. - With `--prove`: prover total cycles, paging cycles, segments, and wall time. -- With `--ppe`: end-to-end `execute_and_prove` wall time and S_agg (the borsh-serialized InnerReceipt length) for one auth-transfer-in-PPE case and a chain-caller depth sweep. -- With `--verify`: verify wall time `best / mean ± stdev`, plus `proof_bytes` and `journal_bytes`. +- With `--ppe`: end-to-end `execute_and_prove` wall time and `S_agg` (the borsh-serialized InnerReceipt length) for one auth-transfer-in-PPE case and a chain-caller depth sweep. +- From the `verify` criterion bench: `ppe/verify_auth_transfer` slope-regression point estimate with 95% CI bounds. + +## Baseline comparison (verify bench) + +```sh +# On main: +cargo bench -p cycle_bench --features ppe --bench verify -- --save-baseline main +# On your branch: +cargo bench -p cycle_bench --features ppe --bench verify -- --baseline main +``` diff --git a/tools/cycle_bench/benches/verify.rs b/tools/cycle_bench/benches/verify.rs new file mode 100644 index 00000000..d7bdfbe3 --- /dev/null +++ b/tools/cycle_bench/benches/verify.rs @@ -0,0 +1,47 @@ +//! Criterion bench for `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)`. +//! +//! Produces the `G_verify` fee-model parameter.
Setup: one full PPE prove of an +//! `auth_transfer` Transfer (minutes, runs once outside the timed loop). Measured +//! op: `Receipt::verify` over a real PPE receipt. +//! +//! Run with: `cargo bench -p cycle_bench --features ppe --bench verify`. + +use std::{hint::black_box, time::Duration}; + +use anyhow::Context as _; +use criterion::{Criterion, criterion_group, criterion_main}; +use cycle_bench::ppe::prove_auth_transfer_in_ppe; +use nssa::program_methods::PRIVACY_PRESERVING_CIRCUIT_ID; +use risc0_zkvm::{InnerReceipt, Receipt}; + +fn bench_verify(c: &mut Criterion) { + let (output, proof) = prove_auth_transfer_in_ppe().expect("prove auth_transfer in PPE"); + let journal = output.to_bytes(); + let proof_bytes = proof.into_inner(); + let inner: InnerReceipt = borsh::from_slice(&proof_bytes) + .context("decode InnerReceipt") + .expect("InnerReceipt deserialize"); + let receipt = Receipt::new(inner, journal); + + // Sanity check before the timed loop. + receipt + .verify(PRIVACY_PRESERVING_CIRCUIT_ID) + .expect("verify sanity check"); + + let mut g = c.benchmark_group("ppe"); + g.sample_size(100) + .warm_up_time(Duration::from_secs(2)) + .measurement_time(Duration::from_secs(15)) + .noise_threshold(0.05); + g.bench_function("verify_auth_transfer", |b| { + b.iter(|| { + receipt + .verify(black_box(PRIVACY_PRESERVING_CIRCUIT_ID)) + .expect("verify failed mid-loop"); + }); + }); + g.finish(); +} + +criterion_group!(benches, bench_verify); +criterion_main!(benches);