feat(cycle_bench): add criterion verify bench for G_verify

This commit is contained in:
Moudy 2026-05-21 16:44:40 +02:00
parent fb89e7549b
commit a9bf3fbfe7
6 changed files with 99 additions and 17 deletions

2
Cargo.lock generated
View File

@ -2075,9 +2075,11 @@ dependencies = [
"amm_core", "amm_core",
"anyhow", "anyhow",
"ata_core", "ata_core",
"authenticated_transfer_core",
"borsh", "borsh",
"clap", "clap",
"clock_core", "clock_core",
"criterion",
"nssa", "nssa",
"nssa_core", "nssa_core",
"risc0-zkvm", "risc0-zkvm",

View File

@ -23,6 +23,12 @@ test:
@echo "🧪 Running tests" @echo "🧪 Running tests"
RISC0_DEV_MODE=1 cargo nextest run --no-fail-fast RISC0_DEV_MODE=1 cargo nextest run --no-fail-fast
# Run criterion benches: fast crypto primitives, then the slow PPE verify (real proving setup).
bench:
@echo "📊 Running criterion benches"
cargo bench -p crypto_primitives_bench --bench primitives
cargo bench -p cycle_bench --features ppe --bench verify
# Run Bedrock node in docker # Run Bedrock node in docker
[working-directory: 'bedrock'] [working-directory: 'bedrock']
run-bedrock: run-bedrock:

View File

@ -63,23 +63,24 @@ Same `auth_transfer Transfer` instruction, standalone vs wrapped in the privacy
Linear fit depth=1..9: ≈ 53 s per additional chained call, intercept ≈ 73 s. Composition tax (single program PPE − standalone): ≈ 48 s. `proof_bytes` is constant: the outer succinct proof has fixed size; the journal carried alongside it scales with public state and is reported separately by `--verify`. Linear fit depth=1..9: ≈ 53 s per additional chained call, intercept ≈ 73 s. Composition tax (single program PPE − standalone): ≈ 48 s. `proof_bytes` is constant: the outer succinct proof has fixed size; the journal carried alongside it scales with public state and is reported separately by `--verify`.
## Verifier (`--verify`) ## Verifier (criterion bench)
One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` measured over 1000 iterations. One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` measured under criterion's statistical sampler. Bench file: `tools/cycle_bench/benches/verify.rs`. Setup (one full PPE prove) is outside the timed `iter` loop.
| Field | Value | Numbers from the most recent local run on the machine listed above. Criterion sample_size = 100, measurement_time = 15 s, warm_up_time = 2 s. Slope-regression point estimate in the middle column; 95% CI bounds on either side. Run `cargo bench -p cycle_bench --features ppe --bench verify` to refresh.
|---|---|
| case | auth_transfer Transfer in PPE | | Bench | low | point | high | outliers (mild + severe) |
| proof_bytes (S_agg) | 223,551 | |---|---:|---:|---:|---:|
| journal_bytes | 412 | | ppe/verify_auth_transfer | 12.016 ms | 12.215 ms | 12.469 ms | 1 + 10 |
| verify_ms (best / mean ± stdev, n=1000) | 11.71 / 12.06 ± 1.99 |
The corresponding `proof_bytes` (S_agg) for the bench receipt is captured by `--ppe` above; the verify bench itself only times the verify call.
## Findings ## Findings
- Proving cost scales with po2-bucketed `total_cycles`, not raw `user_cycles`. Trimming user_cycles only helps if it crosses a 2^N boundary. - Proving cost scales with po2-bucketed `total_cycles`, not raw `user_cycles`. Trimming user_cycles only helps if it crosses a 2^N boundary.
- Single-program PPE composition tax on M2 Pro CPU: ≈ 48 s (61.5 − 13.7). - Single-program PPE composition tax on M2 Pro CPU: ≈ 48 s (61.5 − 13.7).
- Chained-call cost is linear at ≈ 53 s per call. A max-depth chain (10) would take ≈ 600 s standalone on this CPU. - Chained-call cost is linear at ≈ 53 s per call. A max-depth chain (10) would take ≈ 600 s standalone on this CPU.
- `G_verify` is ≈ 12 ms and roughly constant per outer receipt (1000-iter stdev ≈ 2 ms). The succinct outer proof is fixed at 223,551 bytes (S_agg); verify is not on the latency critical path. - `G_verify` is ≈ 12 ms (criterion CI: 12.0–12.5 ms over 100 samples) and roughly constant per outer receipt. The succinct outer proof is fixed at 223,551 bytes (S_agg); verify is not on the latency critical path.
## Reproduce ## Reproduce
@ -87,10 +88,12 @@ One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::v
cargo run --release -p cycle_bench cargo run --release -p cycle_bench
cargo run --release -p cycle_bench --features prove -- --prove cargo run --release -p cycle_bench --features prove -- --prove
cargo run --release -p cycle_bench --features ppe -- --prove --ppe cargo run --release -p cycle_bench --features ppe -- --prove --ppe
cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000
# Verifier microbench via criterion:
cargo bench -p cycle_bench --features ppe --bench verify
``` ```
JSON output: `target/cycle_bench.json`. JSON output: `target/cycle_bench.json` (bin), `target/criterion/ppe/verify_auth_transfer/` (verify bench).
## Caveats ## Caveats

View File

@ -28,3 +28,11 @@ serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
anyhow.workspace = true anyhow.workspace = true
clap = { workspace = true } clap = { workspace = true }
[dev-dependencies]
criterion = { workspace = true, features = ["html_reports"] }
[[bench]]
name = "verify"
harness = false
required-features = ["ppe"]

View File

@ -4,6 +4,8 @@ Per-program Risc0 cycle counts, prover wall time, PPE composition cost, and veri
## Run ## Run
The binary handles executor cycles, prover wall time, and PPE composition cost:
```sh ```sh
# Executor cycles only (fast, ~seconds) # Executor cycles only (fast, ~seconds)
cargo run --release -p cycle_bench cargo run --release -p cycle_bench
@ -13,16 +15,30 @@ cargo run --release -p cycle_bench --features prove -- --prove
# + PPE composition cases (very slow, ~hour) # + PPE composition cases (very slow, ~hour)
cargo run --release -p cycle_bench --features ppe -- --prove --ppe cargo run --release -p cycle_bench --features ppe -- --prove --ppe
# + verifier microbench (G_verify): generates one PPE receipt, times verify x1000
cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000
``` ```
`RISC0_DEV_MODE=1` skips proving entirely and is only useful for the executor path. Combine flags freely; output is printed to stdout and written to `target/cycle_bench.json` for regression diffs. The verifier microbenchmark (`G_verify`) lives in a criterion bench under `benches/verify.rs`:
```sh
# Generates one PPE receipt for auth_transfer Transfer (~minutes of setup),
# then times Receipt::verify under criterion's statistical sampler.
cargo bench -p cycle_bench --features ppe --bench verify
```
`RISC0_DEV_MODE=1` skips proving entirely and is only useful for the executor path. The bin writes to `target/cycle_bench.json`; criterion writes per-bench estimates under `target/criterion/`.
## What you'll see ## What you'll see
- Per-program executor cycles and segments, plus exec wall time as `best / mean ± stdev (n=N)`. - Per-program executor cycles and segments, plus exec wall time as `best / mean ± stdev (n=N)`.
- With `--prove`: prover total cycles, paging cycles, segments, and wall time. - With `--prove`: prover total cycles, paging cycles, segments, and wall time.
- With `--ppe`: end-to-end `execute_and_prove` wall time and S_agg (the borsh-serialized InnerReceipt length) for one auth-transfer-in-PPE case and a chain-caller depth sweep. - With `--ppe`: end-to-end `execute_and_prove` wall time and `S_agg` (the borsh-serialized InnerReceipt length) for one auth-transfer-in-PPE case and a chain-caller depth sweep.
- With `--verify`: verify wall time `best / mean ± stdev`, plus `proof_bytes` and `journal_bytes`. - From the `verify` criterion bench: `ppe/verify_auth_transfer` slope-regression point estimate with 95% CI bounds.
## Baseline comparison (verify bench)
```sh
# On main:
cargo bench -p cycle_bench --features ppe --bench verify -- --save-baseline main
# On your branch:
cargo bench -p cycle_bench --features ppe --bench verify -- --baseline main
```

View File

@ -0,0 +1,47 @@
//! Criterion bench for `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)`.
//!
//! Produces the `G_verify` fee-model parameter. Setup: one full PPE prove of an
//! `auth_transfer` Transfer (minutes, runs once outside the timed loop). Measured
//! op: `Receipt::verify` over a real PPE receipt.
//!
//! Run with: `cargo bench -p cycle_bench --features ppe --bench verify`.
use std::{hint::black_box, time::Duration};
use anyhow::Context as _;
use criterion::{Criterion, criterion_group, criterion_main};
use cycle_bench::ppe::prove_auth_transfer_in_ppe;
use nssa::program_methods::PRIVACY_PRESERVING_CIRCUIT_ID;
use risc0_zkvm::{InnerReceipt, Receipt};
/// Benchmarks `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` on a real PPE receipt.
///
/// Setup runs once, outside the timed loop: an `auth_transfer` Transfer is proven inside
/// the PPE, the borsh-encoded proof bytes are decoded into an [`InnerReceipt`], and that is
/// paired with the output's bytes (used as the journal) to rebuild a [`Receipt`]. Only the
/// `verify` call is timed; the result is registered as `ppe/verify_auth_transfer`.
///
/// # Panics
///
/// Panics if proving fails, if the proof bytes do not decode as an `InnerReceipt`, or if
/// verification fails in the pre-flight check or during measurement.
fn bench_verify(c: &mut Criterion) {
    // One-time setup (not timed): produce the receipt that every iteration verifies.
    let (output, proof) = prove_auth_transfer_in_ppe().expect("prove auth_transfer in PPE");
    let journal = output.to_bytes();
    let proof_bytes = proof.into_inner();
    let inner: InnerReceipt = borsh::from_slice(&proof_bytes)
        .context("decode InnerReceipt")
        .expect("InnerReceipt deserialize");
    let receipt = Receipt::new(inner, journal);

    // Sanity check before the timed loop, so a bad receipt fails up front rather than
    // part-way through sampling.
    receipt
        .verify(PRIVACY_PRESERVING_CIRCUIT_ID)
        .expect("verify sanity check");

    let mut g = c.benchmark_group("ppe");
    g.sample_size(100)
        .warm_up_time(Duration::from_secs(2))
        .measurement_time(Duration::from_secs(15))
        .noise_threshold(0.05);

    g.bench_function("verify_auth_transfer", |b| {
        b.iter(|| {
            // Route both inputs through `black_box` so the optimizer must treat the
            // receipt and the image id as opaque on every iteration.
            black_box(&receipt)
                .verify(black_box(PRIVACY_PRESERVING_CIRCUIT_ID))
                .expect("verify failed mid-loop");
        });
    });
    g.finish();
}
// Register `bench_verify` under the `benches` group and hand the group to criterion's
// entry-point macro (this bench target is declared with `harness = false`).
criterion_group!(benches, bench_verify);
criterion_main!(benches);