diff --git a/Cargo.lock b/Cargo.lock
index 4236cd7e..7255dbee 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2075,9 +2075,11 @@ dependencies = [
  "amm_core",
  "anyhow",
  "ata_core",
+ "authenticated_transfer_core",
  "borsh",
  "clap",
  "clock_core",
+ "criterion",
  "nssa",
  "nssa_core",
  "risc0-zkvm",
diff --git a/Justfile b/Justfile
index ac003a15..7af964a6 100644
--- a/Justfile
+++ b/Justfile
@@ -23,6 +23,12 @@ test:
     @echo "🧪 Running tests"
     RISC0_DEV_MODE=1 cargo nextest run --no-fail-fast
 
+# Run criterion benches: fast crypto primitives, then the slow PPE verify (real proving setup).
+bench:
+    @echo "📊 Running criterion benches"
+    cargo bench -p crypto_primitives_bench --bench primitives
+    cargo bench -p cycle_bench --features ppe --bench verify
+
 # Run Bedrock node in docker
 [working-directory: 'bedrock']
 run-bedrock:
diff --git a/docs/benchmarks/cycle_bench.md b/docs/benchmarks/cycle_bench.md
index fca9f12c..0e880070 100644
--- a/docs/benchmarks/cycle_bench.md
+++ b/docs/benchmarks/cycle_bench.md
@@ -63,23 +63,24 @@ Same `auth_transfer Transfer` instruction, standalone vs wrapped in the privacy
 
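-Linear fit depth=1..9: ≈ 53 s per additional chained call, intercept ≈ 73 s. Composition tax (single program PPE − standalone): ≈ 48 s. `proof_bytes` is constant: the outer succinct proof has fixed size; the journal carried alongside it scales with public state and is reported separately by `--verify`.
+Linear fit depth=1..9: ≈ 53 s per additional chained call, intercept ≈ 73 s. Composition tax (single program PPE − standalone): ≈ 48 s. `proof_bytes` is constant: the outer succinct proof has fixed size; the journal carried alongside it scales with public state and is not measured by the criterion verify bench.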
 
-## Verifier (`--verify`)
+## Verifier (criterion bench)
 
-One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` measured over 1000 iterations.
+One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` measured under criterion's statistical sampler. Bench file: `tools/cycle_bench/benches/verify.rs`. Setup (one full PPE prove) is outside the timed `iter` loop.
 
-| Field | Value |
-|---|---|
-| case | auth_transfer Transfer in PPE |
-| proof_bytes (S_agg) | 223,551 |
-| journal_bytes | 412 |
-| verify_ms (best / mean ± stdev, n=1000) | 11.71 / 12.06 ± 1.99 |
+Numbers from the most recent local run on the machine listed above. Criterion sample_size = 100, measurement_time = 15 s, warm_up_time = 2 s. Point estimate in the middle column (criterion's regression slope under linear sampling, or the sample mean if its `Auto` sampling mode falls back to flat sampling for this ≈ 12 ms routine); 95% CI bounds on either side. Run `cargo bench -p cycle_bench --features ppe --bench verify` to refresh.
+
+| Bench | low | point | high | outliers (mild + severe) |
+|---|---:|---:|---:|---:|
+| ppe/verify_auth_transfer | 12.016 ms | 12.215 ms | 12.469 ms | 1 + 10 |
+
+The corresponding `proof_bytes` (S_agg) for the bench receipt is captured by `--ppe` above; the verify bench itself only times the verify call.
 
 ## Findings
 
 - Proving cost scales with po2-bucketed `total_cycles`, not raw `user_cycles`. Trimming user_cycles only helps if it crosses a 2^N boundary.
 - Single-program PPE composition tax on M2 Pro CPU: ≈ 48 s (61.5 − 13.7).
 - Chained-call cost is linear at ≈ 53 s per call. A max-depth chain (10) would take ≈ 600 s standalone on this CPU.
-- `G_verify` is ≈ 12 ms and roughly constant per outer receipt (1000-iter stdev ≈ 2 ms). The succinct outer proof is fixed at 223,551 bytes (S_agg); verify is not on the latency critical path.
+- `G_verify` is ≈ 12 ms (criterion CI: 12.0–12.5 ms over 100 samples) and roughly constant per outer receipt. The succinct outer proof is fixed at 223,551 bytes (S_agg); verify is not on the latency critical path.
 
 ## Reproduce
 
@@ -87,10 +88,12 @@ One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::v
 cargo run --release -p cycle_bench
 cargo run --release -p cycle_bench --features prove -- --prove
 cargo run --release -p cycle_bench --features ppe -- --prove --ppe
-cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000
+
+# Verifier microbench via criterion:
+cargo bench -p cycle_bench --features ppe --bench verify
 ```
 
-JSON output: `target/cycle_bench.json`.
+JSON output: `target/cycle_bench.json` (bin), `target/criterion/ppe/verify_auth_transfer/` (verify bench).
 
 ## Caveats
 
diff --git a/tools/cycle_bench/Cargo.toml b/tools/cycle_bench/Cargo.toml
index 1a4a9db3..aa30ebc6 100644
--- a/tools/cycle_bench/Cargo.toml
+++ b/tools/cycle_bench/Cargo.toml
@@ -28,3 +28,11 @@ serde.workspace = true
 serde_json.workspace = true
 anyhow.workspace = true
 clap = { workspace = true }
+
+[dev-dependencies]
+criterion = { workspace = true, features = ["html_reports"] }
+
+[[bench]]
+name = "verify"
+harness = false
+required-features = ["ppe"]
diff --git a/tools/cycle_bench/README.md b/tools/cycle_bench/README.md
index 3b416dc8..7f3b3d95 100644
--- a/tools/cycle_bench/README.md
+++ b/tools/cycle_bench/README.md
@@ -4,6 +4,8 @@ Per-program Risc0 cycle counts, prover wall time, PPE composition cost, and veri
 
 ## Run
 
+The binary handles executor cycles, prover wall time, and PPE composition cost:
+
 ```sh
 # Executor cycles only (fast, ~seconds)
 cargo run --release -p cycle_bench
@@ -13,16 +15,30 @@ cargo run --release -p cycle_bench --features prove -- --prove
 
 # + PPE composition cases (very slow, ~hour)
 cargo run --release -p cycle_bench --features ppe -- --prove --ppe
-
-# + verifier microbench (G_verify): generates one PPE receipt, times verify x1000
-cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000
 ```
 
-`RISC0_DEV_MODE=1` skips proving entirely and is only useful for the executor path. Combine flags freely; output is printed to stdout and written to `target/cycle_bench.json` for regression diffs.
+The verifier microbenchmark (`G_verify`) lives in a criterion bench under `benches/verify.rs`:
+
+```sh
+# Generates one PPE receipt for auth_transfer Transfer (~minutes of setup),
+# then times Receipt::verify under criterion's statistical sampler.
+cargo bench -p cycle_bench --features ppe --bench verify
+```
+
+`RISC0_DEV_MODE=1` skips proving entirely and is only useful for the executor path. The bin writes to `target/cycle_bench.json`; criterion writes per-bench estimates under `target/criterion/`.
 
 ## What you'll see
 
 - Per-program executor cycles and segments, plus exec wall time as `best / mean ± stdev (n=N)`.
 - With `--prove`: prover total cycles, paging cycles, segments, and wall time.
-- With `--ppe`: end-to-end `execute_and_prove` wall time and S_agg (the borsh-serialized InnerReceipt length) for one auth-transfer-in-PPE case and a chain-caller depth sweep.
-- With `--verify`: verify wall time `best / mean ± stdev`, plus `proof_bytes` and `journal_bytes`.
+- With `--ppe`: end-to-end `execute_and_prove` wall time and `S_agg` (the borsh-serialized InnerReceipt length) for one auth-transfer-in-PPE case and a chain-caller depth sweep.
+- From the `verify` criterion bench: `ppe/verify_auth_transfer` point estimate (regression slope, or sample mean under flat sampling) with 95% CI bounds.
+
+## Baseline comparison (verify bench)
+
+```sh
+# On main:
+cargo bench -p cycle_bench --features ppe --bench verify -- --save-baseline main
+# On your branch:
+cargo bench -p cycle_bench --features ppe --bench verify -- --baseline main
+```
diff --git a/tools/cycle_bench/benches/verify.rs b/tools/cycle_bench/benches/verify.rs
new file mode 100644
index 00000000..d7bdfbe3
--- /dev/null
+++ b/tools/cycle_bench/benches/verify.rs
@@ -0,0 +1,47 @@
+//! Criterion bench for `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)`.
+//!
+//! Produces the `G_verify` fee-model parameter. Setup: one full PPE prove of an
+//! `auth_transfer` Transfer (minutes, runs once outside the timed loop). Measured
+//! op: `Receipt::verify` over a real PPE receipt.
+//!
+//! Run with: `cargo bench -p cycle_bench --features ppe --bench verify`.
+
+use std::{hint::black_box, time::Duration};
+
+use anyhow::Context as _;
+use criterion::{Criterion, criterion_group, criterion_main};
+use cycle_bench::ppe::prove_auth_transfer_in_ppe;
+use nssa::program_methods::PRIVACY_PRESERVING_CIRCUIT_ID;
+use risc0_zkvm::{InnerReceipt, Receipt};
+
+/// Criterion entry point: proves one `auth_transfer` Transfer in PPE, then times
+/// `Receipt::verify` on the resulting receipt under the `ppe/verify_auth_transfer` id.
+fn bench_verify(c: &mut Criterion) {
+    // Untimed setup: prove once, then rebuild a `Receipt` from the decoded proof and output bytes.
+    let (ppe_out, ppe_proof) = prove_auth_transfer_in_ppe().expect("prove auth_transfer in PPE");
+    let journal_bytes = ppe_out.to_bytes();
+    let inner_receipt = borsh::from_slice::<InnerReceipt>(&ppe_proof.into_inner())
+        .context("decode InnerReceipt")
+        .expect("InnerReceipt deserialize");
+    let receipt = Receipt::new(inner_receipt, journal_bytes);
+
+    // Verify once up front so a bad receipt aborts before any timing starts.
+    let sanity = receipt.verify(PRIVACY_PRESERVING_CIRCUIT_ID);
+    sanity.expect("verify sanity check");
+
+    let mut group = c.benchmark_group("ppe");
+    group.sample_size(100);
+    group.warm_up_time(Duration::from_secs(2));
+    group.measurement_time(Duration::from_secs(15));
+    group.noise_threshold(0.05);
+    group.bench_function("verify_auth_transfer", |bencher| {
+        bencher.iter(|| {
+            let outcome = receipt.verify(black_box(PRIVACY_PRESERVING_CIRCUIT_ID));
+            outcome.expect("verify failed mid-loop");
+        });
+    });
+    group.finish();
+}
+
+criterion_group!(benches, bench_verify);
+criterion_main!(benches);