From 05f41f81e97973d7ed24209c88f5606baa92dca7 Mon Sep 17 00:00:00 2001 From: Moudy Date: Thu, 14 May 2026 18:14:37 +0200 Subject: [PATCH] feat: add cycle_bench tool for executor, prove, PPE, and verify cycle measurements --- Cargo.toml | 1 + docs/benchmarks/README.md | 11 + docs/benchmarks/cycle_bench.md | 117 ++++++ tools/cycle_bench/Cargo.toml | 29 ++ tools/cycle_bench/README.md | 36 ++ tools/cycle_bench/src/main.rs | 639 +++++++++++++++++++++++++++++++++ tools/cycle_bench/src/ppe.rs | 307 ++++++++++++++++ tools/cycle_bench/src/stats.rs | 54 +++ 8 files changed, 1194 insertions(+) create mode 100644 docs/benchmarks/README.md create mode 100644 docs/benchmarks/cycle_bench.md create mode 100644 tools/cycle_bench/Cargo.toml create mode 100644 tools/cycle_bench/README.md create mode 100644 tools/cycle_bench/src/main.rs create mode 100644 tools/cycle_bench/src/ppe.rs create mode 100644 tools/cycle_bench/src/stats.rs diff --git a/Cargo.toml b/Cargo.toml index 1bce967f..a69a71dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ members = [ "examples/program_deployment/methods/guest", "testnet_initial_state", "indexer/ffi", + "tools/cycle_bench", ] [workspace.dependencies] diff --git a/docs/benchmarks/README.md b/docs/benchmarks/README.md new file mode 100644 index 00000000..18f22225 --- /dev/null +++ b/docs/benchmarks/README.md @@ -0,0 +1,11 @@ +# Benchmarks + +Bench tools live under `tools/` with READMEs for how to run each one. +This directory holds the result write-ups: machine, raw tables, and +short findings. + +| Bench | Doc | +|---|---| +| cycle_bench | [cycle_bench.md](cycle_bench.md) | + +All numbers are from a single M2 Pro dev box unless noted otherwise. 
diff --git a/docs/benchmarks/cycle_bench.md b/docs/benchmarks/cycle_bench.md new file mode 100644 index 00000000..62db9b1d --- /dev/null +++ b/docs/benchmarks/cycle_bench.md @@ -0,0 +1,117 @@ +# cycle_bench + +Per-program Risc0 cycle counts, prover wall time, PPE composition cost, +and verifier wall time for the built-in LEZ programs. Inputs for the +fee model's `G_executor`, `G_prove`, `G_verify`, and `S_agg` parameters. + +## Machine + +| Field | Value | +|---|---| +| Chip | Apple M2 Pro (8P+4E) | +| RAM | 16 GB | +| OS | macOS 15.5 | +| Rust | 1.94.0 | +| Risc0 zkVM | 3.0.5 | +| Profile | release | +| GPU acceleration | none | + +## Executor cycles + +`SessionInfo::cycles()` per instruction. Deterministic across runs. Wall time +is `best / mean ± stdev` over 5 timed iterations (1 warmup discarded). + +| Program | Instruction | user_cycles | segments | exec_ms (best / mean ± stdev) | +|---|---|---:|---:|---| +| authenticated_transfer | Initialize | 43,642 | 1 | 18.86 / 19.41 ± 0.48 | +| authenticated_transfer | Transfer | 77,095 | 1 | 19.67 / 20.84 ± 1.16 | +| token | Burn | 116,546 | 1 | 24.86 / 25.46 ± 0.63 | +| token | Mint | 116,862 | 1 | 24.47 / 25.08 ± 0.42 | +| token | Transfer | 127,726 | 1 | 25.00 / 25.40 ± 0.29 | +| clock | Tick (no rollups) | 137,022 | 1 | 21.18 / 21.57 ± 0.41 | +| ata | Create | 175,056 | 1 | 23.64 / 24.94 ± 1.09 | +| amm | SwapExactInput | 508,634 | 1 | 34.21 / 34.77 ± 0.55 | +| amm | AddLiquidity | 642,774 | 1 | 37.59 / 37.87 ± 0.28 | + +## Real proving (`--prove`) + +`prover.prove(env, elf)` wall time per program on CPU. `total_cycles` is +`user_cycles` plus paging and overhead cycles, padded to a power of two. 
+ +| Program | Instruction | total_cycles | prove_ms | prove_s | +|---|---|---:|---:|---:| +| authenticated_transfer | Initialize | 131,072 | 11,881 | 11.9 | +| authenticated_transfer | Transfer | 131,072 | 13,705 | 13.7 | +| token | Burn | 262,144 | 22,893 | 22.9 | +| token | Mint | 262,144 | 23,927 | 23.9 | +| token | Transfer | 262,144 | 27,178 | 27.2 | +| clock | Tick | 262,144 | 23,486 | 23.5 | +| ata | Create | 262,144 | 21,093 | 21.1 | +| amm | AddLiquidity | 1,048,576 | 111,654 | 111.7 | +| amm | SwapExactInput | 1,048,576 | 126,400 | 126.4 | + +Linear fit across po2 buckets: ≈ 100 µs per total cycle (≈ 10k cycles/s +throughput on this CPU). + +## PPE composition + chain-call sweep (`--ppe`) + +Same `auth_transfer Transfer` instruction, standalone vs wrapped in the +privacy circuit; plus the `chain_caller` test program with N chained +`authenticated_transfer` calls. `proof_bytes` is the borsh-serialized +InnerReceipt (S_agg in the fee model). + +| Case | prove_ms | prove_s | proof_bytes | +|---|---:|---:|---:| +| auth_transfer Transfer standalone | 13,705 | 13.7 | n/a | +| auth_transfer Transfer in PPE | 61,486 | 61.5 | 223,551 | +| chain_caller depth=1 | 122,590 | 122.6 | 223,551 | +| chain_caller depth=3 | 231,974 | 232.0 | 223,551 | +| chain_caller depth=5 | 372,123 | 372.1 | 223,551 | +| chain_caller depth=9 | 544,280 | 544.3 | 223,551 | + +Linear fit depth=1..9: ≈ 53 s per additional chained call, intercept ≈ 73 s. +Composition tax (single program PPE − standalone): ≈ 48 s. `proof_bytes` is +constant: the outer succinct proof has fixed size; the journal carried +alongside it scales with public state and is reported separately by `--verify`. + +## Verifier (`--verify`) + +One PPE receipt generated once (auth_transfer Transfer in PPE), then +`Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` measured over 1000 iterations. 
+ +| Field | Value | +|---|---| +| case | auth_transfer Transfer in PPE | +| proof_bytes (S_agg) | 223,551 | +| journal_bytes | 412 | +| verify_ms (best / mean ± stdev, n=1000) | 11.71 / 12.06 ± 1.99 | + +## Findings + +- Proving cost scales with po2-bucketed `total_cycles`, not raw `user_cycles`. + Trimming `user_cycles` only helps if it drops `total_cycles` below a 2^N boundary. +- Single-program PPE composition tax on M2 Pro CPU: ≈ 48 s (61.5 − 13.7). +- Chained-call cost is linear at ≈ 53 s per call. A max-depth chain (10) would + take ≈ 600 s in PPE on this CPU. +- `G_verify` is ≈ 12 ms and roughly constant per outer receipt (1000-iter + stdev ≈ 2 ms). The succinct outer proof is fixed at 223,551 bytes (S_agg); + verify is not on the latency critical path. + +## Reproduce + +```sh +cargo run --release -p cycle_bench +cargo run --release -p cycle_bench --features prove -- --prove +cargo run --release -p cycle_bench --features ppe -- --prove --ppe +cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000 +``` + +JSON output: `target/cycle_bench.json`. + +## Caveats + +- CPU-only proving on a dev laptop. Production prover hardware (GPU, + specialised CPU pipelines) will produce much smaller numbers; relative + ordering should be preserved. +- Single-segment cases only; multi-segment programs would pay continuation + overhead not measured here. 
diff --git a/tools/cycle_bench/Cargo.toml b/tools/cycle_bench/Cargo.toml new file mode 100644 index 00000000..6847b0c5 --- /dev/null +++ b/tools/cycle_bench/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "cycle_bench" +version = "0.1.0" +edition = "2024" +license = { workspace = true } +publish = false + +[lints] +workspace = true + +[features] +default = [] +prove = ["nssa/prove", "risc0-zkvm/prove"] +ppe = ["prove"] + +[dependencies] +nssa = { workspace = true } +nssa_core = { workspace = true, features = ["host"] } +clock_core.workspace = true +token_core.workspace = true +amm_core.workspace = true +ata_core.workspace = true + +risc0-zkvm.workspace = true +borsh.workspace = true +serde.workspace = true +serde_json.workspace = true +anyhow.workspace = true +clap = { workspace = true } diff --git a/tools/cycle_bench/README.md b/tools/cycle_bench/README.md new file mode 100644 index 00000000..2bc5462f --- /dev/null +++ b/tools/cycle_bench/README.md @@ -0,0 +1,36 @@ +# cycle_bench + +Per-program Risc0 cycle counts, prover wall time, PPE composition cost, and +verifier wall time for the built-in LEZ programs. Feeds the fee model +(`G_executor`, `G_prove`, `G_verify`, `S_agg`). + +## Run + +```sh +# Executor cycles only (fast, ~seconds) +cargo run --release -p cycle_bench + +# + real proving per program (slow, ~minutes) +cargo run --release -p cycle_bench --features prove -- --prove + +# + PPE composition cases (very slow, ~hour) +cargo run --release -p cycle_bench --features ppe -- --prove --ppe + +# + verifier microbench (G_verify): generates one PPE receipt, times verify x1000 +cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000 +``` + +`RISC0_DEV_MODE=1` skips proving entirely and is only useful for the executor path. +Combine flags freely; output is printed to stdout and written to +`target/cycle_bench.json` for regression diffs. 
+ +## What you'll see + +- Per-program executor cycles and segments, plus exec wall time as + `best / mean ± stdev (n=N)`. +- With `--prove`: prover total cycles, paging cycles, segments, and wall time. +- With `--ppe`: end-to-end `execute_and_prove` wall time and S_agg + (the borsh-serialized InnerReceipt length) for one auth-transfer-in-PPE + case and a chain-caller depth sweep. +- With `--verify`: verify wall time `best / mean ± stdev`, plus + `proof_bytes` and `journal_bytes`. diff --git a/tools/cycle_bench/src/main.rs b/tools/cycle_bench/src/main.rs new file mode 100644 index 00000000..6b4c0e57 --- /dev/null +++ b/tools/cycle_bench/src/main.rs @@ -0,0 +1,639 @@ +//! Measures Risc0 user cycles per built-in program instruction. +//! +//! Runs each guest ELF through the Risc0 executor (no proving by default) with realistic inputs +//! drawn from the existing per-program unit tests, then prints a table and writes a +//! JSON dump for regression comparison. `--prove`, `--ppe`, `--verify` add slower passes. +//! +//! Run with `cargo run --release -p cycle_bench`. `RISC0_DEV_MODE` has no effect on +//! executor cycle counts. 
+ +#![allow( + clippy::arithmetic_side_effects, + clippy::print_stdout, + clippy::print_stderr, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + reason = "Bench tool: matches test-style fixture code" +)] + +use std::{path::PathBuf, time::Instant}; + +mod ppe; +mod stats; + +use stats::Stats; + +use amm_core::{ + PoolDefinition, compute_liquidity_token_pda, compute_pool_pda, compute_vault_pda, +}; +use anyhow::Result; +use ata_core::{compute_ata_seed, get_associated_token_account_id}; +use clap::Parser; +use clock_core::{ + CLOCK_01_PROGRAM_ACCOUNT_ID, CLOCK_10_PROGRAM_ACCOUNT_ID, CLOCK_50_PROGRAM_ACCOUNT_ID, + ClockAccountData, +}; +use nssa::program_methods::{ + AMM_ELF, ASSOCIATED_TOKEN_ACCOUNT_ELF, AUTHENTICATED_TRANSFER_ELF, CLOCK_ELF, TOKEN_ELF, +}; +use nssa_core::{ + Timestamp, + account::{Account, AccountId, AccountWithMetadata, Data}, + program::{InstructionData, ProgramId}, +}; +use risc0_zkvm::{ExecutorEnv, default_executor, default_prover}; +use serde::Serialize; +use token_core::{TokenDefinition, TokenHolding}; + +#[derive(Parser, Debug)] +#[command(about = "Per-program executor and (optionally) prover cycle measurements")] +struct Cli { + /// Also run prover.prove for each case and report wall time + cycles. Slow. + #[arg(long)] + prove: bool, + + /// Also run privacy-preserving execution circuit (PPE) composition cases: + /// (a) single auth_transfer Transfer through `execute_and_prove`, (b) chain_caller + /// with depth N=1,3,5,9. Requires --features ppe at build time. Very slow. + #[arg(long)] + ppe: bool, + + /// After running --ppe-style proving once for auth_transfer-in-PPE, time + /// receipt.verify(PRIVACY_PRESERVING_CIRCUIT_ID) over many iterations. + /// Produces G_verify for the fee model. Requires --features ppe. + #[arg(long)] + verify: bool, + + /// Iterations for --verify. Default matches the fee-model handoff target. 
+ #[arg(long, default_value_t = 1000)] + verify_iters: usize, + + /// Iterations for executor wall-time sampling per case. First iter is + /// discarded as warmup, remaining N feed the stats. + #[arg(long, default_value_t = 5)] + exec_iters: usize, +} + +const AMM_PROGRAM_ID: ProgramId = [42; 8]; +const TOKEN_PROGRAM_ID: ProgramId = [15; 8]; +const ATA_PROGRAM_ID: ProgramId = [88; 8]; +const CLOCK_PROGRAM_ID: ProgramId = [13; 8]; +const AUTH_TRANSFER_PROGRAM_ID: ProgramId = [7; 8]; + +#[derive(Debug, Serialize)] +struct BenchResult { + program: &'static str, + instruction: &'static str, + user_cycles: u64, + segments: usize, + exec_stats: Stats, + /// Stats over prover.prove(env, elf) wall-clock samples. Only populated when --prove is set. + /// Single-sample (n=1) when --prove is on without explicit repetition, since proving is slow. + prove_stats: Option, + /// Total cycles (with continuation overhead, paging, po2 padding) from ProveInfo.stats. + prove_total_cycles: Option, + /// User cycles from ProveInfo.stats (should match executor cycles). + prove_user_cycles: Option, + /// Paging cycles from ProveInfo.stats. + prove_paging_cycles: Option, + /// Segments from ProveInfo.stats. + prove_segments: Option, +} + +fn run_case( + program: &'static str, + instruction_label: &'static str, + elf: &[u8], + self_program_id: ProgramId, + pre_states: Vec, + instruction: &I, + prove: bool, + exec_iters: usize, +) -> Result { + let caller_program_id: Option = None; + let instruction_words: InstructionData = risc0_zkvm::serde::to_vec(instruction)?; + + // One warmup pass discarded, then `exec_iters` samples. The executor has + // large per-call setup overhead (ELF parsing, env init); reporting both + // best-of-N and mean ± stdev shows whether jitter is significant. 
+ let mut samples: Vec = Vec::with_capacity(exec_iters); + let mut last_info = None; + let total = exec_iters.saturating_add(1).max(2); + for iter in 0..total { + let mut env_builder = ExecutorEnv::builder(); + env_builder + .write(&self_program_id)? + .write(&caller_program_id)? + .write(&pre_states)? + .write(&instruction_words)?; + let env = env_builder.build()?; + + let started = Instant::now(); + let info = default_executor().execute(env, elf)?; + let elapsed_ms = started.elapsed().as_secs_f64() * 1_000.0; + + if iter > 0 { + samples.push(elapsed_ms); + } + last_info = Some(info); + } + let info = last_info.expect("at least one iteration"); + let exec_stats = Stats::from_samples(&samples); + + let mut prove_stats = None; + let mut prove_total_cycles = None; + let mut prove_user_cycles = None; + let mut prove_paging_cycles = None; + let mut prove_segments = None; + if prove { + let mut env_builder = ExecutorEnv::builder(); + env_builder + .write(&self_program_id)? + .write(&caller_program_id)? + .write(&pre_states)? 
+ .write(&instruction_words)?; + let env = env_builder.build()?; + + let started = Instant::now(); + let prove_info = default_prover() + .prove(env, elf) + .map_err(|e| anyhow::anyhow!("prove failed: {e}"))?; + let prove_ms = started.elapsed().as_secs_f64() * 1_000.0; + prove_stats = Some(Stats::from_samples(&[prove_ms])); + prove_total_cycles = Some(prove_info.stats.total_cycles); + prove_user_cycles = Some(prove_info.stats.user_cycles); + prove_paging_cycles = Some(prove_info.stats.paging_cycles); + prove_segments = Some(prove_info.stats.segments); + eprintln!( + " prove({program}/{instruction_label}): {prove_ms:.1} ms ({:.1}s), total_cycles={}, segments={}", + prove_ms / 1_000.0, prove_info.stats.total_cycles, prove_info.stats.segments, + ); + } + + Ok(BenchResult { + program, + instruction: instruction_label, + user_cycles: info.cycles(), + segments: info.segments.len(), + exec_stats, + prove_stats, + prove_total_cycles, + prove_user_cycles, + prove_paging_cycles, + prove_segments, + }) +} + +fn authenticated_transfer_init() -> Vec { + vec![AccountWithMetadata { + account: Account::default(), + is_authorized: true, + account_id: AccountId::new([1; 32]), + }] +} + +fn authenticated_transfer_transfer() -> Vec { + let sender = AccountWithMetadata { + account: Account { + balance: 1_000_000, + ..Account::default() + }, + is_authorized: true, + account_id: AccountId::new([1; 32]), + }; + let recipient = AccountWithMetadata { + account: Account::default(), + is_authorized: false, + account_id: AccountId::new([2; 32]), + }; + vec![sender, recipient] +} + +fn token_holding( + definition_id: AccountId, + account_id: AccountId, + balance: u128, + is_authorized: bool, +) -> AccountWithMetadata { + AccountWithMetadata { + account: Account { + program_owner: TOKEN_PROGRAM_ID, + balance: 0, + data: Data::from(&TokenHolding::Fungible { + definition_id, + balance, + }), + nonce: 0_u128.into(), + }, + is_authorized, + account_id, + } +} + +fn token_definition( + account_id: 
AccountId, + total_supply: u128, + is_authorized: bool, +) -> AccountWithMetadata { + AccountWithMetadata { + account: Account { + program_owner: TOKEN_PROGRAM_ID, + balance: 0, + data: Data::from(&TokenDefinition::Fungible { + name: String::from("test"), + total_supply, + metadata_id: None, + }), + nonce: 0_u128.into(), + }, + is_authorized, + account_id, + } +} + +fn token_transfer_pre_states() -> Vec { + let def = AccountId::new([15; 32]); + let sender = token_holding(def, AccountId::new([17; 32]), 100_000, true); + let recipient = token_holding(def, AccountId::new([42; 32]), 50_000, true); + vec![sender, recipient] +} + +fn token_mint_pre_states() -> Vec { + let def_id = AccountId::new([15; 32]); + let def = token_definition(def_id, 100_000, true); + let holding = token_holding(def_id, AccountId::new([17; 32]), 1_000, true); + vec![def, holding] +} + +fn token_burn_pre_states() -> Vec { + let def_id = AccountId::new([15; 32]); + let def = token_definition(def_id, 100_000, true); + let holding = token_holding(def_id, AccountId::new([17; 32]), 1_000, true); + vec![def, holding] +} + +fn clock_account(account_id: AccountId, block_id: u64) -> AccountWithMetadata { + AccountWithMetadata { + account: Account { + program_owner: CLOCK_PROGRAM_ID, + balance: 0, + data: ClockAccountData { + block_id, + timestamp: Timestamp::from(0_u64), + } + .to_bytes() + .try_into() + .expect("ClockAccountData should fit in account data"), + nonce: 0_u128.into(), + }, + is_authorized: false, + account_id, + } +} + +fn clock_pre_states_tick_at(block_id: u64) -> Vec { + vec![ + clock_account(CLOCK_01_PROGRAM_ACCOUNT_ID, block_id), + clock_account(CLOCK_10_PROGRAM_ACCOUNT_ID, block_id), + clock_account(CLOCK_50_PROGRAM_ACCOUNT_ID, block_id), + ] +} + +fn amm_token_a_def_id() -> AccountId { + AccountId::new([42; 32]) +} +fn amm_token_b_def_id() -> AccountId { + AccountId::new([43; 32]) +} +fn amm_pool_id() -> AccountId { + compute_pool_pda(AMM_PROGRAM_ID, amm_token_a_def_id(), 
amm_token_b_def_id()) +} +fn amm_vault_a_id() -> AccountId { + compute_vault_pda(AMM_PROGRAM_ID, amm_pool_id(), amm_token_a_def_id()) +} +fn amm_vault_b_id() -> AccountId { + compute_vault_pda(AMM_PROGRAM_ID, amm_pool_id(), amm_token_b_def_id()) +} +fn amm_lp_def_id() -> AccountId { + compute_liquidity_token_pda(AMM_PROGRAM_ID, amm_pool_id()) +} + +/// Pool seeded with reserves 1_000 / 500, lp supply sqrt(1000*500) = 707. +fn amm_pool_account() -> AccountWithMetadata { + let reserve_a: u128 = 1_000; + let reserve_b: u128 = 500; + let lp_supply: u128 = (reserve_a * reserve_b).isqrt(); + AccountWithMetadata { + account: Account { + program_owner: AMM_PROGRAM_ID, + balance: 0, + data: Data::from(&PoolDefinition { + definition_token_a_id: amm_token_a_def_id(), + definition_token_b_id: amm_token_b_def_id(), + vault_a_id: amm_vault_a_id(), + vault_b_id: amm_vault_b_id(), + liquidity_pool_id: amm_lp_def_id(), + liquidity_pool_supply: lp_supply, + reserve_a, + reserve_b, + fees: 0, + active: true, + }), + nonce: 0_u128.into(), + }, + is_authorized: true, + account_id: amm_pool_id(), + } +} + +fn amm_swap_pre_states() -> Vec { + let pool = amm_pool_account(); + let vault_a = token_holding(amm_token_a_def_id(), amm_vault_a_id(), 1_000, true); + let vault_b = token_holding(amm_token_b_def_id(), amm_vault_b_id(), 500, true); + let user_a = token_holding(amm_token_a_def_id(), AccountId::new([45; 32]), 1_000, true); + let user_b = token_holding(amm_token_b_def_id(), AccountId::new([46; 32]), 500, false); + vec![pool, vault_a, vault_b, user_a, user_b] +} + +fn amm_add_liquidity_pre_states() -> Vec { + let pool = amm_pool_account(); + let vault_a = token_holding(amm_token_a_def_id(), amm_vault_a_id(), 1_000, true); + let vault_b = token_holding(amm_token_b_def_id(), amm_vault_b_id(), 500, true); + let lp_supply: u128 = (1_000_u128 * 500_u128).isqrt(); + let lp_def = token_definition(amm_lp_def_id(), lp_supply, true); + let user_a = token_holding(amm_token_a_def_id(), 
AccountId::new([45; 32]), 1_000, true); + let user_b = token_holding(amm_token_b_def_id(), AccountId::new([46; 32]), 500, true); + let user_lp = token_holding(amm_lp_def_id(), AccountId::new([47; 32]), 0, true); + vec![pool, vault_a, vault_b, lp_def, user_a, user_b, user_lp] +} + +fn ata_create_pre_states() -> Vec { + let owner_id = AccountId::new([91; 32]); + let definition_id = AccountId::new([15; 32]); + let owner = AccountWithMetadata { + account: Account::default(), + is_authorized: true, + account_id: owner_id, + }; + let token_def = token_definition(definition_id, 100_000, false); + let seed = compute_ata_seed(owner_id, definition_id); + let ata_id = get_associated_token_account_id(&ATA_PROGRAM_ID, &seed); + let ata_account = AccountWithMetadata { + account: Account::default(), + is_authorized: false, + account_id: ata_id, + }; + vec![owner, token_def, ata_account] +} + +fn main() -> Result<()> { + let cli = Cli::parse(); + let prove = cli.prove; + let exec_iters = cli.exec_iters.max(1); + if prove { + eprintln!("cycle_bench: prove mode ON, this will be slow (~minutes per program)"); + } + + let mut results: Vec = Vec::new(); + + let transfer_amount: u128 = 5_000; + results.push(run_case( + "authenticated_transfer", + "Transfer", + AUTHENTICATED_TRANSFER_ELF, + AUTH_TRANSFER_PROGRAM_ID, + authenticated_transfer_transfer(), + &transfer_amount, + prove, + exec_iters, + )?); + let init_amount: u128 = 0; + results.push(run_case( + "authenticated_transfer", + "Initialize", + AUTHENTICATED_TRANSFER_ELF, + AUTH_TRANSFER_PROGRAM_ID, + authenticated_transfer_init(), + &init_amount, + prove, + exec_iters, + )?); + + results.push(run_case( + "token", + "Transfer", + TOKEN_ELF, + TOKEN_PROGRAM_ID, + token_transfer_pre_states(), + &token_core::Instruction::Transfer { + amount_to_transfer: 5_000, + }, + prove, + exec_iters, + )?); + results.push(run_case( + "token", + "Mint", + TOKEN_ELF, + TOKEN_PROGRAM_ID, + token_mint_pre_states(), + &token_core::Instruction::Mint { + 
amount_to_mint: 5_000, + }, + prove, + exec_iters, + )?); + results.push(run_case( + "token", + "Burn", + TOKEN_ELF, + TOKEN_PROGRAM_ID, + token_burn_pre_states(), + &token_core::Instruction::Burn { + amount_to_burn: 500, + }, + prove, + exec_iters, + )?); + + let clock_timestamp = Timestamp::from(1_700_000_000_u64); + results.push(run_case( + "clock", + "Tick (block_id+1, no multiples)", + CLOCK_ELF, + CLOCK_PROGRAM_ID, + clock_pre_states_tick_at(0), + &clock_timestamp, + prove, + exec_iters, + )?); + + results.push(run_case( + "amm", + "SwapExactInput", + AMM_ELF, + AMM_PROGRAM_ID, + amm_swap_pre_states(), + &amm_core::Instruction::SwapExactInput { + swap_amount_in: 200, + min_amount_out: 1, + token_definition_id_in: amm_token_a_def_id(), + }, + prove, + exec_iters, + )?); + results.push(run_case( + "amm", + "AddLiquidity", + AMM_ELF, + AMM_PROGRAM_ID, + amm_add_liquidity_pre_states(), + &amm_core::Instruction::AddLiquidity { + min_amount_liquidity: 1, + max_amount_to_add_token_a: 400, + max_amount_to_add_token_b: 200, + }, + prove, + exec_iters, + )?); + + results.push(run_case( + "ata", + "Create", + ASSOCIATED_TOKEN_ACCOUNT_ELF, + ATA_PROGRAM_ID, + ata_create_pre_states(), + &ata_core::Instruction::Create { + ata_program_id: ATA_PROGRAM_ID, + }, + prove, + exec_iters, + )?); + + print_table(&results, prove); + + #[cfg(feature = "ppe")] + let ppe_results = if cli.ppe { + ppe::run_all()? + } else { + Vec::new() + }; + #[cfg(not(feature = "ppe"))] + let ppe_results: Vec = { + if cli.ppe { + eprintln!("cycle_bench: --ppe requires --features ppe at build time. Ignoring."); + } + Vec::new() + }; + if !ppe_results.is_empty() { + ppe::print_table(&ppe_results); + } + + #[cfg(feature = "ppe")] + let verify_result = if cli.verify { + Some(ppe::run_verify(cli.verify_iters)?) + } else { + None + }; + #[cfg(not(feature = "ppe"))] + let verify_result: Option = { + if cli.verify { + eprintln!("cycle_bench: --verify requires --features ppe at build time. 
Ignoring."); + } + None + }; + if let Some(ref vr) = verify_result { + ppe::print_verify(vr); + } + + let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .canonicalize()?; + let out_path = workspace_root.join("target").join("cycle_bench.json"); + if let Some(parent) = out_path.parent() { + std::fs::create_dir_all(parent)?; + } + let combined = serde_json::json!({ + "standalone": results, + "ppe": ppe_results, + "verify": verify_result, + }); + std::fs::write(&out_path, serde_json::to_string_pretty(&combined)?)?; + println!("\nJSON written to {}", out_path.display()); + + Ok(()) +} + +fn print_table(results: &[BenchResult], prove: bool) { + let pw = results + .iter() + .map(|r| r.program.len()) + .max() + .unwrap_or(0) + .max("program".len()); + let iw = results + .iter() + .map(|r| r.instruction.len()) + .max() + .unwrap_or(0) + .max("instruction".len()); + let cw = 12_usize; + let sw = 8_usize; + let exec_w = results + .iter() + .map(|r| r.exec_stats.format().len()) + .max() + .unwrap_or(0) + .max("exec_ms (best / mean ± stdev)".len()); + + println!( + "{:cw$} {:>sw$} {:cw$} {:>sw$} {:pcw$} {:>pwallw$} {:>psw$}", + "program", "instruction", "prove_total_c", "prove_ms (s)", "prove_segs", + ); + println!("{}", "-".repeat(pw + iw + pcw + pwallw + psw + 8)); + for r in results { + let total = r + .prove_total_cycles + .map(|c| c.to_string()) + .unwrap_or_else(|| "-".to_owned()); + let pms = r + .prove_stats + .map(|s| format!("{:.1} ({:.1}s)", s.best_ms, s.best_ms / 1_000.0)) + .unwrap_or_else(|| "-".to_owned()); + let psegs = r + .prove_segments + .map(|s| s.to_string()) + .unwrap_or_else(|| "-".to_owned()); + println!( + "{:pcw$} {:>pwallw$} {:>psw$}", + r.program, r.instruction, total, pms, psegs, + ); + } + } +} diff --git a/tools/cycle_bench/src/ppe.rs b/tools/cycle_bench/src/ppe.rs new file mode 100644 index 00000000..f83f38bd --- /dev/null +++ b/tools/cycle_bench/src/ppe.rs @@ -0,0 +1,307 @@ +//! 
Privacy-preserving execution (PPE) cases for cycle_bench. +//! +//! Composition cost is the delta between standalone `prover.prove(env, elf)` for +//! a single program (measured in the main bench) and a full `execute_and_prove` +//! that wraps the same program in the privacy circuit. Chained-call depth sweep +//! uses the `chain_caller` test program (loaded from artifacts/) with N=1, 3, 5, 9. +//! +//! `run_verify` produces G_verify for the fee model: it generates one PPE +//! receipt (auth_transfer Transfer in PPE) and times `Receipt::verify` over +//! `iters` iterations. The proof bytes captured here are also the on-wire +//! "outer proof" payload (S_agg in the fee model). + +#![allow( + dead_code, + reason = "Stubs are used when the `ppe` feature is disabled." +)] + +use anyhow::Result; +use serde::Serialize; + +use crate::stats::Stats; + +#[derive(Debug, Serialize, Clone)] +pub struct PpeBenchResult { + pub label: String, + pub chain_depth: usize, + pub prove_wall_ms: Option, + /// borsh-serialized InnerReceipt length (S_agg in the fee model). 
+ pub proof_bytes: Option, + pub error: Option, +} + +#[derive(Debug, Serialize, Clone)] +pub struct VerifyBenchResult { + pub label: String, + pub stats: Stats, + pub proof_bytes: usize, + pub journal_bytes: usize, +} + +#[cfg(not(feature = "ppe"))] +pub fn run_all() -> Result> { + Ok(Vec::new()) +} + +#[cfg(feature = "ppe")] +pub fn run_all() -> Result> { + let mut results = Vec::new(); + + eprintln!("PPE: running composition cost (auth_transfer Transfer in PPE)"); + results.push(ppe_impl::run_auth_transfer_in_ppe()); + + for depth in [1_u32, 3, 5, 9] { + eprintln!("PPE: running chain_caller depth={depth}"); + results.push(ppe_impl::run_chain_caller(depth)); + } + + Ok(results) +} + +#[cfg(not(feature = "ppe"))] +pub fn run_verify(_iters: usize) -> Result { + anyhow::bail!("--verify requires --features ppe at build time") +} + +#[cfg(feature = "ppe")] +pub fn run_verify(iters: usize) -> Result { + ppe_impl::run_verify(iters) +} + +pub fn print_table(results: &[PpeBenchResult]) { + let lw = results + .iter() + .map(|r| r.label.len()) + .max() + .unwrap_or(0) + .max("label".len()); + + println!( + "\n{:5} {:>20} {:>12} {}", + "label", "depth", "prove_ms (s)", "proof_bytes", "error", + lw = lw, + ); + println!("{}", "-".repeat(lw + 60)); + for r in results { + let p = r + .prove_wall_ms + .map(|v| format!("{v:.1} ({:.1}s)", v / 1_000.0)) + .unwrap_or_else(|| "-".to_owned()); + let b = r + .proof_bytes + .map(|n| n.to_string()) + .unwrap_or_else(|| "-".to_owned()); + let e = r.error.as_deref().unwrap_or(""); + println!( + "{:5} {:>20} {:>12} {}", + r.label, r.chain_depth, p, b, e, + lw = lw, + ); + } +} + +pub fn print_verify(r: &VerifyBenchResult) { + println!("\nVerify (G_verify):"); + println!(" case : {}", r.label); + println!(" proof_bytes : {} (borsh InnerReceipt, S_agg)", r.proof_bytes); + println!(" journal_bytes : {}", r.journal_bytes); + println!(" verify_ms : {}", r.stats.format()); +} + +#[cfg(feature = "ppe")] +mod ppe_impl { + use 
std::{collections::HashMap, time::Instant}; + + use nssa::{ + execute_and_prove, + privacy_preserving_transaction::circuit::{Proof, ProgramWithDependencies}, + program::Program, + program_methods::PRIVACY_PRESERVING_CIRCUIT_ID, + }; + use nssa_core::{ + InputAccountIdentity, PrivacyPreservingCircuitOutput, + account::{Account, AccountId, AccountWithMetadata}, + program::ProgramId, + }; + use risc0_zkvm::{InnerReceipt, Receipt, serde::to_vec}; + + use super::{PpeBenchResult, VerifyBenchResult}; + use crate::stats::Stats; + + const AUTH_TRANSFER_ID: ProgramId = + nssa::program_methods::AUTHENTICATED_TRANSFER_ID; + const AUTH_TRANSFER_ELF: &[u8] = nssa::program_methods::AUTHENTICATED_TRANSFER_ELF; + + /// chain_caller bytecode shipped at artifacts/test_program_methods/chain_caller.bin. + /// Loaded at compile time so we don't need a dev-dependency on test_program_methods. + const CHAIN_CALLER_ELF: &[u8] = + include_bytes!("../../../artifacts/test_program_methods/chain_caller.bin"); + + pub fn run_auth_transfer_in_ppe() -> PpeBenchResult { + let label = "auth_transfer Transfer in PPE".to_owned(); + let started = Instant::now(); + match prove_auth_transfer_in_ppe() { + Ok((_out, proof)) => { + let prove_ms = started.elapsed().as_secs_f64() * 1_000.0; + PpeBenchResult { + label, + chain_depth: 0, + prove_wall_ms: Some(prove_ms), + proof_bytes: Some(proof.into_inner().len()), + error: None, + } + } + Err(err) => PpeBenchResult { + label, + chain_depth: 0, + prove_wall_ms: None, + proof_bytes: None, + error: Some(err.to_string()), + }, + } + } + + fn prove_auth_transfer_in_ppe() + -> anyhow::Result<(PrivacyPreservingCircuitOutput, Proof)> { + let program = Program::new(AUTH_TRANSFER_ELF.to_vec())?; + let pwd = ProgramWithDependencies::from(program); + + // For PPE to allow the sender's balance to be decremented by this + // program, the sender must already be claimed by auth_transfer. + // Recipient stays default-owned so the first call can claim it. 
+ let sender = AccountWithMetadata { + account: Account { + program_owner: AUTH_TRANSFER_ID, + balance: 1_000_000, + ..Account::default() + }, + is_authorized: true, + account_id: AccountId::new([1; 32]), + }; + let recipient = AccountWithMetadata { + account: Account::default(), + is_authorized: true, + account_id: AccountId::new([2; 32]), + }; + let pre_states = vec![sender, recipient]; + + let balance_to_move: u128 = 5_000; + let instruction_data = to_vec(&balance_to_move)?; + + let account_identities = vec![InputAccountIdentity::Public; pre_states.len()]; + + Ok(execute_and_prove( + pre_states, + instruction_data, + account_identities, + &pwd, + )?) + } + + pub fn run_chain_caller(depth: u32) -> PpeBenchResult { + let label = format!("chain_caller depth={depth}"); + let started = Instant::now(); + match prove_chain_caller(depth) { + Ok((_out, proof)) => { + let prove_ms = started.elapsed().as_secs_f64() * 1_000.0; + PpeBenchResult { + label, + chain_depth: depth as usize, + prove_wall_ms: Some(prove_ms), + proof_bytes: Some(proof.into_inner().len()), + error: None, + } + } + Err(err) => PpeBenchResult { + label, + chain_depth: depth as usize, + prove_wall_ms: None, + proof_bytes: None, + error: Some(err.to_string()), + }, + } + } + + fn prove_chain_caller( + num_chain_calls: u32, + ) -> anyhow::Result<(PrivacyPreservingCircuitOutput, Proof)> { + let chain_caller = Program::new(CHAIN_CALLER_ELF.to_vec())?; + let auth_transfer = Program::new(AUTH_TRANSFER_ELF.to_vec())?; + let mut deps = HashMap::new(); + deps.insert(AUTH_TRANSFER_ID, auth_transfer); + let pwd = ProgramWithDependencies::new(chain_caller, deps); + + // Both accounts pre-claimed by auth_transfer. chain_caller doesn't + // track recipient's post-claim program_owner, so a default recipient + // would cause a state mismatch on subsequent chained calls. 
+ let recipient_pre = AccountWithMetadata { + account: Account { + program_owner: AUTH_TRANSFER_ID, + ..Account::default() + }, + is_authorized: true, + account_id: AccountId::new([2; 32]), + }; + let sender_pre = AccountWithMetadata { + account: Account { + program_owner: AUTH_TRANSFER_ID, + balance: 1_000_000, + ..Account::default() + }, + is_authorized: true, + account_id: AccountId::new([1; 32]), + }; + // chain_caller expects pre_states = [recipient, sender]. + let pre_states = vec![recipient_pre, sender_pre]; + + let balance: u128 = 1; + let pda_seed: Option<nssa_core::program::PdaSeed> = None; + let instruction = (balance, AUTH_TRANSFER_ID, num_chain_calls, pda_seed); + let instruction_data = to_vec(&instruction)?; + + let account_identities = vec![InputAccountIdentity::Public; pre_states.len()]; + + Ok(execute_and_prove( + pre_states, + instruction_data, + account_identities, + &pwd, + )?) + } + + pub fn run_verify(iters: usize) -> anyhow::Result<VerifyBenchResult> { + eprintln!("verify: generating PPE receipt for auth_transfer Transfer (~1 prove)"); + let (output, proof) = prove_auth_transfer_in_ppe()?; + let journal = output.to_bytes(); + let journal_bytes = journal.len(); + let proof_bytes_vec = proof.into_inner(); + let proof_bytes = proof_bytes_vec.len(); + + let inner: InnerReceipt = borsh::from_slice(&proof_bytes_vec) + .map_err(|e| anyhow::anyhow!("InnerReceipt deserialize: {e}"))?; + let receipt = Receipt::new(inner, journal); + + // Sanity-check before the timing loop so we don't measure 1000 failures.
+ receipt + .verify(PRIVACY_PRESERVING_CIRCUIT_ID) + .map_err(|e| anyhow::anyhow!("verify sanity check failed: {e}"))?; + + eprintln!("verify: timing {iters} iters of receipt.verify(...)"); + let mut samples = Vec::with_capacity(iters); + for _ in 0..iters { + let started = Instant::now(); + receipt + .verify(PRIVACY_PRESERVING_CIRCUIT_ID) + .map_err(|e| anyhow::anyhow!("verify failed mid-loop: {e}"))?; + samples.push(started.elapsed().as_secs_f64() * 1_000.0); + } + let stats = Stats::from_samples(&samples); + + Ok(VerifyBenchResult { + label: "auth_transfer Transfer in PPE".to_owned(), + stats, + proof_bytes, + journal_bytes, + }) + } +} diff --git a/tools/cycle_bench/src/stats.rs b/tools/cycle_bench/src/stats.rs new file mode 100644 index 00000000..b1e45b56 --- /dev/null +++ b/tools/cycle_bench/src/stats.rs @@ -0,0 +1,54 @@ +//! Small helper for best / mean / stdev over wall-time samples. +//! +//! We report both best-of-N (the figure that strips OS noise and matches what most +//! bench READMEs print) and mean +/- stdev (the figure the fee model wants, since +//! it cares about the steady-state cost not a single fastest sample). + +use serde::Serialize; + +#[derive(Debug, Serialize, Clone, Copy, Default)] +pub struct Stats { + pub n: usize, + pub best_ms: f64, + pub mean_ms: f64, + pub stdev_ms: f64, +} + +impl Stats { + pub fn from_samples(samples: &[f64]) -> Self { + let n = samples.len(); + if n == 0 { + return Self::default(); + } + let best_ms = samples.iter().copied().fold(f64::INFINITY, f64::min); + let sum: f64 = samples.iter().sum(); + let mean_ms = sum / n as f64; + let stdev_ms = if n > 1 { + let var: f64 = samples + .iter() + .map(|s| { + let d = s - mean_ms; + d * d + }) + .sum::<f64>() + / (n - 1) as f64; + var.sqrt() + } else { + 0.0 + }; + Self { + n, + best_ms, + mean_ms, + stdev_ms, + } + } + + /// Format as `best / mean ± stdev (n=N)` for table display.
+ pub fn format(&self) -> String { + format!( + "{:.2} / {:.2} ± {:.2} (n={})", + self.best_ms, self.mean_ms, self.stdev_ms, self.n, + ) + } +}