diff --git a/Cargo.lock b/Cargo.lock index 195a5087..52a11f00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1988,6 +1988,24 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "cycle_bench" +version = "0.1.0" +dependencies = [ + "amm_core", + "anyhow", + "ata_core", + "borsh", + "clap", + "clock_core", + "nssa", + "nssa_core", + "risc0-zkvm", + "serde", + "serde_json", + "token_core", +] + [[package]] name = "darling" version = "0.20.11" diff --git a/Cargo.toml b/Cargo.toml index 724f53e0..256e633d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ members = [ "examples/program_deployment/methods/guest", "testnet_initial_state", "indexer/ffi", + "tools/cycle_bench", ] [workspace.dependencies] diff --git a/docs/benchmarks/README.md b/docs/benchmarks/README.md new file mode 100644 index 00000000..269272b0 --- /dev/null +++ b/docs/benchmarks/README.md @@ -0,0 +1,9 @@ +# Benchmarks + +Bench tools live under `tools/` with READMEs for how to run each one. This directory holds the result write-ups: machine, raw tables, and short findings. + +| Bench | Doc | +|---|---| +| cycle_bench | [cycle_bench.md](cycle_bench.md) | + +All numbers are from a single M2 Pro dev box unless noted otherwise. diff --git a/docs/benchmarks/cycle_bench.md b/docs/benchmarks/cycle_bench.md new file mode 100644 index 00000000..fca9f12c --- /dev/null +++ b/docs/benchmarks/cycle_bench.md @@ -0,0 +1,98 @@ +# cycle_bench + +Per-program Risc0 cycle counts, prover wall time, PPE composition cost, and verifier wall time for the built-in LEZ programs. Inputs for the fee model's `G_executor`, `G_prove`, `G_verify`, and `S_agg` parameters. + +## Machine + +| Field | Value | +|---|---| +| Chip | Apple M2 Pro (8P+4E) | +| RAM | 16 GB | +| OS | macOS 15.5 | +| Rust | 1.94.0 | +| Risc0 zkVM | 3.0.5 | +| Profile | release | +| GPU acceleration | none | + +## Executor cycles + +`SessionInfo::cycles()` per instruction. Deterministic across runs. 
Wall time is `best / mean ± stdev` over 5 timed iterations (1 warmup discarded). + +| Program | Instruction | user_cycles | segments | exec_ms (best / mean ± stdev) | +|---|---|---:|---:|---| +| authenticated_transfer | Initialize | 43,642 | 1 | 18.86 / 19.41 ± 0.48 | +| authenticated_transfer | Transfer | 77,095 | 1 | 19.67 / 20.84 ± 1.16 | +| token | Burn | 116,546 | 1 | 24.86 / 25.46 ± 0.63 | +| token | Mint | 116,862 | 1 | 24.47 / 25.08 ± 0.42 | +| token | Transfer | 127,726 | 1 | 25.00 / 25.40 ± 0.29 | +| clock | Tick (no rollups) | 137,022 | 1 | 21.18 / 21.57 ± 0.41 | +| ata | Create | 175,056 | 1 | 23.64 / 24.94 ± 1.09 | +| amm | SwapExactInput | 508,634 | 1 | 34.21 / 34.77 ± 0.55 | +| amm | AddLiquidity | 642,774 | 1 | 37.59 / 37.87 ± 0.28 | + +## Real proving (`--prove`) + +`prover.prove(env, elf)` wall time per program on CPU. `total_cycles` is the prover-reported total from `ProveInfo.stats`: `user_cycles` plus paging and other overhead cycles, padded up to the next power of two (Risc0 po2 padding), so it can land one bucket above what `user_cycles` alone would suggest. + +| Program | Instruction | total_cycles | prove_ms | prove_s | +|---|---|---:|---:|---:| +| authenticated_transfer | Initialize | 131,072 | 11,881 | 11.9 | +| authenticated_transfer | Transfer | 131,072 | 13,705 | 13.7 | +| token | Burn | 262,144 | 22,893 | 22.9 | +| token | Mint | 262,144 | 23,927 | 23.9 | +| token | Transfer | 262,144 | 27,178 | 27.2 | +| clock | Tick | 262,144 | 23,486 | 23.5 | +| ata | Create | 262,144 | 21,093 | 21.1 | +| amm | AddLiquidity | 1,048,576 | 111,654 | 111.7 | +| amm | SwapExactInput | 1,048,576 | 126,400 | 126.4 | + +Linear fit across po2 buckets: ≈ 100 µs per total cycle (≈ 10k cycles/s throughput on this CPU). + +## PPE composition + chain-call sweep (`--ppe`) + +Same `auth_transfer Transfer` instruction, standalone vs wrapped in the privacy circuit; plus the `chain_caller` test program with N chained `authenticated_transfer` calls. `proof_bytes` is the length of the borsh-serialized `InnerReceipt` (`S_agg` in the fee model). 
+ +| Case | prove_ms | prove_s | proof_bytes | +|---|---:|---:|---:| +| auth_transfer Transfer standalone | 13,705 | 13.7 | n/a | +| auth_transfer Transfer in PPE | 61,486 | 61.5 | 223,551 | +| chain_caller depth=1 | 122,590 | 122.6 | 223,551 | +| chain_caller depth=3 | 231,974 | 232.0 | 223,551 | +| chain_caller depth=5 | 372,123 | 372.1 | 223,551 | +| chain_caller depth=9 | 544,280 | 544.3 | 223,551 | + +Linear fit depth=1..9: ≈ 53 s per additional chained call, intercept ≈ 73 s. Composition tax (single program PPE − standalone): ≈ 48 s. `proof_bytes` is constant: the outer succinct proof has fixed size; the journal carried alongside it scales with public state and is reported separately by `--verify`. + +## Verifier (`--verify`) + +One PPE receipt generated once (auth_transfer Transfer in PPE), then `Receipt::verify(PRIVACY_PRESERVING_CIRCUIT_ID)` measured over 1000 iterations. + +| Field | Value | +|---|---| +| case | auth_transfer Transfer in PPE | +| proof_bytes (S_agg) | 223,551 | +| journal_bytes | 412 | +| verify_ms (best / mean ± stdev, n=1000) | 11.71 / 12.06 ± 1.99 | + +## Findings + +- Proving cost scales with po2-bucketed `total_cycles`, not raw `user_cycles`. Trimming `user_cycles` only helps if it crosses a 2^N boundary. +- Single-program PPE composition tax on M2 Pro CPU: ≈ 48 s (61.5 − 13.7). +- Chained-call cost is linear at ≈ 53 s per call. A max-depth chain (10) would take ≈ 600 s to prove in PPE on this CPU. +- `G_verify` is ≈ 12 ms and roughly constant per outer receipt (1000-iter stdev ≈ 2 ms). The succinct outer proof is fixed at 223,551 bytes (S_agg); verify is not on the latency critical path. + +## Reproduce + +```sh +cargo run --release -p cycle_bench +cargo run --release -p cycle_bench --features prove -- --prove +cargo run --release -p cycle_bench --features ppe -- --prove --ppe +cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000 +``` + +JSON output: `target/cycle_bench.json`. 
+ +## Caveats + +- CPU-only proving on a dev laptop. Production prover hardware (GPU, specialised CPU pipelines) will produce much smaller numbers; relative ordering should be preserved. +- Single-segment cases only; multi-segment programs would pay continuation overhead not measured here. diff --git a/tools/cycle_bench/Cargo.toml b/tools/cycle_bench/Cargo.toml new file mode 100644 index 00000000..6847b0c5 --- /dev/null +++ b/tools/cycle_bench/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "cycle_bench" +version = "0.1.0" +edition = "2024" +license = { workspace = true } +publish = false + +[lints] +workspace = true + +[features] +default = [] +prove = ["nssa/prove", "risc0-zkvm/prove"] +ppe = ["prove"] + +[dependencies] +nssa = { workspace = true } +nssa_core = { workspace = true, features = ["host"] } +clock_core.workspace = true +token_core.workspace = true +amm_core.workspace = true +ata_core.workspace = true + +risc0-zkvm.workspace = true +borsh.workspace = true +serde.workspace = true +serde_json.workspace = true +anyhow.workspace = true +clap = { workspace = true } diff --git a/tools/cycle_bench/README.md b/tools/cycle_bench/README.md new file mode 100644 index 00000000..3b416dc8 --- /dev/null +++ b/tools/cycle_bench/README.md @@ -0,0 +1,28 @@ +# cycle_bench + +Per-program Risc0 cycle counts, prover wall time, PPE composition cost, and verifier wall time for the built-in LEZ programs. Feeds the fee model (`G_executor`, `G_prove`, `G_verify`, `S_agg`). 
+ +## Run + +```sh +# Executor cycles only (fast, ~seconds) +cargo run --release -p cycle_bench + +# + real proving per program (slow, ~minutes) +cargo run --release -p cycle_bench --features prove -- --prove + +# + PPE composition cases (very slow, ~hour) +cargo run --release -p cycle_bench --features ppe -- --prove --ppe + +# + verifier microbench (G_verify): generates one PPE receipt, times verify x1000 +cargo run --release -p cycle_bench --features ppe -- --verify --verify-iters 1000 +``` + +`RISC0_DEV_MODE=1` skips proving entirely and is only useful for the executor path. Combine flags freely; output is printed to stdout and written to `target/cycle_bench.json` for regression diffs. + +## What you'll see + +- Per-program executor cycles and segments, plus exec wall time as `best / mean ± stdev (n=N)`. +- With `--prove`: prover total cycles, paging cycles, segments, and wall time. +- With `--ppe`: end-to-end `execute_and_prove` wall time and S_agg (the borsh-serialized InnerReceipt length) for one auth-transfer-in-PPE case and a chain-caller depth sweep. +- With `--verify`: verify wall time `best / mean ± stdev`, plus `proof_bytes` and `journal_bytes`. diff --git a/tools/cycle_bench/src/main.rs b/tools/cycle_bench/src/main.rs new file mode 100644 index 00000000..acdd9eff --- /dev/null +++ b/tools/cycle_bench/src/main.rs @@ -0,0 +1,632 @@ +//! Measures Risc0 user cycles per built-in program instruction. +//! +//! Runs each guest ELF through the Risc0 executor (no proving) with realistic inputs +//! drawn from the existing per-program unit tests, then prints a table and writes a +//! JSON dump for regression comparison. +//! +//! Run with `cargo run --release -p cycle_bench`. `RISC0_DEV_MODE` has no effect on +//! executor cycle counts. 
+ +#![expect( + clippy::arithmetic_side_effects, + clippy::as_conversions, + clippy::cast_precision_loss, + clippy::float_arithmetic, + clippy::missing_const_for_fn, + clippy::non_ascii_literal, + clippy::print_literal, + clippy::print_stderr, + clippy::print_stdout, + clippy::ref_patterns, + reason = "Bench tool: matches test-style fixture code" +)] + +use std::{path::PathBuf, time::Instant}; + +use amm_core::{PoolDefinition, compute_liquidity_token_pda, compute_pool_pda, compute_vault_pda}; +use anyhow::Result; +use ata_core::{compute_ata_seed, get_associated_token_account_id}; +use clap::Parser; +use clock_core::{ + CLOCK_01_PROGRAM_ACCOUNT_ID, CLOCK_10_PROGRAM_ACCOUNT_ID, CLOCK_50_PROGRAM_ACCOUNT_ID, + ClockAccountData, +}; +use nssa::program_methods::{ + AMM_ELF, AMM_ID, ASSOCIATED_TOKEN_ACCOUNT_ELF, ASSOCIATED_TOKEN_ACCOUNT_ID, + AUTHENTICATED_TRANSFER_ELF, AUTHENTICATED_TRANSFER_ID, CLOCK_ELF, CLOCK_ID, TOKEN_ELF, + TOKEN_ID, +}; +use nssa_core::{ + Timestamp, + account::{Account, AccountId, AccountWithMetadata, Data}, + program::{InstructionData, ProgramId}, +}; +use risc0_zkvm::{ExecutorEnv, default_executor, default_prover}; +use serde::Serialize; +use stats::Stats; +use token_core::{TokenDefinition, TokenHolding}; + +mod ppe; +mod stats; + +#[derive(Parser, Debug)] +#[command(about = "Per-program executor and (optionally) prover cycle measurements")] +struct Cli { + /// Also run prover.prove for each case and report wall time + cycles. Slow. + #[arg(long)] + prove: bool, + + /// Also run privacy-preserving execution circuit (PPE) composition cases: + /// (a) single `auth_transfer` Transfer through `execute_and_prove`, (b) `chain_caller` + /// with depth N=1,3,5,9. Requires --features ppe at build time. Very slow. + #[arg(long)] + ppe: bool, + + /// After running --ppe-style proving once for auth_transfer-in-PPE, time + /// `receipt.verify(PRIVACY_PRESERVING_CIRCUIT_ID)` over many iterations. + /// Produces `G_verify` for the fee model. 
Requires --features ppe. + #[arg(long)] + verify: bool, + + /// Iterations for --verify. Default matches the fee-model handoff target. + #[arg(long, default_value_t = 1000)] + verify_iters: usize, + + /// Iterations for executor wall-time sampling per case. First iter is + /// discarded as warmup, remaining N feed the stats. + #[arg(long, default_value_t = 5)] + exec_iters: usize, +} + +#[derive(Debug, Serialize)] +struct BenchResult { + program: &'static str, + instruction: &'static str, + user_cycles: u64, + segments: usize, + exec_stats: Stats, + /// Stats over prover.prove(env, elf) wall-clock samples. Only populated when --prove is set. + /// Single-sample (n=1) when --prove is on without explicit repetition, since proving is slow. + prove_stats: Option, + /// Total cycles (with continuation overhead, paging, po2 padding) from ProveInfo.stats. + prove_total_cycles: Option, + /// User cycles from ProveInfo.stats (should match executor cycles). + prove_user_cycles: Option, + /// Paging cycles from ProveInfo.stats. + prove_paging_cycles: Option, + /// Segments from ProveInfo.stats. + prove_segments: Option, +} + +struct Case { + program: &'static str, + instruction_label: &'static str, + elf: &'static [u8], + self_program_id: ProgramId, + pre_states: Vec, + instruction_words: InstructionData, +} + +impl Case { + fn new( + program: &'static str, + instruction_label: &'static str, + elf: &'static [u8], + self_program_id: ProgramId, + pre_states: Vec, + instruction: &I, + ) -> Result { + Ok(Self { + program, + instruction_label, + elf, + self_program_id, + pre_states, + instruction_words: risc0_zkvm::serde::to_vec(instruction)?, + }) + } + + fn run(self, prove: bool, exec_iters: usize) -> Result { + let Self { + program, + instruction_label, + elf, + self_program_id, + pre_states, + instruction_words, + } = self; + let caller_program_id: Option = None; + + // One warmup pass discarded, then `exec_iters` samples. 
The executor has + // large per-call setup overhead (ELF parsing, env init); reporting both + // best-of-N and mean ± stdev shows whether jitter is significant. + let mut samples: Vec = Vec::with_capacity(exec_iters); + let mut last_info = None; + let total = exec_iters.saturating_add(1).max(2); + for iter in 0..total { + let mut env_builder = ExecutorEnv::builder(); + env_builder + .write(&self_program_id)? + .write(&caller_program_id)? + .write(&pre_states)? + .write(&instruction_words)?; + let env = env_builder.build()?; + + let started = Instant::now(); + let info = default_executor().execute(env, elf)?; + let elapsed_ms = started.elapsed().as_secs_f64() * 1_000.0; + + if iter > 0 { + samples.push(elapsed_ms); + } + last_info = Some(info); + } + let info = last_info.expect("at least one iteration"); + let exec_stats = Stats::from_samples(&samples); + + let mut prove_stats = None; + let mut prove_total_cycles = None; + let mut prove_user_cycles = None; + let mut prove_paging_cycles = None; + let mut prove_segments = None; + if prove { + let mut env_builder = ExecutorEnv::builder(); + env_builder + .write(&self_program_id)? + .write(&caller_program_id)? + .write(&pre_states)? 
+ .write(&instruction_words)?; + let env = env_builder.build()?; + + let started = Instant::now(); + let prove_info = default_prover() + .prove(env, elf) + .map_err(|e| anyhow::anyhow!("prove failed: {e}"))?; + let prove_ms = started.elapsed().as_secs_f64() * 1_000.0; + prove_stats = Some(Stats::from_samples(&[prove_ms])); + prove_total_cycles = Some(prove_info.stats.total_cycles); + prove_user_cycles = Some(prove_info.stats.user_cycles); + prove_paging_cycles = Some(prove_info.stats.paging_cycles); + prove_segments = Some(prove_info.stats.segments); + eprintln!( + " prove({program}/{instruction_label}): {prove_ms:.1} ms ({:.1}s), total_cycles={}, segments={}", + prove_ms / 1_000.0, + prove_info.stats.total_cycles, + prove_info.stats.segments, + ); + } + + Ok(BenchResult { + program, + instruction: instruction_label, + user_cycles: info.cycles(), + segments: info.segments.len(), + exec_stats, + prove_stats, + prove_total_cycles, + prove_user_cycles, + prove_paging_cycles, + prove_segments, + }) + } +} + +fn authenticated_transfer_init() -> Vec { + vec![AccountWithMetadata { + account: Account::default(), + is_authorized: true, + account_id: AccountId::new([1; 32]), + }] +} + +fn authenticated_transfer_transfer() -> Vec { + let sender = AccountWithMetadata { + account: Account { + balance: 1_000_000, + ..Account::default() + }, + is_authorized: true, + account_id: AccountId::new([1; 32]), + }; + let recipient = AccountWithMetadata { + account: Account::default(), + is_authorized: false, + account_id: AccountId::new([2; 32]), + }; + vec![sender, recipient] +} + +fn token_holding( + definition_id: AccountId, + account_id: AccountId, + balance: u128, + is_authorized: bool, +) -> AccountWithMetadata { + AccountWithMetadata { + account: Account { + program_owner: TOKEN_ID, + balance: 0, + data: Data::from(&TokenHolding::Fungible { + definition_id, + balance, + }), + nonce: 0_u128.into(), + }, + is_authorized, + account_id, + } +} + +fn token_definition( + account_id: 
AccountId, + total_supply: u128, + is_authorized: bool, +) -> AccountWithMetadata { + AccountWithMetadata { + account: Account { + program_owner: TOKEN_ID, + balance: 0, + data: Data::from(&TokenDefinition::Fungible { + name: String::from("test"), + total_supply, + metadata_id: None, + }), + nonce: 0_u128.into(), + }, + is_authorized, + account_id, + } +} + +fn token_transfer_pre_states() -> Vec { + let def = AccountId::new([15; 32]); + let sender = token_holding(def, AccountId::new([17; 32]), 100_000, true); + let recipient = token_holding(def, AccountId::new([42; 32]), 50_000, true); + vec![sender, recipient] +} + +fn token_mint_pre_states() -> Vec { + let def_id = AccountId::new([15; 32]); + let def = token_definition(def_id, 100_000, true); + let holding = token_holding(def_id, AccountId::new([17; 32]), 1_000, true); + vec![def, holding] +} + +fn token_burn_pre_states() -> Vec { + let def_id = AccountId::new([15; 32]); + let def = token_definition(def_id, 100_000, true); + let holding = token_holding(def_id, AccountId::new([17; 32]), 1_000, true); + vec![def, holding] +} + +fn clock_account(account_id: AccountId, block_id: u64) -> AccountWithMetadata { + AccountWithMetadata { + account: Account { + program_owner: CLOCK_ID, + balance: 0, + data: ClockAccountData { + block_id, + timestamp: Timestamp::from(0_u64), + } + .to_bytes() + .try_into() + .expect("ClockAccountData should fit in account data"), + nonce: 0_u128.into(), + }, + is_authorized: false, + account_id, + } +} + +fn clock_pre_states_tick_at(block_id: u64) -> Vec { + vec![ + clock_account(CLOCK_01_PROGRAM_ACCOUNT_ID, block_id), + clock_account(CLOCK_10_PROGRAM_ACCOUNT_ID, block_id), + clock_account(CLOCK_50_PROGRAM_ACCOUNT_ID, block_id), + ] +} + +fn amm_token_a_def_id() -> AccountId { + AccountId::new([42; 32]) +} +fn amm_token_b_def_id() -> AccountId { + AccountId::new([43; 32]) +} +fn amm_pool_id() -> AccountId { + compute_pool_pda(AMM_ID, amm_token_a_def_id(), amm_token_b_def_id()) +} +fn 
amm_vault_a_id() -> AccountId { + compute_vault_pda(AMM_ID, amm_pool_id(), amm_token_a_def_id()) +} +fn amm_vault_b_id() -> AccountId { + compute_vault_pda(AMM_ID, amm_pool_id(), amm_token_b_def_id()) +} +fn amm_lp_def_id() -> AccountId { + compute_liquidity_token_pda(AMM_ID, amm_pool_id()) +} + +/// Pool seeded with reserves `1_000` / `500`, lp supply `sqrt(1000*500) = 707`. +fn amm_pool_account() -> AccountWithMetadata { + let reserve_a: u128 = 1_000; + let reserve_b: u128 = 500; + let lp_supply = (reserve_a * reserve_b).isqrt(); + AccountWithMetadata { + account: Account { + program_owner: AMM_ID, + balance: 0, + data: Data::from(&PoolDefinition { + definition_token_a_id: amm_token_a_def_id(), + definition_token_b_id: amm_token_b_def_id(), + vault_a_id: amm_vault_a_id(), + vault_b_id: amm_vault_b_id(), + liquidity_pool_id: amm_lp_def_id(), + liquidity_pool_supply: lp_supply, + reserve_a, + reserve_b, + fees: 0, + active: true, + }), + nonce: 0_u128.into(), + }, + is_authorized: true, + account_id: amm_pool_id(), + } +} + +fn amm_swap_pre_states() -> Vec { + let pool = amm_pool_account(); + let vault_a = token_holding(amm_token_a_def_id(), amm_vault_a_id(), 1_000, true); + let vault_b = token_holding(amm_token_b_def_id(), amm_vault_b_id(), 500, true); + let user_a = token_holding(amm_token_a_def_id(), AccountId::new([45; 32]), 1_000, true); + let user_b = token_holding(amm_token_b_def_id(), AccountId::new([46; 32]), 500, false); + vec![pool, vault_a, vault_b, user_a, user_b] +} + +fn amm_add_liquidity_pre_states() -> Vec { + let pool = amm_pool_account(); + let vault_a = token_holding(amm_token_a_def_id(), amm_vault_a_id(), 1_000, true); + let vault_b = token_holding(amm_token_b_def_id(), amm_vault_b_id(), 500, true); + let lp_supply = (1_000_u128 * 500_u128).isqrt(); + let lp_def = token_definition(amm_lp_def_id(), lp_supply, true); + let user_a = token_holding(amm_token_a_def_id(), AccountId::new([45; 32]), 1_000, true); + let user_b = 
token_holding(amm_token_b_def_id(), AccountId::new([46; 32]), 500, true); + let user_lp = token_holding(amm_lp_def_id(), AccountId::new([47; 32]), 0, true); + vec![pool, vault_a, vault_b, lp_def, user_a, user_b, user_lp] +} + +fn ata_create_pre_states() -> Vec { + let owner_id = AccountId::new([91; 32]); + let definition_id = AccountId::new([15; 32]); + let owner = AccountWithMetadata { + account: Account::default(), + is_authorized: true, + account_id: owner_id, + }; + let token_def = token_definition(definition_id, 100_000, false); + let seed = compute_ata_seed(owner_id, definition_id); + let ata_id = get_associated_token_account_id(&ASSOCIATED_TOKEN_ACCOUNT_ID, &seed); + let ata_account = AccountWithMetadata { + account: Account::default(), + is_authorized: false, + account_id: ata_id, + }; + vec![owner, token_def, ata_account] +} + +fn main() -> Result<()> { + let cli = Cli::parse(); + let prove = cli.prove; + let exec_iters = cli.exec_iters.max(1); + if prove { + eprintln!("cycle_bench: prove mode ON, this will be slow (~minutes per program)"); + } + + let cases = [ + Case::new( + "authenticated_transfer", + "Transfer", + AUTHENTICATED_TRANSFER_ELF, + AUTHENTICATED_TRANSFER_ID, + authenticated_transfer_transfer(), + &5_000_u128, + )?, + Case::new( + "authenticated_transfer", + "Initialize", + AUTHENTICATED_TRANSFER_ELF, + AUTHENTICATED_TRANSFER_ID, + authenticated_transfer_init(), + &0_u128, + )?, + Case::new( + "token", + "Transfer", + TOKEN_ELF, + TOKEN_ID, + token_transfer_pre_states(), + &token_core::Instruction::Transfer { + amount_to_transfer: 5_000, + }, + )?, + Case::new( + "token", + "Mint", + TOKEN_ELF, + TOKEN_ID, + token_mint_pre_states(), + &token_core::Instruction::Mint { + amount_to_mint: 5_000, + }, + )?, + Case::new( + "token", + "Burn", + TOKEN_ELF, + TOKEN_ID, + token_burn_pre_states(), + &token_core::Instruction::Burn { + amount_to_burn: 500, + }, + )?, + Case::new( + "clock", + "Tick (block_id+1, no multiples)", + CLOCK_ELF, + CLOCK_ID, + 
clock_pre_states_tick_at(0), + &Timestamp::from(1_700_000_000_u64), + )?, + Case::new( + "amm", + "SwapExactInput", + AMM_ELF, + AMM_ID, + amm_swap_pre_states(), + &amm_core::Instruction::SwapExactInput { + swap_amount_in: 200, + min_amount_out: 1, + token_definition_id_in: amm_token_a_def_id(), + }, + )?, + Case::new( + "amm", + "AddLiquidity", + AMM_ELF, + AMM_ID, + amm_add_liquidity_pre_states(), + &amm_core::Instruction::AddLiquidity { + min_amount_liquidity: 1, + max_amount_to_add_token_a: 400, + max_amount_to_add_token_b: 200, + }, + )?, + Case::new( + "ata", + "Create", + ASSOCIATED_TOKEN_ACCOUNT_ELF, + ASSOCIATED_TOKEN_ACCOUNT_ID, + ata_create_pre_states(), + &ata_core::Instruction::Create { + ata_program_id: ASSOCIATED_TOKEN_ACCOUNT_ID, + }, + )?, + ]; + + let results: Vec = cases + .into_iter() + .map(|c| c.run(prove, exec_iters)) + .collect::>>()?; + + print_table(&results, prove); + + #[cfg(feature = "ppe")] + let ppe_results = if cli.ppe { ppe::run_all() } else { Vec::new() }; + #[cfg(not(feature = "ppe"))] + let ppe_results: Vec = { + if cli.ppe { + eprintln!("cycle_bench: --ppe requires --features ppe at build time. Ignoring."); + } + Vec::new() + }; + if !ppe_results.is_empty() { + ppe::print_table(&ppe_results); + } + + #[cfg(feature = "ppe")] + let verify_result = if cli.verify { + Some(ppe::run_verify(cli.verify_iters)?) + } else { + None + }; + #[cfg(not(feature = "ppe"))] + let verify_result: Option = { + if cli.verify { + eprintln!("cycle_bench: --verify requires --features ppe at build time. 
Ignoring."); + } + None + }; + if let Some(ref vr) = verify_result { + ppe::print_verify(vr); + } + + let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .canonicalize()?; + let out_path = workspace_root.join("target").join("cycle_bench.json"); + if let Some(parent) = out_path.parent() { + std::fs::create_dir_all(parent)?; + } + let combined = serde_json::json!({ + "standalone": results, + "ppe": ppe_results, + "verify": verify_result, + }); + std::fs::write(&out_path, serde_json::to_string_pretty(&combined)?)?; + println!("\nJSON written to {}", out_path.display()); + + Ok(()) +} + +fn print_table(results: &[BenchResult], prove: bool) { + let pw = results + .iter() + .map(|r| r.program.len()) + .max() + .unwrap_or(0) + .max("program".len()); + let iw = results + .iter() + .map(|r| r.instruction.len()) + .max() + .unwrap_or(0) + .max("instruction".len()); + let cw = 12_usize; + let sw = 8_usize; + let exec_w = results + .iter() + .map(|r| r.exec_stats.to_string().len()) + .max() + .unwrap_or(0) + .max("exec_ms (best / mean ± stdev)".len()); + + println!( + "{:cw$} {:>sw$} {:cw$} {:>sw$} {:pcw$} {:>pwallw$} {:>psw$}", + "program", "instruction", "prove_total_c", "prove_ms (s)", "prove_segs", + ); + println!("{}", "-".repeat(pw + iw + pcw + pwallw + psw + 8)); + for r in results { + let total = r + .prove_total_cycles + .map_or_else(|| "-".to_owned(), |c| c.to_string()); + let pms = r.prove_stats.map_or_else( + || "-".to_owned(), + |s| format!("{:.1} ({:.1}s)", s.best_ms, s.best_ms / 1_000.0), + ); + let psegs = r + .prove_segments + .map_or_else(|| "-".to_owned(), |s| s.to_string()); + println!( + "{:pcw$} {:>pwallw$} {:>psw$}", + r.program, r.instruction, total, pms, psegs, + ); + } + } +} diff --git a/tools/cycle_bench/src/ppe.rs b/tools/cycle_bench/src/ppe.rs new file mode 100644 index 00000000..e3bc4747 --- /dev/null +++ b/tools/cycle_bench/src/ppe.rs @@ -0,0 +1,122 @@ +//! 
Privacy-preserving execution (PPE) cases for `cycle_bench`. +//! +//! Composition cost is the delta between standalone `prover.prove(env, elf)` for +//! a single program (measured in the main bench) and a full `execute_and_prove` +//! that wraps the same program in the privacy circuit. Chained-call depth sweep +//! uses the `chain_caller` test program (loaded from artifacts/) with N=1, 3, 5, 9. +//! +//! `run_verify` produces `G_verify` for the fee model: it generates one PPE +//! receipt (`auth_transfer` Transfer in PPE) and times `Receipt::verify` over +//! `iters` iterations. The proof bytes captured here are also the on-wire +//! "outer proof" payload (`S_agg` in the fee model). + +#![allow( + dead_code, + reason = "Stubs are used when the `ppe` feature is disabled." +)] + +use anyhow::Result; +use serde::Serialize; + +use crate::stats::Stats; + +#[cfg(feature = "ppe")] +mod ppe_impl; + +#[derive(Debug, Serialize, Clone)] +pub struct PpeBenchResult { + pub label: String, + pub chain_depth: usize, + pub prove_wall_ms: Option, + /// borsh-serialized `InnerReceipt` length (`S_agg` in the fee model). 
+ pub proof_bytes: Option, + pub error: Option, +} + +#[derive(Debug, Serialize, Clone)] +pub struct VerifyBenchResult { + pub label: String, + pub stats: Stats, + pub proof_bytes: usize, + pub journal_bytes: usize, +} + +#[cfg(not(feature = "ppe"))] +pub fn run_all() -> Vec { + Vec::new() +} + +#[cfg(feature = "ppe")] +pub fn run_all() -> Vec { + let mut results = Vec::new(); + + eprintln!("PPE: running composition cost (auth_transfer Transfer in PPE)"); + results.push(ppe_impl::run_auth_transfer_in_ppe()); + + for depth in [1_u32, 3, 5, 9] { + eprintln!("PPE: running chain_caller depth={depth}"); + results.push(ppe_impl::run_chain_caller(depth)); + } + + results +} + +#[cfg(not(feature = "ppe"))] +pub fn run_verify(_iters: usize) -> Result { + anyhow::bail!("--verify requires --features ppe at build time") +} + +#[cfg(feature = "ppe")] +pub fn run_verify(iters: usize) -> Result { + ppe_impl::run_verify(iters) +} + +pub fn print_table(results: &[PpeBenchResult]) { + let lw = results + .iter() + .map(|r| r.label.len()) + .max() + .unwrap_or(0) + .max("label".len()); + + println!( + "\n{:5} {:>20} {:>12} {}", + "label", + "depth", + "prove_ms (s)", + "proof_bytes", + "error", + lw = lw, + ); + println!("{}", "-".repeat(lw + 60)); + for r in results { + let p = r.prove_wall_ms.map_or_else( + || "-".to_owned(), + |v| format!("{v:.1} ({:.1}s)", v / 1_000.0), + ); + let b = r + .proof_bytes + .map_or_else(|| "-".to_owned(), |n| n.to_string()); + let e = r.error.as_deref().unwrap_or(""); + println!( + "{:5} {:>20} {:>12} {}", + r.label, + r.chain_depth, + p, + b, + e, + lw = lw, + ); + } +} + +pub fn print_verify(r: &VerifyBenchResult) { + println!("\nVerify (G_verify):"); + println!(" case : {}", r.label); + println!( + " proof_bytes : {} (borsh InnerReceipt, S_agg)", + r.proof_bytes + ); + println!(" journal_bytes : {}", r.journal_bytes); + println!(" verify_ms : {}", r.stats); +} diff --git a/tools/cycle_bench/src/ppe/ppe_impl.rs 
b/tools/cycle_bench/src/ppe/ppe_impl.rs new file mode 100644 index 00000000..c20db9ac --- /dev/null +++ b/tools/cycle_bench/src/ppe/ppe_impl.rs @@ -0,0 +1,194 @@ +//! Feature-gated implementation of PPE composition and verify benches. + +use std::{collections::HashMap, time::Instant}; + +use nssa::{ + execute_and_prove, + privacy_preserving_transaction::circuit::{ProgramWithDependencies, Proof}, + program::Program, + program_methods::PRIVACY_PRESERVING_CIRCUIT_ID, +}; +use nssa_core::{ + InputAccountIdentity, PrivacyPreservingCircuitOutput, + account::{Account, AccountId, AccountWithMetadata}, + program::ProgramId, +}; +use risc0_zkvm::{InnerReceipt, Receipt, serde::to_vec}; + +use super::{PpeBenchResult, VerifyBenchResult}; +use crate::stats::Stats; + +const AUTH_TRANSFER_ID: ProgramId = nssa::program_methods::AUTHENTICATED_TRANSFER_ID; +const AUTH_TRANSFER_ELF: &[u8] = nssa::program_methods::AUTHENTICATED_TRANSFER_ELF; + +/// `chain_caller` bytecode shipped at `artifacts/test_program_methods/chain_caller.bin`. +/// Loaded at compile time so we don't need a dev-dependency on `test_program_methods`. 
+/// Image of the `chain_caller` test program, embedded into the binary at compile time.
+const CHAIN_CALLER_ELF: &[u8] =
+    include_bytes!("../../../../artifacts/test_program_methods/chain_caller.bin");
+
+/// Proves one `auth_transfer` transfer through `execute_and_prove` and reports
+/// the wall time and the proof size.
+///
+/// Does not return an error: a proving failure is recorded in the `error`
+/// field of the result, with the timing and size fields left as `None`.
+/// The reported time spans the whole `prove_auth_transfer_in_ppe` call, so it
+/// includes program construction and input serialization as well as proving.
+pub fn run_auth_transfer_in_ppe() -> PpeBenchResult {
+    let started = Instant::now();
+    let outcome = prove_auth_transfer_in_ppe();
+    let prove_ms = started.elapsed().as_secs_f64() * 1_000.0;
+    ppe_bench_result("auth_transfer Transfer in PPE".to_owned(), 0, prove_ms, outcome)
+}
+
+/// Packages the outcome of one PPE proving run into a `PpeBenchResult`.
+fn ppe_bench_result(
+    label: String,
+    chain_depth: usize,
+    prove_ms: f64,
+    outcome: anyhow::Result<(PrivacyPreservingCircuitOutput, Proof)>,
+) -> PpeBenchResult {
+    match outcome {
+        Ok((_output, proof)) => PpeBenchResult {
+            label,
+            chain_depth,
+            prove_wall_ms: Some(prove_ms),
+            proof_bytes: Some(proof.into_inner().len()),
+            error: None,
+        },
+        // A failed run reports neither a duration nor a proof size, only the message.
+        Err(err) => PpeBenchResult {
+            label,
+            chain_depth,
+            prove_wall_ms: None,
+            proof_bytes: None,
+            error: Some(err.to_string()),
+        },
+    }
+}
+
+/// Builds the two-account transfer input and runs it through `execute_and_prove`.
+///
+/// Both accounts use `InputAccountIdentity::Public`. Returns the circuit output
+/// together with the proof, or the first error from program construction,
+/// instruction serialization, or proving.
+fn prove_auth_transfer_in_ppe() -> anyhow::Result<(PrivacyPreservingCircuitOutput, Proof)> {
+    let program = Program::new(AUTH_TRANSFER_ELF.to_vec())?;
+    let pwd = ProgramWithDependencies::from(program);
+
+    // For PPE to allow the sender's balance to be decremented by this
+    // program, the sender must already be claimed by auth_transfer.
+    // Recipient stays default-owned so the first call can claim it.
+    let sender = AccountWithMetadata {
+        account: Account {
+            program_owner: AUTH_TRANSFER_ID,
+            balance: 1_000_000,
+            ..Account::default()
+        },
+        is_authorized: true,
+        account_id: AccountId::new([1; 32]),
+    };
+    let recipient = AccountWithMetadata {
+        account: Account::default(),
+        is_authorized: true,
+        account_id: AccountId::new([2; 32]),
+    };
+    let pre_states = vec![sender, recipient];
+
+    let balance_to_move: u128 = 5_000;
+    let instruction_data = to_vec(&balance_to_move)?;
+
+    let account_identities = vec![InputAccountIdentity::Public; pre_states.len()];
+
+    Ok(execute_and_prove(
+        pre_states,
+        instruction_data,
+        account_identities,
+        &pwd,
+    )?)
+}
+
+/// Proves the `chain_caller` test program with `depth` chained calls and
+/// reports the wall time and the proof size.
+///
+/// Does not return an error: a proving failure is recorded in the `error`
+/// field of the result, with the timing and size fields left as `None`.
+/// The reported time spans the whole `prove_chain_caller` call.
+pub fn run_chain_caller(depth: u32) -> PpeBenchResult {
+    let label = format!("chain_caller depth={depth}");
+    let started = Instant::now();
+    let outcome = prove_chain_caller(depth);
+    let prove_ms = started.elapsed().as_secs_f64() * 1_000.0;
+    ppe_bench_result(label, depth as usize, prove_ms, outcome)
+}
+
+/// Builds the `chain_caller` input for `num_chain_calls` chained calls into
+/// `auth_transfer` and runs it through `execute_and_prove`.
+///
+/// `auth_transfer` is registered as the only dependency of `chain_caller`.
+/// Each call moves a balance of 1 and no PDA seed is supplied.
+fn prove_chain_caller(
+    num_chain_calls: u32,
+) -> anyhow::Result<(PrivacyPreservingCircuitOutput, Proof)> {
+    let chain_caller = Program::new(CHAIN_CALLER_ELF.to_vec())?;
+    let auth_transfer = Program::new(AUTH_TRANSFER_ELF.to_vec())?;
+    let mut deps = HashMap::new();
+    deps.insert(AUTH_TRANSFER_ID, auth_transfer);
+    let pwd = ProgramWithDependencies::new(chain_caller, deps);
+
+    // Both accounts pre-claimed by auth_transfer. chain_caller doesn't
+    // track recipient's post-claim program_owner, so a default recipient
+    // would cause a state mismatch on subsequent chained calls.
+    let recipient_pre = AccountWithMetadata {
+        account: Account {
+            program_owner: AUTH_TRANSFER_ID,
+            ..Account::default()
+        },
+        is_authorized: true,
+        account_id: AccountId::new([2; 32]),
+    };
+    let sender_pre = AccountWithMetadata {
+        account: Account {
+            program_owner: AUTH_TRANSFER_ID,
+            balance: 1_000_000,
+            ..Account::default()
+        },
+        is_authorized: true,
+        account_id: AccountId::new([1; 32]),
+    };
+    // chain_caller expects pre_states = [recipient, sender].
+    let pre_states = vec![recipient_pre, sender_pre];
+
+    let balance: u128 = 1;
+    // NOTE(review): the type argument was lost from this copy of the patch;
+    // `PdaSeed` is presumed to be the element type chain_caller deserializes.
+    // Confirm against the chain_caller guest before merging.
+    let pda_seed: Option<PdaSeed> = None;
+    let instruction = (balance, AUTH_TRANSFER_ID, num_chain_calls, pda_seed);
+    let instruction_data = to_vec(&instruction)?;
+
+    let account_identities = vec![InputAccountIdentity::Public; pre_states.len()];
+
+    Ok(execute_and_prove(
+        pre_states,
+        instruction_data,
+        account_identities,
+        &pwd,
+    )?)
+}
+
+/// Times `Receipt::verify` on a freshly generated `auth_transfer` PPE receipt.
+///
+/// One proof is generated up front, rebuilt into a `Receipt` from its
+/// borsh-encoded `InnerReceipt` and the circuit output bytes, then verified
+/// `iters` times against `PRIVACY_PRESERVING_CIRCUIT_ID`. Each verification is
+/// timed separately, in milliseconds. With `iters == 0` the timing loop does
+/// not run and `stats` is built from an empty sample set.
+///
+/// # Errors
+///
+/// Returns an error if proving fails, if the proof bytes do not decode as an
+/// `InnerReceipt`, or if any verification (the up-front check or a timed
+/// iteration) fails.
+pub fn run_verify(iters: usize) -> anyhow::Result<VerifyBenchResult> {
+    eprintln!("verify: generating PPE receipt for auth_transfer Transfer (~1 prove)");
+    let (output, proof) = prove_auth_transfer_in_ppe()?;
+    let journal = output.to_bytes();
+    let journal_bytes = journal.len();
+    let proof_bytes_vec = proof.into_inner();
+    let proof_bytes = proof_bytes_vec.len();
+
+    let inner: InnerReceipt = borsh::from_slice(&proof_bytes_vec)
+        .map_err(|e| anyhow::anyhow!("InnerReceipt deserialize: {e}"))?;
+    let receipt = Receipt::new(inner, journal);
+
+    // Sanity-check before the timing loop so we don't measure 1000 failures.
+    receipt
+        .verify(PRIVACY_PRESERVING_CIRCUIT_ID)
+        .map_err(|e| anyhow::anyhow!("verify sanity check failed: {e}"))?;
+
+    eprintln!("verify: timing {iters} iters of receipt.verify(...)");
+    let mut samples = Vec::with_capacity(iters);
+    for _ in 0..iters {
+        let started = Instant::now();
+        receipt
+            .verify(PRIVACY_PRESERVING_CIRCUIT_ID)
+            .map_err(|e| anyhow::anyhow!("verify failed mid-loop: {e}"))?;
+        samples.push(started.elapsed().as_secs_f64() * 1_000.0);
+    }
+    let stats = Stats::from_samples(&samples);
+
+    Ok(VerifyBenchResult {
+        label: "auth_transfer Transfer in PPE".to_owned(),
+        stats,
+        proof_bytes,
+        journal_bytes,
+    })
+}
diff --git a/tools/cycle_bench/src/stats.rs b/tools/cycle_bench/src/stats.rs
new file mode 100644
index 00000000..7f75fd6f
--- /dev/null
+++ b/tools/cycle_bench/src/stats.rs
@@ -0,0 +1,64 @@
+//! Small helper for best / mean / stdev over wall-time samples.
+//!
+//!
+//! We report both best-of-N (the figure that strips OS noise and matches what most
+//! bench READMEs print) and mean +/- stdev (the figure the fee model wants, since
+//! it cares about the steady-state cost, not a single fastest sample).
+
+use std::fmt;
+
+use serde::Serialize;
+
+/// Aggregate of a set of wall-time samples, all in milliseconds.
+///
+/// The derived `Default` is the all-zero aggregate, which is also what
+/// [`Stats::from_samples`] returns for an empty slice.
+#[derive(Debug, Serialize, Clone, Copy, Default)]
+pub struct Stats {
+    /// Number of samples in the aggregate (excluding warmup).
+    pub n: usize,
+    /// Lowest sample (ms). Strips OS jitter; matches the bench README "best of N" figure.
+    pub best_ms: f64,
+    /// Arithmetic mean of samples (ms).
+    pub mean_ms: f64,
+    /// Sample standard deviation of samples (ms), computed with Bessel's correction (n-1).
+    /// 0.0 when n < 2.
+    pub stdev_ms: f64,
+}
+
+impl Stats {
+    /// Summarizes `samples` (milliseconds) into count, minimum, mean and
+    /// sample standard deviation.
+    ///
+    /// An empty slice yields `Stats::default()`. A single sample yields a
+    /// `stdev_ms` of `0.0`, since the n-1 denominator would otherwise be zero.
+    pub fn from_samples(samples: &[f64]) -> Self {
+        let n = samples.len();
+        if n == 0 {
+            return Self::default();
+        }
+        let best_ms = samples.iter().copied().fold(f64::INFINITY, f64::min);
+        let mean_ms = samples.iter().sum::<f64>() / n as f64;
+        let stdev_ms = if n > 1 {
+            // Two-pass form: squared deviations from the mean computed above.
+            let sum_sq_dev = samples
+                .iter()
+                .map(|sample| {
+                    let deviation = sample - mean_ms;
+                    deviation * deviation
+                })
+                .sum::<f64>();
+            (sum_sq_dev / (n - 1) as f64).sqrt()
+        } else {
+            0.0
+        };
+        Self {
+            n,
+            best_ms,
+            mean_ms,
+            stdev_ms,
+        }
+    }
+}
+
+/// `best / mean ± stdev (n=N)` for table display, each figure with two decimals.
+impl fmt::Display for Stats {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{:.2} / {:.2} ± {:.2} (n={})",
+            self.best_ms, self.mean_ms, self.stdev_ms, self.n,
+        )
+    }
+}