mirror of
https://github.com/logos-blockchain/logos-execution-zone.git
synced 2026-06-27 17:39:27 +00:00
811 lines
28 KiB
Rust
811 lines
28 KiB
Rust
//! Measures Risc0 user cycles per built-in program instruction.
|
|
//!
|
|
//! Runs each guest ELF through the Risc0 executor (no proving) with realistic inputs
|
|
//! drawn from the existing per-program unit tests, then prints a table and writes a
|
|
//! JSON dump for regression comparison.
|
|
//!
|
|
//! Run with `cargo run --release -p cycle_bench`. `RISC0_DEV_MODE` has no effect on
|
|
//! executor cycle counts.
|
|
|
|
#![expect(
|
|
clippy::arithmetic_side_effects,
|
|
clippy::as_conversions,
|
|
clippy::cast_precision_loss,
|
|
clippy::float_arithmetic,
|
|
clippy::missing_const_for_fn,
|
|
clippy::non_ascii_literal,
|
|
clippy::print_stderr,
|
|
clippy::print_stdout,
|
|
clippy::suboptimal_flops,
|
|
reason = "Bench tool: matches test-style fixture code"
|
|
)]
|
|
|
|
use std::{path::PathBuf, time::Instant};
|
|
|
|
use amm_core::{PoolDefinition, compute_liquidity_token_pda, compute_pool_pda, compute_vault_pda};
|
|
use anyhow::Result;
|
|
use ata_core::{compute_ata_seed, get_associated_token_account_id};
|
|
use clap::Parser;
|
|
use clock_core::{
|
|
CLOCK_01_PROGRAM_ACCOUNT_ID, CLOCK_10_PROGRAM_ACCOUNT_ID, CLOCK_50_PROGRAM_ACCOUNT_ID,
|
|
ClockAccountData,
|
|
};
|
|
use cycle_bench::{ppe, stats::Stats};
|
|
use lee::program_methods::{
|
|
AMM_ELF, AMM_ID, ASSOCIATED_TOKEN_ACCOUNT_ELF, ASSOCIATED_TOKEN_ACCOUNT_ID,
|
|
AUTHENTICATED_TRANSFER_ELF, AUTHENTICATED_TRANSFER_ID, CLOCK_ELF, CLOCK_ID, TOKEN_ELF,
|
|
TOKEN_ID,
|
|
};
|
|
use lee_core::{
|
|
Timestamp,
|
|
account::{Account, AccountId, AccountWithMetadata, Data},
|
|
program::{InstructionData, ProgramId},
|
|
};
|
|
use risc0_zkvm::{ExecutorEnv, default_executor, default_prover};
|
|
use serde::Serialize;
|
|
use token_core::{TokenDefinition, TokenHolding};
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(about = "Per-program executor and (optionally) prover cycle measurements")]
|
|
struct Cli {
|
|
/// Also run prover.prove for each case and report wall time + cycles. Slow.
|
|
#[arg(long)]
|
|
prove: bool,
|
|
|
|
/// Also run privacy-preserving execution circuit (PPE) composition cases:
|
|
/// (a) single `auth_transfer` Transfer through `execute_and_prove`, (b) `chain_caller`
|
|
/// with depth N=1,3,5,9. Requires --features ppe at build time. Very slow.
|
|
#[arg(long)]
|
|
ppe: bool,
|
|
|
|
/// Iterations for executor wall-time sampling per case. First iter is
|
|
/// discarded as warmup, remaining N feed the stats.
|
|
#[arg(long, default_value_t = 5)]
|
|
exec_iters: usize,
|
|
}
|
|
|
|
#[derive(Debug, Serialize)]
|
|
struct BenchResult {
|
|
program: &'static str,
|
|
instruction: &'static str,
|
|
user_cycles: u64,
|
|
segments: usize,
|
|
exec_stats: Stats,
|
|
/// Compute-only execution time (ms): best-of-N executor wall-time minus the calibrated
|
|
/// host-side fixed per-call overhead. Filled after the calibration fit over all cases.
|
|
net_compute_ms: Option<f64>,
|
|
/// Deterministic model prediction of compute time (ms): `user_cycles * slope` from the
|
|
/// calibration fit. Pure function of the deterministic cycle count and the pinned-hardware
|
|
/// throughput, so it reproduces across re-runs where raw wall-time does not.
|
|
calibrated_ms: Option<f64>,
|
|
/// Stats over prover.prove(env, elf) wall-clock samples. Only populated when --prove is set.
|
|
/// Single-sample (n=1) when --prove is on without explicit repetition, since proving is slow.
|
|
prove_stats: Option<Stats>,
|
|
/// Total cycles (with continuation overhead, paging, po2 padding) from ProveInfo.stats.
|
|
prove_total_cycles: Option<u64>,
|
|
/// User cycles from ProveInfo.stats (should match executor cycles).
|
|
prove_user_cycles: Option<u64>,
|
|
/// Paging cycles from ProveInfo.stats.
|
|
prove_paging_cycles: Option<u64>,
|
|
/// Segments from ProveInfo.stats.
|
|
prove_segments: Option<usize>,
|
|
}
|
|
|
|
/// Linear calibration of executor wall-time against deterministic user cycles,
|
|
/// fitted across all standalone cases as `best_ms = intercept_ms + slope_ms_per_cycle *
|
|
/// user_cycles`.
|
|
///
|
|
/// The intercept is the host-side fixed per-call cost (ELF parse, `ExecutorEnv` build) that is
|
|
/// outside the cycle count and does not scale with the instruction's work. The slope is the
|
|
/// per-cycle execution rate on the pinned box; its reciprocal is the throughput the tokenomics
|
|
/// fee model denominates public execution in, and is the public-side counterpart to the flat
|
|
/// `G_verify` verify cost. The intercept is an ELF-size-averaged constant, so `net_compute_ms`
|
|
/// is a first-order decomposition, not a mechanistic per-program overhead.
|
|
#[derive(Debug, Serialize, Clone, Copy)]
|
|
struct Calibration {
|
|
/// Cases the fit was computed over.
|
|
n: usize,
|
|
/// Slope: milliseconds of executor wall-time per user cycle.
|
|
slope_ms_per_cycle: f64,
|
|
/// Intercept: host-side fixed per-call overhead in milliseconds.
|
|
intercept_ms: f64,
|
|
/// Reciprocal of the slope: cycles executed per millisecond on the pinned box.
|
|
throughput_cycles_per_ms: f64,
|
|
/// Coefficient of determination of the fit (1.0 = perfect linear fit).
|
|
r2: f64,
|
|
}
|
|
|
|
impl Calibration {
|
|
/// Ordinary least squares of `best_ms` (y) on `user_cycles` (x) across `results`.
|
|
/// The fit uses best-of-N rather than the mean so a single OS scheduling spike in one
|
|
/// case cannot tilt the slope; best-of-N is the per-case noise floor and reproduces
|
|
/// run-to-run, which is what a pinned-hardware throughput constant needs.
|
|
/// Returns `None` when there are fewer than two distinct cycle counts to fit a line.
|
|
fn fit(results: &[BenchResult]) -> Option<Self> {
|
|
let n = results.len();
|
|
if n < 2 {
|
|
return None;
|
|
}
|
|
let xs: Vec<f64> = results.iter().map(|r| r.user_cycles as f64).collect();
|
|
let ys: Vec<f64> = results.iter().map(|r| r.exec_stats.best_ms).collect();
|
|
let nf = n as f64;
|
|
let sum_x: f64 = xs.iter().sum();
|
|
let sum_y: f64 = ys.iter().sum();
|
|
let sum_xy: f64 = xs.iter().zip(&ys).map(|(x, y)| x * y).sum();
|
|
let sum_xx: f64 = xs.iter().map(|x| x * x).sum();
|
|
let denom = nf * sum_xx - sum_x.powi(2);
|
|
if denom.abs() < f64::EPSILON {
|
|
return None;
|
|
}
|
|
let slope = (nf * sum_xy - sum_x * sum_y) / denom;
|
|
let intercept = (sum_y - slope * sum_x) / nf;
|
|
let mean_y = sum_y / nf;
|
|
let ss_tot: f64 = ys.iter().map(|y| (y - mean_y).powi(2)).sum();
|
|
let ss_res: f64 = xs
|
|
.iter()
|
|
.zip(&ys)
|
|
.map(|(x, y)| (y - (intercept + slope * x)).powi(2))
|
|
.sum();
|
|
// ss_tot ≈ 0 means every best_ms is identical; the ratio is 0/0. We report 1.0 (a flat
|
|
// line fits a flat cloud exactly). This is a degenerate guard, not a real-data path: the
|
|
// bench cases span a wide cycle range, so ss_tot is large in practice.
|
|
let r2 = if ss_tot.abs() < f64::EPSILON {
|
|
1.0
|
|
} else {
|
|
1.0 - ss_res / ss_tot
|
|
};
|
|
let throughput_cycles_per_ms = if slope.abs() < f64::EPSILON {
|
|
0.0
|
|
} else {
|
|
1.0 / slope
|
|
};
|
|
Some(Self {
|
|
n,
|
|
slope_ms_per_cycle: slope,
|
|
intercept_ms: intercept,
|
|
throughput_cycles_per_ms,
|
|
r2,
|
|
})
|
|
}
|
|
|
|
/// Compute-time prediction for a cycle count: `slope * user_cycles` (overhead excluded).
|
|
fn calibrated_ms(&self, user_cycles: u64) -> f64 {
|
|
self.slope_ms_per_cycle * user_cycles as f64
|
|
}
|
|
}
|
|
|
|
struct Case {
|
|
program: &'static str,
|
|
instruction_label: &'static str,
|
|
elf: &'static [u8],
|
|
self_program_id: ProgramId,
|
|
pre_states: Vec<AccountWithMetadata>,
|
|
instruction_words: InstructionData,
|
|
}
|
|
|
|
impl Case {
|
|
fn new<I: Serialize>(
|
|
program: &'static str,
|
|
instruction_label: &'static str,
|
|
elf: &'static [u8],
|
|
self_program_id: ProgramId,
|
|
pre_states: Vec<AccountWithMetadata>,
|
|
instruction: &I,
|
|
) -> Result<Self> {
|
|
Ok(Self {
|
|
program,
|
|
instruction_label,
|
|
elf,
|
|
self_program_id,
|
|
pre_states,
|
|
instruction_words: risc0_zkvm::serde::to_vec(instruction)?,
|
|
})
|
|
}
|
|
|
|
fn run(self, prove: bool, exec_iters: usize) -> Result<BenchResult> {
|
|
let Self {
|
|
program,
|
|
instruction_label,
|
|
elf,
|
|
self_program_id,
|
|
pre_states,
|
|
instruction_words,
|
|
} = self;
|
|
let caller_program_id: Option<ProgramId> = None;
|
|
|
|
// One warmup pass discarded, then `exec_iters` samples. The executor has
|
|
// large per-call setup overhead (ELF parsing, env init); reporting both
|
|
// best-of-N and mean ± stdev shows whether jitter is significant.
|
|
let mut samples: Vec<f64> = Vec::with_capacity(exec_iters);
|
|
let mut last_info = None;
|
|
let total = exec_iters.saturating_add(1).max(2);
|
|
for iter in 0..total {
|
|
let mut env_builder = ExecutorEnv::builder();
|
|
env_builder
|
|
.write(&self_program_id)?
|
|
.write(&caller_program_id)?
|
|
.write(&pre_states)?
|
|
.write(&instruction_words)?;
|
|
let env = env_builder.build()?;
|
|
|
|
let started = Instant::now();
|
|
let info = default_executor().execute(env, elf)?;
|
|
let elapsed_ms = started.elapsed().as_secs_f64() * 1_000.0;
|
|
|
|
if iter > 0 {
|
|
samples.push(elapsed_ms);
|
|
}
|
|
last_info = Some(info);
|
|
}
|
|
let info = last_info.expect("at least one iteration");
|
|
let exec_stats = Stats::from_samples(&samples);
|
|
|
|
let mut prove_stats = None;
|
|
let mut prove_total_cycles = None;
|
|
let mut prove_user_cycles = None;
|
|
let mut prove_paging_cycles = None;
|
|
let mut prove_segments = None;
|
|
if prove {
|
|
let mut env_builder = ExecutorEnv::builder();
|
|
env_builder
|
|
.write(&self_program_id)?
|
|
.write(&caller_program_id)?
|
|
.write(&pre_states)?
|
|
.write(&instruction_words)?;
|
|
let env = env_builder.build()?;
|
|
|
|
let started = Instant::now();
|
|
let prove_info = default_prover()
|
|
.prove(env, elf)
|
|
.map_err(|e| anyhow::anyhow!("prove failed: {e}"))?;
|
|
let prove_ms = started.elapsed().as_secs_f64() * 1_000.0;
|
|
prove_stats = Some(Stats::from_samples(&[prove_ms]));
|
|
prove_total_cycles = Some(prove_info.stats.total_cycles);
|
|
prove_user_cycles = Some(prove_info.stats.user_cycles);
|
|
prove_paging_cycles = Some(prove_info.stats.paging_cycles);
|
|
prove_segments = Some(prove_info.stats.segments);
|
|
eprintln!(
|
|
" prove({program}/{instruction_label}): {prove_ms:.1} ms ({:.1}s), total_cycles={}, segments={}",
|
|
prove_ms / 1_000.0,
|
|
prove_info.stats.total_cycles,
|
|
prove_info.stats.segments,
|
|
);
|
|
}
|
|
|
|
Ok(BenchResult {
|
|
program,
|
|
instruction: instruction_label,
|
|
user_cycles: info.cycles(),
|
|
segments: info.segments.len(),
|
|
exec_stats,
|
|
net_compute_ms: None,
|
|
calibrated_ms: None,
|
|
prove_stats,
|
|
prove_total_cycles,
|
|
prove_user_cycles,
|
|
prove_paging_cycles,
|
|
prove_segments,
|
|
})
|
|
}
|
|
}
|
|
|
|
fn authenticated_transfer_init() -> Vec<AccountWithMetadata> {
|
|
vec![AccountWithMetadata {
|
|
account: Account::default(),
|
|
is_authorized: true,
|
|
account_id: AccountId::new([1; 32]),
|
|
}]
|
|
}
|
|
|
|
fn authenticated_transfer_transfer() -> Vec<AccountWithMetadata> {
|
|
let sender = AccountWithMetadata {
|
|
account: Account {
|
|
balance: 1_000_000,
|
|
..Account::default()
|
|
},
|
|
is_authorized: true,
|
|
account_id: AccountId::new([1; 32]),
|
|
};
|
|
let recipient = AccountWithMetadata {
|
|
account: Account::default(),
|
|
is_authorized: false,
|
|
account_id: AccountId::new([2; 32]),
|
|
};
|
|
vec![sender, recipient]
|
|
}
|
|
|
|
fn token_holding(
|
|
definition_id: AccountId,
|
|
account_id: AccountId,
|
|
balance: u128,
|
|
is_authorized: bool,
|
|
) -> AccountWithMetadata {
|
|
AccountWithMetadata {
|
|
account: Account {
|
|
program_owner: TOKEN_ID,
|
|
balance: 0,
|
|
data: Data::from(&TokenHolding::Fungible {
|
|
definition_id,
|
|
balance,
|
|
}),
|
|
nonce: 0_u128.into(),
|
|
},
|
|
is_authorized,
|
|
account_id,
|
|
}
|
|
}
|
|
|
|
fn token_definition(
|
|
account_id: AccountId,
|
|
total_supply: u128,
|
|
is_authorized: bool,
|
|
) -> AccountWithMetadata {
|
|
AccountWithMetadata {
|
|
account: Account {
|
|
program_owner: TOKEN_ID,
|
|
balance: 0,
|
|
data: Data::from(&TokenDefinition::Fungible {
|
|
name: String::from("test"),
|
|
total_supply,
|
|
metadata_id: None,
|
|
}),
|
|
nonce: 0_u128.into(),
|
|
},
|
|
is_authorized,
|
|
account_id,
|
|
}
|
|
}
|
|
|
|
fn token_transfer_pre_states() -> Vec<AccountWithMetadata> {
|
|
let def = AccountId::new([15; 32]);
|
|
let sender = token_holding(def, AccountId::new([17; 32]), 100_000, true);
|
|
let recipient = token_holding(def, AccountId::new([42; 32]), 50_000, true);
|
|
vec![sender, recipient]
|
|
}
|
|
|
|
fn token_mint_pre_states() -> Vec<AccountWithMetadata> {
|
|
let def_id = AccountId::new([15; 32]);
|
|
let def = token_definition(def_id, 100_000, true);
|
|
let holding = token_holding(def_id, AccountId::new([17; 32]), 1_000, true);
|
|
vec![def, holding]
|
|
}
|
|
|
|
fn token_burn_pre_states() -> Vec<AccountWithMetadata> {
|
|
let def_id = AccountId::new([15; 32]);
|
|
let def = token_definition(def_id, 100_000, true);
|
|
let holding = token_holding(def_id, AccountId::new([17; 32]), 1_000, true);
|
|
vec![def, holding]
|
|
}
|
|
|
|
fn clock_account(account_id: AccountId, block_id: u64) -> AccountWithMetadata {
|
|
AccountWithMetadata {
|
|
account: Account {
|
|
program_owner: CLOCK_ID,
|
|
balance: 0,
|
|
data: ClockAccountData {
|
|
block_id,
|
|
timestamp: Timestamp::from(0_u64),
|
|
}
|
|
.to_bytes()
|
|
.try_into()
|
|
.expect("ClockAccountData should fit in account data"),
|
|
nonce: 0_u128.into(),
|
|
},
|
|
is_authorized: false,
|
|
account_id,
|
|
}
|
|
}
|
|
|
|
fn clock_pre_states_tick_at(block_id: u64) -> Vec<AccountWithMetadata> {
|
|
vec![
|
|
clock_account(CLOCK_01_PROGRAM_ACCOUNT_ID, block_id),
|
|
clock_account(CLOCK_10_PROGRAM_ACCOUNT_ID, block_id),
|
|
clock_account(CLOCK_50_PROGRAM_ACCOUNT_ID, block_id),
|
|
]
|
|
}
|
|
|
|
fn amm_token_a_def_id() -> AccountId {
|
|
AccountId::new([42; 32])
|
|
}
|
|
fn amm_token_b_def_id() -> AccountId {
|
|
AccountId::new([43; 32])
|
|
}
|
|
fn amm_pool_id() -> AccountId {
|
|
compute_pool_pda(AMM_ID, amm_token_a_def_id(), amm_token_b_def_id())
|
|
}
|
|
fn amm_vault_a_id() -> AccountId {
|
|
compute_vault_pda(AMM_ID, amm_pool_id(), amm_token_a_def_id())
|
|
}
|
|
fn amm_vault_b_id() -> AccountId {
|
|
compute_vault_pda(AMM_ID, amm_pool_id(), amm_token_b_def_id())
|
|
}
|
|
fn amm_lp_def_id() -> AccountId {
|
|
compute_liquidity_token_pda(AMM_ID, amm_pool_id())
|
|
}
|
|
|
|
/// Pool seeded with reserves `1_000` / `500`, lp supply `sqrt(1000*500) = 707`.
|
|
fn amm_pool_account() -> AccountWithMetadata {
|
|
let reserve_a: u128 = 1_000;
|
|
let reserve_b: u128 = 500;
|
|
let lp_supply = (reserve_a * reserve_b).isqrt();
|
|
AccountWithMetadata {
|
|
account: Account {
|
|
program_owner: AMM_ID,
|
|
balance: 0,
|
|
data: Data::from(&PoolDefinition {
|
|
definition_token_a_id: amm_token_a_def_id(),
|
|
definition_token_b_id: amm_token_b_def_id(),
|
|
vault_a_id: amm_vault_a_id(),
|
|
vault_b_id: amm_vault_b_id(),
|
|
liquidity_pool_id: amm_lp_def_id(),
|
|
liquidity_pool_supply: lp_supply,
|
|
reserve_a,
|
|
reserve_b,
|
|
fees: 0,
|
|
active: true,
|
|
}),
|
|
nonce: 0_u128.into(),
|
|
},
|
|
is_authorized: true,
|
|
account_id: amm_pool_id(),
|
|
}
|
|
}
|
|
|
|
fn amm_swap_pre_states() -> Vec<AccountWithMetadata> {
|
|
let pool = amm_pool_account();
|
|
let vault_a = token_holding(amm_token_a_def_id(), amm_vault_a_id(), 1_000, true);
|
|
let vault_b = token_holding(amm_token_b_def_id(), amm_vault_b_id(), 500, true);
|
|
let user_a = token_holding(amm_token_a_def_id(), AccountId::new([45; 32]), 1_000, true);
|
|
let user_b = token_holding(amm_token_b_def_id(), AccountId::new([46; 32]), 500, false);
|
|
vec![pool, vault_a, vault_b, user_a, user_b]
|
|
}
|
|
|
|
fn amm_add_liquidity_pre_states() -> Vec<AccountWithMetadata> {
|
|
let pool = amm_pool_account();
|
|
let vault_a = token_holding(amm_token_a_def_id(), amm_vault_a_id(), 1_000, true);
|
|
let vault_b = token_holding(amm_token_b_def_id(), amm_vault_b_id(), 500, true);
|
|
let lp_supply = (1_000_u128 * 500_u128).isqrt();
|
|
let lp_def = token_definition(amm_lp_def_id(), lp_supply, true);
|
|
let user_a = token_holding(amm_token_a_def_id(), AccountId::new([45; 32]), 1_000, true);
|
|
let user_b = token_holding(amm_token_b_def_id(), AccountId::new([46; 32]), 500, true);
|
|
let user_lp = token_holding(amm_lp_def_id(), AccountId::new([47; 32]), 0, true);
|
|
vec![pool, vault_a, vault_b, lp_def, user_a, user_b, user_lp]
|
|
}
|
|
|
|
fn ata_create_pre_states() -> Vec<AccountWithMetadata> {
|
|
let owner_id = AccountId::new([91; 32]);
|
|
let definition_id = AccountId::new([15; 32]);
|
|
let owner = AccountWithMetadata {
|
|
account: Account::default(),
|
|
is_authorized: true,
|
|
account_id: owner_id,
|
|
};
|
|
let token_def = token_definition(definition_id, 100_000, false);
|
|
let seed = compute_ata_seed(owner_id, definition_id);
|
|
let ata_id = get_associated_token_account_id(&ASSOCIATED_TOKEN_ACCOUNT_ID, &seed);
|
|
let ata_account = AccountWithMetadata {
|
|
account: Account::default(),
|
|
is_authorized: false,
|
|
account_id: ata_id,
|
|
};
|
|
vec![owner, token_def, ata_account]
|
|
}
|
|
|
|
fn main() -> Result<()> {
|
|
let cli = Cli::parse();
|
|
let prove = cli.prove;
|
|
let exec_iters = cli.exec_iters.max(1);
|
|
if prove {
|
|
eprintln!("cycle_bench: prove mode ON, this will be slow (~minutes per program)");
|
|
}
|
|
|
|
let cases = [
|
|
Case::new(
|
|
"authenticated_transfer",
|
|
"Transfer",
|
|
AUTHENTICATED_TRANSFER_ELF,
|
|
AUTHENTICATED_TRANSFER_ID,
|
|
authenticated_transfer_transfer(),
|
|
&authenticated_transfer_core::Instruction::Transfer { amount: 5_000 },
|
|
)?,
|
|
Case::new(
|
|
"authenticated_transfer",
|
|
"Initialize",
|
|
AUTHENTICATED_TRANSFER_ELF,
|
|
AUTHENTICATED_TRANSFER_ID,
|
|
authenticated_transfer_init(),
|
|
&authenticated_transfer_core::Instruction::Initialize,
|
|
)?,
|
|
Case::new(
|
|
"token",
|
|
"Transfer",
|
|
TOKEN_ELF,
|
|
TOKEN_ID,
|
|
token_transfer_pre_states(),
|
|
&token_core::Instruction::Transfer {
|
|
amount_to_transfer: 5_000,
|
|
},
|
|
)?,
|
|
Case::new(
|
|
"token",
|
|
"Mint",
|
|
TOKEN_ELF,
|
|
TOKEN_ID,
|
|
token_mint_pre_states(),
|
|
&token_core::Instruction::Mint {
|
|
amount_to_mint: 5_000,
|
|
},
|
|
)?,
|
|
Case::new(
|
|
"token",
|
|
"Burn",
|
|
TOKEN_ELF,
|
|
TOKEN_ID,
|
|
token_burn_pre_states(),
|
|
&token_core::Instruction::Burn {
|
|
amount_to_burn: 500,
|
|
},
|
|
)?,
|
|
Case::new(
|
|
"clock",
|
|
"Tick (block_id+1, no multiples)",
|
|
CLOCK_ELF,
|
|
CLOCK_ID,
|
|
clock_pre_states_tick_at(0),
|
|
&Timestamp::from(1_700_000_000_u64),
|
|
)?,
|
|
Case::new(
|
|
"amm",
|
|
"SwapExactInput",
|
|
AMM_ELF,
|
|
AMM_ID,
|
|
amm_swap_pre_states(),
|
|
&amm_core::Instruction::SwapExactInput {
|
|
swap_amount_in: 200,
|
|
min_amount_out: 1,
|
|
token_definition_id_in: amm_token_a_def_id(),
|
|
},
|
|
)?,
|
|
Case::new(
|
|
"amm",
|
|
"AddLiquidity",
|
|
AMM_ELF,
|
|
AMM_ID,
|
|
amm_add_liquidity_pre_states(),
|
|
&amm_core::Instruction::AddLiquidity {
|
|
min_amount_liquidity: 1,
|
|
max_amount_to_add_token_a: 400,
|
|
max_amount_to_add_token_b: 200,
|
|
},
|
|
)?,
|
|
Case::new(
|
|
"ata",
|
|
"Create",
|
|
ASSOCIATED_TOKEN_ACCOUNT_ELF,
|
|
ASSOCIATED_TOKEN_ACCOUNT_ID,
|
|
ata_create_pre_states(),
|
|
&ata_core::Instruction::Create {
|
|
ata_program_id: ASSOCIATED_TOKEN_ACCOUNT_ID,
|
|
},
|
|
)?,
|
|
];
|
|
|
|
let mut results: Vec<BenchResult> = cases
|
|
.into_iter()
|
|
.map(|c| c.run(prove, exec_iters))
|
|
.collect::<Result<Vec<_>>>()?;
|
|
|
|
let calibration = Calibration::fit(&results);
|
|
if let Some(cal) = calibration {
|
|
for r in &mut results {
|
|
r.calibrated_ms = Some(cal.calibrated_ms(r.user_cycles));
|
|
r.net_compute_ms = Some(r.exec_stats.best_ms - cal.intercept_ms);
|
|
}
|
|
}
|
|
|
|
print_table(&results, prove);
|
|
if let Some(cal) = calibration {
|
|
print_calibration(&cal);
|
|
}
|
|
|
|
#[cfg(feature = "ppe")]
|
|
let ppe_results = if cli.ppe { ppe::run_all() } else { Vec::new() };
|
|
#[cfg(not(feature = "ppe"))]
|
|
let ppe_results: Vec<ppe::PpeBenchResult> = {
|
|
if cli.ppe {
|
|
eprintln!("cycle_bench: --ppe requires --features ppe at build time. Ignoring.");
|
|
}
|
|
Vec::new()
|
|
};
|
|
if !ppe_results.is_empty() {
|
|
ppe::print_table(&ppe_results);
|
|
}
|
|
|
|
let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
|
.join("..")
|
|
.join("..")
|
|
.canonicalize()?;
|
|
let out_path = workspace_root.join("target").join("cycle_bench.json");
|
|
if let Some(parent) = out_path.parent() {
|
|
std::fs::create_dir_all(parent)?;
|
|
}
|
|
let combined = serde_json::json!({
|
|
"standalone": results,
|
|
"calibration": calibration,
|
|
"ppe": ppe_results,
|
|
});
|
|
std::fs::write(&out_path, serde_json::to_string_pretty(&combined)?)?;
|
|
println!("\nJSON written to {}", out_path.display());
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn print_calibration(cal: &Calibration) {
|
|
println!("\npublic-execution ms calibration (pinned hardware):");
|
|
println!(
|
|
" fit: best_ms = {:.4} + {:.3e} * user_cycles (n={}, R²={:.4})",
|
|
cal.intercept_ms, cal.slope_ms_per_cycle, cal.n, cal.r2,
|
|
);
|
|
println!(
|
|
" throughput: {:.0} cycles/ms",
|
|
cal.throughput_cycles_per_ms,
|
|
);
|
|
println!(
|
|
" fixed overhead: {:.3} ms host-side per call (ELF parse + env build, off-cycle)",
|
|
cal.intercept_ms,
|
|
);
|
|
println!(" calib_ms = user_cycles / throughput (compute only, overhead excluded)");
|
|
println!(" net_ms = best exec_ms - fixed overhead (measured compute, overhead stripped)");
|
|
}
|
|
|
|
fn print_table(results: &[BenchResult], prove: bool) {
|
|
let pw = results
|
|
.iter()
|
|
.map(|r| r.program.len())
|
|
.max()
|
|
.unwrap_or(0)
|
|
.max("program".len());
|
|
let iw = results
|
|
.iter()
|
|
.map(|r| r.instruction.len())
|
|
.max()
|
|
.unwrap_or(0)
|
|
.max("instruction".len());
|
|
let cw = 12_usize;
|
|
let sw = 8_usize;
|
|
let exec_w = results
|
|
.iter()
|
|
.map(|r| r.exec_stats.to_string().len())
|
|
.max()
|
|
.unwrap_or(0)
|
|
.max("exec_ms (best / mean ± stdev)".len());
|
|
|
|
let dw = 10_usize;
|
|
println!(
|
|
"{:<pw$} {:<iw$} {:>cw$} {:>sw$} {:<exec_w$} {:>dw$} {:>dw$}",
|
|
"program",
|
|
"instruction",
|
|
"user_cycles",
|
|
"segments",
|
|
"exec_ms (best / mean ± stdev)",
|
|
"calib_ms",
|
|
"net_ms",
|
|
);
|
|
println!("{}", "-".repeat(pw + iw + cw + sw + exec_w + 2 * dw + 12));
|
|
for r in results {
|
|
let calib = r
|
|
.calibrated_ms
|
|
.map_or_else(|| "-".to_owned(), |v| format!("{v:.2}"));
|
|
let net = r
|
|
.net_compute_ms
|
|
.map_or_else(|| "-".to_owned(), |v| format!("{v:.2}"));
|
|
println!(
|
|
"{:<pw$} {:<iw$} {:>cw$} {:>sw$} {:<exec_w$} {:>dw$} {:>dw$}",
|
|
r.program, r.instruction, r.user_cycles, r.segments, r.exec_stats, calib, net,
|
|
);
|
|
}
|
|
|
|
if prove {
|
|
println!("\nprove():");
|
|
let pcw = 14_usize;
|
|
let pwallw = 24_usize;
|
|
let psw = 10_usize;
|
|
println!(
|
|
"{:<pw$} {:<iw$} {:>pcw$} {:>pwallw$} {:>psw$}",
|
|
"program", "instruction", "prove_total_c", "prove_ms (s)", "prove_segs",
|
|
);
|
|
println!("{}", "-".repeat(pw + iw + pcw + pwallw + psw + 8));
|
|
for r in results {
|
|
let total = r
|
|
.prove_total_cycles
|
|
.map_or_else(|| "-".to_owned(), |c| c.to_string());
|
|
let pms = r.prove_stats.map_or_else(
|
|
|| "-".to_owned(),
|
|
|s| format!("{:.1} ({:.1}s)", s.best_ms, s.best_ms / 1_000.0),
|
|
);
|
|
let psegs = r
|
|
.prove_segments
|
|
.map_or_else(|| "-".to_owned(), |s| s.to_string());
|
|
println!(
|
|
"{:<pw$} {:<iw$} {:>pcw$} {:>pwallw$} {:>psw$}",
|
|
r.program, r.instruction, total, pms, psegs,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use cycle_bench::stats::Stats;

    use super::{BenchResult, Calibration};

    /// Builds a `BenchResult` in which only the inputs of the calibration fit matter:
    /// `user_cycles` (x) and `exec_stats.best_ms` (y, from a single sample).
    fn point(user_cycles: u64, best_ms: f64) -> BenchResult {
        let exec_stats = Stats::from_samples(&[best_ms]);
        BenchResult {
            program: "test",
            instruction: "test",
            user_cycles,
            segments: 1,
            exec_stats,
            net_compute_ms: None,
            calibrated_ms: None,
            prove_stats: None,
            prove_total_cycles: None,
            prove_user_cycles: None,
            prove_paging_cycles: None,
            prove_segments: None,
        }
    }

    /// Absolute-tolerance float comparison (1e-9).
    fn close(a: f64, b: f64) -> bool {
        let gap = (a - b).abs();
        gap < 1e-9
    }

    #[test]
    fn fit_recovers_a_known_line() {
        // Points lie exactly on best_ms = 10 + 0.001 * user_cycles, so the fit must
        // return slope 1e-3, intercept 10 and throughput 1000 with R² = 1.
        let results = [point(1000, 11.0), point(2000, 12.0), point(3000, 13.0)];
        let cal = Calibration::fit(&results).expect("fit over three points");

        let slope = cal.slope_ms_per_cycle;
        assert!(close(slope, 0.001), "slope {}", slope);

        let intercept = cal.intercept_ms;
        assert!(close(intercept, 10.0), "intercept {}", intercept);

        let throughput = cal.throughput_cycles_per_ms;
        assert!(close(throughput, 1000.0), "throughput {}", throughput,);

        assert!(close(cal.r2, 1.0), "r2 {}", cal.r2);
        assert_eq!(cal.n, 3);

        // calibrated_ms is the overhead-excluded compute prediction: slope * cycles.
        let predicted = cal.calibrated_ms(2000);
        assert!(close(predicted, 2.0), "calib {}", predicted);
    }

    #[test]
    fn fit_needs_at_least_two_points() {
        let none: [BenchResult; 0] = [];
        assert!(Calibration::fit(&none).is_none());

        let single = [point(1000, 11.0)];
        assert!(Calibration::fit(&single).is_none());
    }

    #[test]
    fn fit_with_identical_cycle_counts_returns_none() {
        // With no spread in x the slope is undetermined; the fit must decline instead
        // of dividing by zero.
        let results = [point(1000, 11.0), point(1000, 12.0)];
        let fitted = Calibration::fit(&results);
        assert!(fitted.is_none());
    }
}
|