From b6d71a7008b966d6bc5461ae1dfb9310a43d4006 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Fri, 23 Sep 2022 10:54:17 -0700 Subject: [PATCH] Keccak benchmark And reworking things a bit to include the timing data we want. --- evm/src/all_stark.rs | 12 ++- evm/src/config.rs | 4 +- evm/src/generation/mod.rs | 16 ++-- evm/src/keccak/keccak_stark.rs | 98 ++++++++++++++++++-- evm/src/keccak_memory/keccak_memory_stark.rs | 8 +- evm/src/keccak_sponge/keccak_sponge_stark.rs | 8 +- evm/src/logic.rs | 25 ++++- evm/src/memory/memory_stark.rs | 17 ++-- evm/src/prover.rs | 76 +++++++++------ starky/src/config.rs | 4 +- 10 files changed, 195 insertions(+), 73 deletions(-) diff --git a/evm/src/all_stark.rs b/evm/src/all_stark.rs index 1775eef7..2786c36a 100644 --- a/evm/src/all_stark.rs +++ b/evm/src/all_stark.rs @@ -223,14 +223,18 @@ mod tests { let keccak_inputs = (0..num_keccak_perms) .map(|_| [0u64; NUM_INPUTS].map(|_| rng.gen())) .collect_vec(); - keccak_stark.generate_trace(keccak_inputs) + keccak_stark.generate_trace(keccak_inputs, &mut TimingTree::default()) } fn make_keccak_memory_trace( keccak_memory_stark: &KeccakMemoryStark, config: &StarkConfig, ) -> Vec> { - keccak_memory_stark.generate_trace(vec![], 1 << config.fri_config.cap_height) + keccak_memory_stark.generate_trace( + vec![], + 1 << config.fri_config.cap_height, + &mut TimingTree::default(), + ) } fn make_logic_trace( @@ -247,7 +251,7 @@ mod tests { Operation::new(op, input0, input1) }) .collect(); - logic_stark.generate_trace(ops) + logic_stark.generate_trace(ops, &mut TimingTree::default()) } fn make_memory_trace( @@ -256,7 +260,7 @@ mod tests { rng: &mut R, ) -> (Vec>, usize) { let memory_ops = generate_random_memory_ops(num_memory_ops, rng); - let trace = memory_stark.generate_trace(memory_ops); + let trace = memory_stark.generate_trace(memory_ops, &mut TimingTree::default()); let num_ops = trace[0].values.len(); (trace, num_ops) } diff --git a/evm/src/config.rs b/evm/src/config.rs index 500cd957..a593c827 100644 --- a/evm/src/config.rs +++ b/evm/src/config.rs @@ -21,9 +21,9 @@ impl StarkConfig { fri_config: FriConfig { rate_bits: 1, cap_height: 4, - proof_of_work_bits: 10, + proof_of_work_bits: 16, reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5), - num_query_rounds: 90, + num_query_rounds: 84, }, } } diff --git a/evm/src/generation/mod.rs b/evm/src/generation/mod.rs index 12a531c3..f91b70d2 100644 --- a/evm/src/generation/mod.rs +++ b/evm/src/generation/mod.rs @@ -6,6 +6,7 @@ use plonky2::field::extension::Extendable; use plonky2::field::polynomial::PolynomialValues; use plonky2::field::types::Field; use plonky2::hash::hash_types::RichField; +use plonky2::util::timing::TimingTree; use serde::{Deserialize, Serialize}; use crate::all_stark::{AllStark, NUM_TABLES}; @@ -56,6 +57,7 @@ pub(crate) fn generate_traces, const D: usize>( all_stark: &AllStark, inputs: GenerationInputs, config: &StarkConfig, + timing: &mut TimingTree, ) -> ([Vec>; NUM_TABLES], PublicValues) { let mut state = GenerationState::::new(inputs.clone()); @@ -101,12 +103,14 @@ pub(crate) fn generate_traces, const D: usize>( assert_eq!(current_cpu_row, [F::ZERO; NUM_CPU_COLUMNS].into()); let cpu_trace = trace_rows_to_poly_values(cpu_rows); - let keccak_trace = all_stark.keccak_stark.generate_trace(keccak_inputs); - let keccak_memory_trace = all_stark - .keccak_memory_stark - .generate_trace(keccak_memory_inputs, 1 << config.fri_config.cap_height); - let logic_trace = all_stark.logic_stark.generate_trace(logic_ops); - let memory_trace = all_stark.memory_stark.generate_trace(memory.log); + let keccak_trace = all_stark.keccak_stark.generate_trace(keccak_inputs, timing); + let keccak_memory_trace = all_stark.keccak_memory_stark.generate_trace( + keccak_memory_inputs, + 1 << config.fri_config.cap_height, + timing, + ); + let logic_trace = all_stark.logic_stark.generate_trace(logic_ops, timing); + let memory_trace = all_stark.memory_stark.generate_trace(memory.log, timing); let traces = [ cpu_trace, keccak_trace, diff --git a/evm/src/keccak/keccak_stark.rs b/evm/src/keccak/keccak_stark.rs index 23ffe0e9..87a61ae7 100644 --- a/evm/src/keccak/keccak_stark.rs +++ b/evm/src/keccak/keccak_stark.rs @@ -201,23 +201,22 @@ impl, const D: usize> KeccakStark { row[out_reg_hi] = F::from_canonical_u64(row[in_reg_hi].to_canonical_u64() ^ rc_hi); } - pub fn generate_trace(&self, inputs: Vec<[u64; NUM_INPUTS]>) -> Vec> { - let mut timing = TimingTree::new("generate trace", log::Level::Debug); - + pub fn generate_trace( + &self, + inputs: Vec<[u64; NUM_INPUTS]>, + timing: &mut TimingTree, + ) -> Vec> { // Generate the witness, except for permuted columns in the lookup argument. let trace_rows = timed!( - &mut timing, + timing, "generate trace rows", self.generate_trace_rows(inputs) ); - let trace_polys = timed!( - &mut timing, + timing, "convert to PolynomialValues", trace_rows_to_poly_values(trace_rows) ); - - timing.print(); trace_polys } } @@ -542,12 +541,22 @@ impl, const D: usize> Stark for KeccakStark Result<()> { + const NUM_PERMS: usize = 85; + const D: usize = 2; + type C = PoseidonGoldilocksConfig; + type F = >::F; + type S = KeccakStark; + let stark = S::default(); + let config = StarkConfig::standard_fast_config(); + + init_logger(); + + let input: Vec<[u64; NUM_INPUTS]> = (0..NUM_PERMS).map(|_| rand::random()).collect(); + + let mut timing = TimingTree::new("prove", log::Level::Debug); + let trace_poly_values = timed!( + timing, + "generate trace", + stark.generate_trace(input.try_into().unwrap(), &mut timing) + ); + + // TODO: Cloning this isn't great; consider having `from_values` accept a reference, + // or having `compute_permutation_z_polys` read trace values from the `PolynomialBatch`. + let cloned_trace_poly_values = timed!(timing, "clone", trace_poly_values.clone()); + + let trace_commitments = timed!( + timing, + "compute trace commitment", + PolynomialBatch::::from_values( + cloned_trace_poly_values, + config.fri_config.rate_bits, + false, + config.fri_config.cap_height, + &mut timing, + None, + ) + ); + let degree = 1 << trace_commitments.degree_log; + + // Fake CTL data. + let ctl_z_data = CtlZData { + z: PolynomialValues::zero(degree), + challenge: GrandProductChallenge { + beta: F::ZERO, + gamma: F::ZERO, + }, + columns: vec![], + filter_column: None, + }; + let ctl_data = CtlData { + zs_columns: vec![ctl_z_data; config.num_challenges], + }; + + prove_single_table( + &stark, + &config, + &trace_poly_values, + &trace_commitments, + &ctl_data, + &mut Challenger::new(), + &mut timing, + )?; + + timing.print(); + Ok(()) + } + + fn init_logger() { + let _ = try_init_from_env(Env::default().filter_or(DEFAULT_FILTER_ENV, "debug")); + } } diff --git a/evm/src/keccak_memory/keccak_memory_stark.rs b/evm/src/keccak_memory/keccak_memory_stark.rs index cf8955b3..1bbea168 100644 --- a/evm/src/keccak_memory/keccak_memory_stark.rs +++ b/evm/src/keccak_memory/keccak_memory_stark.rs @@ -93,23 +93,21 @@ impl, const D: usize> KeccakMemoryStark { &self, operations: Vec, min_rows: usize, + timing: &mut TimingTree, ) -> Vec> { - let mut timing = TimingTree::new("generate trace", log::Level::Debug); - // Generate the witness row-wise. let trace_rows = timed!( - &mut timing, + timing, "generate trace rows", self.generate_trace_rows(operations, min_rows) ); let trace_polys = timed!( - &mut timing, + timing, "convert to PolynomialValues", trace_rows_to_poly_values(trace_rows) ); - timing.print(); trace_polys } diff --git a/evm/src/keccak_sponge/keccak_sponge_stark.rs b/evm/src/keccak_sponge/keccak_sponge_stark.rs index afde02c2..219c0c21 100644 --- a/evm/src/keccak_sponge/keccak_sponge_stark.rs +++ b/evm/src/keccak_sponge/keccak_sponge_stark.rs @@ -171,23 +171,21 @@ impl, const D: usize> KeccakSpongeStark { &self, operations: Vec, min_rows: usize, + timing: &mut TimingTree, ) -> Vec> { - let mut timing = TimingTree::new("generate trace", log::Level::Debug); - // Generate the witness row-wise. let trace_rows = timed!( - &mut timing, + timing, "generate trace rows", self.generate_trace_rows(operations, min_rows) ); let trace_polys = timed!( - &mut timing, + timing, "convert to PolynomialValues", trace_rows_to_poly_values(trace_rows) ); - timing.print(); trace_polys } diff --git a/evm/src/logic.rs b/evm/src/logic.rs index 2fa9c810..dc6fc777 100644 --- a/evm/src/logic.rs +++ b/evm/src/logic.rs @@ -7,10 +7,13 @@ use plonky2::field::packed::PackedField; use plonky2::field::polynomial::PolynomialValues; use plonky2::field::types::Field; use plonky2::hash::hash_types::RichField; +use plonky2::timed; +use plonky2::util::timing::TimingTree; use plonky2_util::ceil_div_usize; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::cross_table_lookup::Column; +use crate::logic::columns::NUM_COLUMNS; use crate::stark::Stark; use crate::util::{limb_from_bits_le, limb_from_bits_le_recursive, trace_rows_to_poly_values}; use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; @@ -101,7 +104,25 @@ impl Operation { } impl LogicStark { - pub(crate) fn generate_trace(&self, operations: Vec) -> Vec> { + pub(crate) fn generate_trace( + &self, + operations: Vec, + timing: &mut TimingTree, + ) -> Vec> { + let trace_rows = timed!( + timing, + "generate trace rows", + self.generate_trace_rows(operations) + ); + let trace_polys = timed!( + timing, + "convert to PolynomialValues", + trace_rows_to_poly_values(trace_rows) + ); + trace_polys + } + + fn generate_trace_rows(&self, operations: Vec) -> Vec<[F; NUM_COLUMNS]> { let len = operations.len(); let padded_len = len.next_power_of_two(); @@ -115,7 +136,7 @@ impl LogicStark { rows.push([F::ZERO; columns::NUM_COLUMNS]); } - trace_rows_to_poly_values(rows) + rows } fn generate_row(operation: Operation) -> [F; columns::NUM_COLUMNS] { diff --git a/evm/src/memory/memory_stark.rs b/evm/src/memory/memory_stark.rs index 1ec0c11c..f5455a53 100644 --- a/evm/src/memory/memory_stark.rs +++ b/evm/src/memory/memory_stark.rs @@ -187,12 +187,14 @@ impl, const D: usize> MemoryStark { } } - pub(crate) fn generate_trace(&self, memory_ops: Vec) -> Vec> { - let mut timing = TimingTree::new("generate trace", log::Level::Debug); - + pub(crate) fn generate_trace( + &self, + memory_ops: Vec, + timing: &mut TimingTree, + ) -> Vec> { // Generate most of the trace in row-major form. let trace_rows = timed!( - &mut timing, + timing, "generate trace rows", self.generate_trace_row_major(memory_ops) ); @@ -204,13 +206,10 @@ impl, const D: usize> MemoryStark { // A few final generation steps, which work better in column-major form. Self::generate_trace_col_major(&mut trace_col_vecs); - let trace_polys = trace_col_vecs + trace_col_vecs .into_iter() .map(|column| PolynomialValues::new(column)) - .collect(); - - timing.print(); - trace_polys + .collect() } } diff --git a/evm/src/prover.rs b/evm/src/prover.rs index 31e76a1c..7fe57631 100644 --- a/evm/src/prover.rs +++ b/evm/src/prover.rs @@ -53,7 +53,7 @@ where [(); LogicStark::::COLUMNS]:, [(); MemoryStark::::COLUMNS]:, { - let (traces, public_values) = generate_traces(all_stark, inputs, config); + let (traces, public_values) = generate_traces(all_stark, inputs, config, timing); prove_with_traces(all_stark, config, traces, public_values, timing) } @@ -175,7 +175,7 @@ where } /// Compute proof for a single STARK table. -fn prove_single_table( +pub(crate) fn prove_single_table( stark: &S, config: &StarkConfig, trace_poly_values: &[PolynomialValues], @@ -210,7 +210,11 @@ where ) }); let permutation_zs = permutation_challenges.as_ref().map(|challenges| { - compute_permutation_z_polys::(stark, config, trace_poly_values, challenges) + timed!( + timing, + "compute permutation Z(x) polys", + compute_permutation_z_polys::(stark, config, trace_poly_values, challenges) + ) }); let num_permutation_zs = permutation_zs.as_ref().map(|v| v.len()).unwrap_or(0); @@ -223,13 +227,17 @@ where }; assert!(!z_polys.is_empty(), "No CTL?"); - let permutation_ctl_zs_commitment = PolynomialBatch::from_values( - z_polys, - rate_bits, - false, - config.fri_config.cap_height, + let permutation_ctl_zs_commitment = timed!( timing, - None, + "compute Zs commitment", + PolynomialBatch::from_values( + z_polys, + rate_bits, + false, + config.fri_config.cap_height, + timing, + None, + ) ); let permutation_ctl_zs_cap = permutation_ctl_zs_commitment.merkle_tree.cap.clone(); @@ -249,27 +257,37 @@ where config, ); } - let quotient_polys = compute_quotient_polys::::Packing, C, S, D>( - stark, - trace_commitment, - &permutation_ctl_zs_commitment, - permutation_challenges.as_ref(), - ctl_data, - alphas, - degree_bits, - num_permutation_zs, - config, + let quotient_polys = timed!( + timing, + "compute quotient polys", + compute_quotient_polys::::Packing, C, S, D>( + stark, + trace_commitment, + &permutation_ctl_zs_commitment, + permutation_challenges.as_ref(), + ctl_data, + alphas, + degree_bits, + num_permutation_zs, + config, + ) + ); + let all_quotient_chunks = timed!( + timing, + "split quotient polys", + quotient_polys + .into_par_iter() + .flat_map(|mut quotient_poly| { + quotient_poly + .trim_to_len(degree * stark.quotient_degree_factor()) + .expect( + "Quotient has failed, the vanishing polynomial is not divisible by Z_H", + ); + // Split quotient into degree-n chunks. + quotient_poly.chunks(degree) + }) + .collect() ); - let all_quotient_chunks = quotient_polys - .into_par_iter() - .flat_map(|mut quotient_poly| { - quotient_poly - .trim_to_len(degree * stark.quotient_degree_factor()) - .expect("Quotient has failed, the vanishing polynomial is not divisible by Z_H"); - // Split quotient into degree-n chunks. - quotient_poly.chunks(degree) - }) - .collect(); let quotient_commitment = timed!( timing, "compute quotient commitment", diff --git a/starky/src/config.rs b/starky/src/config.rs index 500cd957..a593c827 100644 --- a/starky/src/config.rs +++ b/starky/src/config.rs @@ -21,9 +21,9 @@ impl StarkConfig { fri_config: FriConfig { rate_bits: 1, cap_height: 4, - proof_of_work_bits: 10, + proof_of_work_bits: 16, reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5), - num_query_rounds: 90, + num_query_rounds: 84, }, } }