Merge pull request #733 from mir-protocol/keccak_bench

Keccak benchmark
Daniel Lubarov 2022-09-23 11:25:53 -07:00 committed by GitHub
commit 243eb265ff
10 changed files with 195 additions and 73 deletions

View File

@@ -223,14 +223,18 @@ mod tests {
let keccak_inputs = (0..num_keccak_perms)
.map(|_| [0u64; NUM_INPUTS].map(|_| rng.gen()))
.collect_vec();
keccak_stark.generate_trace(keccak_inputs)
keccak_stark.generate_trace(keccak_inputs, &mut TimingTree::default())
}
fn make_keccak_memory_trace(
keccak_memory_stark: &KeccakMemoryStark<F, D>,
config: &StarkConfig,
) -> Vec<PolynomialValues<F>> {
keccak_memory_stark.generate_trace(vec![], 1 << config.fri_config.cap_height)
keccak_memory_stark.generate_trace(
vec![],
1 << config.fri_config.cap_height,
&mut TimingTree::default(),
)
}
fn make_logic_trace<R: Rng>(
@@ -247,7 +251,7 @@ mod tests {
Operation::new(op, input0, input1)
})
.collect();
logic_stark.generate_trace(ops)
logic_stark.generate_trace(ops, &mut TimingTree::default())
}
fn make_memory_trace<R: Rng>(
@@ -256,7 +260,7 @@
rng: &mut R,
) -> (Vec<PolynomialValues<F>>, usize) {
let memory_ops = generate_random_memory_ops(num_memory_ops, rng);
let trace = memory_stark.generate_trace(memory_ops);
let trace = memory_stark.generate_trace(memory_ops, &mut TimingTree::default());
let num_ops = trace[0].values.len();
(trace, num_ops)
}

View File

@@ -21,9 +21,9 @@ impl StarkConfig {
fri_config: FriConfig {
rate_bits: 1,
cap_height: 4,
proof_of_work_bits: 10,
proof_of_work_bits: 16,
reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5),
num_query_rounds: 90,
num_query_rounds: 84,
},
}
}
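The likely rationale (not stated in the diff) follows the usual conjectured FRI soundness estimate of roughly rate_bits bits of security per query round plus the grinding bits, so both settings target about the same level:

    old: 90 rounds * 1 bit + 10 PoW bits ~= 100 bits
    new: 84 rounds * 1 bit + 16 PoW bits ~= 100 bits

In other words, the change trades a few query rounds for a harder proof-of-work while keeping the conjectured security budget near 100 bits.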

View File

@@ -6,6 +6,7 @@ use plonky2::field::extension::Extendable;
use plonky2::field::polynomial::PolynomialValues;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::util::timing::TimingTree;
use serde::{Deserialize, Serialize};
use crate::all_stark::{AllStark, NUM_TABLES};
@@ -56,6 +57,7 @@ pub(crate) fn generate_traces<F: RichField + Extendable<D>, const D: usize>(
all_stark: &AllStark<F, D>,
inputs: GenerationInputs,
config: &StarkConfig,
timing: &mut TimingTree,
) -> ([Vec<PolynomialValues<F>>; NUM_TABLES], PublicValues) {
let mut state = GenerationState::<F>::new(inputs.clone());
@@ -101,12 +103,14 @@ pub(crate) fn generate_traces<F: RichField + Extendable<D>, const D: usize>(
assert_eq!(current_cpu_row, [F::ZERO; NUM_CPU_COLUMNS].into());
let cpu_trace = trace_rows_to_poly_values(cpu_rows);
let keccak_trace = all_stark.keccak_stark.generate_trace(keccak_inputs);
let keccak_memory_trace = all_stark
.keccak_memory_stark
.generate_trace(keccak_memory_inputs, 1 << config.fri_config.cap_height);
let logic_trace = all_stark.logic_stark.generate_trace(logic_ops);
let memory_trace = all_stark.memory_stark.generate_trace(memory.log);
let keccak_trace = all_stark.keccak_stark.generate_trace(keccak_inputs, timing);
let keccak_memory_trace = all_stark.keccak_memory_stark.generate_trace(
keccak_memory_inputs,
1 << config.fri_config.cap_height,
timing,
);
let logic_trace = all_stark.logic_stark.generate_trace(logic_ops, timing);
let memory_trace = all_stark.memory_stark.generate_trace(memory.log, timing);
let traces = [
cpu_trace,
keccak_trace,
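The net effect across these call sites is that each table's generate_trace no longer builds and prints its own TimingTree; the caller's tree is threaded down instead, so every table's timings nest under one report. A minimal sketch of the resulting calling pattern, assuming the plonky2 TimingTree API used above and crate-internal bindings (all_stark, inputs, config) that are not part of this diff:

use log::Level;
use plonky2::util::timing::TimingTree;

// Sketch only: `all_stark`, `inputs`, and `config` are assumed bindings.
let mut timing = TimingTree::new("prove", Level::Debug);
// Each table's `generate_trace` records its "generate trace rows" and
// "convert to PolynomialValues" scopes into this shared tree.
let (traces, public_values) = generate_traces(&all_stark, inputs, &config, &mut timing);
// A single report at the end covers every nested scope.
timing.print();

This mirrors how prove already owns a timing tree (see the prover.rs hunk below), and is why the timing.print() calls were removed from the individual generate_trace implementations.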

View File

@@ -201,23 +201,22 @@ impl<F: RichField + Extendable<D>, const D: usize> KeccakStark<F, D> {
row[out_reg_hi] = F::from_canonical_u64(row[in_reg_hi].to_canonical_u64() ^ rc_hi);
}
pub fn generate_trace(&self, inputs: Vec<[u64; NUM_INPUTS]>) -> Vec<PolynomialValues<F>> {
let mut timing = TimingTree::new("generate trace", log::Level::Debug);
pub fn generate_trace(
&self,
inputs: Vec<[u64; NUM_INPUTS]>,
timing: &mut TimingTree,
) -> Vec<PolynomialValues<F>> {
// Generate the witness, except for permuted columns in the lookup argument.
let trace_rows = timed!(
&mut timing,
timing,
"generate trace rows",
self.generate_trace_rows(inputs)
);
let trace_polys = timed!(
&mut timing,
timing,
"convert to PolynomialValues",
trace_rows_to_poly_values(trace_rows)
);
timing.print();
trace_polys
}
}
@@ -542,12 +541,22 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for KeccakStark<F
#[cfg(test)]
mod tests {
use anyhow::Result;
use plonky2::field::types::PrimeField64;
use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV};
use plonky2::field::polynomial::PolynomialValues;
use plonky2::field::types::{Field, PrimeField64};
use plonky2::fri::oracle::PolynomialBatch;
use plonky2::iop::challenger::Challenger;
use plonky2::plonk::config::{GenericConfig, PoseidonGoldilocksConfig};
use plonky2::timed;
use plonky2::util::timing::TimingTree;
use tiny_keccak::keccakf;
use crate::config::StarkConfig;
use crate::cross_table_lookup::{CtlData, CtlZData};
use crate::keccak::columns::reg_output_limb;
use crate::keccak::keccak_stark::{KeccakStark, NUM_INPUTS, NUM_ROUNDS};
use crate::permutation::GrandProductChallenge;
use crate::prover::prove_single_table;
use crate::stark_testing::{test_stark_circuit_constraints, test_stark_low_degree};
#[test]
@@ -609,4 +618,75 @@ mod tests {
Ok(())
}
#[test]
fn keccak_benchmark() -> Result<()> {
const NUM_PERMS: usize = 85;
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
type S = KeccakStark<F, D>;
let stark = S::default();
let config = StarkConfig::standard_fast_config();
init_logger();
let input: Vec<[u64; NUM_INPUTS]> = (0..NUM_PERMS).map(|_| rand::random()).collect();
let mut timing = TimingTree::new("prove", log::Level::Debug);
let trace_poly_values = timed!(
timing,
"generate trace",
stark.generate_trace(input.try_into().unwrap(), &mut timing)
);
// TODO: Cloning this isn't great; consider having `from_values` accept a reference,
// or having `compute_permutation_z_polys` read trace values from the `PolynomialBatch`.
let cloned_trace_poly_values = timed!(timing, "clone", trace_poly_values.clone());
let trace_commitments = timed!(
timing,
"compute trace commitment",
PolynomialBatch::<F, C, D>::from_values(
cloned_trace_poly_values,
config.fri_config.rate_bits,
false,
config.fri_config.cap_height,
&mut timing,
None,
)
);
let degree = 1 << trace_commitments.degree_log;
// Fake CTL data.
let ctl_z_data = CtlZData {
z: PolynomialValues::zero(degree),
challenge: GrandProductChallenge {
beta: F::ZERO,
gamma: F::ZERO,
},
columns: vec![],
filter_column: None,
};
let ctl_data = CtlData {
zs_columns: vec![ctl_z_data; config.num_challenges],
};
prove_single_table(
&stark,
&config,
&trace_poly_values,
&trace_commitments,
&ctl_data,
&mut Challenger::new(),
&mut timing,
)?;
timing.print();
Ok(())
}
fn init_logger() {
let _ = try_init_from_env(Env::default().filter_or(DEFAULT_FILTER_ENV, "debug"));
}
}
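Since init_logger defaults the env_logger filter to debug, the timing tree printed by this benchmark should be visible when the test is run without output capture, e.g. something like `cargo test --release keccak_benchmark -- --nocapture` (exact package-selection flags are not shown in this diff); --release matters if the reported timings are meant to be representative.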

View File

@@ -93,23 +93,21 @@ impl<F: RichField + Extendable<D>, const D: usize> KeccakMemoryStark<F, D> {
&self,
operations: Vec<KeccakMemoryOp>,
min_rows: usize,
timing: &mut TimingTree,
) -> Vec<PolynomialValues<F>> {
let mut timing = TimingTree::new("generate trace", log::Level::Debug);
// Generate the witness row-wise.
let trace_rows = timed!(
&mut timing,
timing,
"generate trace rows",
self.generate_trace_rows(operations, min_rows)
);
let trace_polys = timed!(
&mut timing,
timing,
"convert to PolynomialValues",
trace_rows_to_poly_values(trace_rows)
);
timing.print();
trace_polys
}

View File

@@ -171,23 +171,21 @@ impl<F: RichField + Extendable<D>, const D: usize> KeccakSpongeStark<F, D> {
&self,
operations: Vec<KeccakSpongeOp>,
min_rows: usize,
timing: &mut TimingTree,
) -> Vec<PolynomialValues<F>> {
let mut timing = TimingTree::new("generate trace", log::Level::Debug);
// Generate the witness row-wise.
let trace_rows = timed!(
&mut timing,
timing,
"generate trace rows",
self.generate_trace_rows(operations, min_rows)
);
let trace_polys = timed!(
&mut timing,
timing,
"convert to PolynomialValues",
trace_rows_to_poly_values(trace_rows)
);
timing.print();
trace_polys
}

View File

@@ -7,10 +7,13 @@ use plonky2::field::packed::PackedField;
use plonky2::field::polynomial::PolynomialValues;
use plonky2::field::types::Field;
use plonky2::hash::hash_types::RichField;
use plonky2::timed;
use plonky2::util::timing::TimingTree;
use plonky2_util::ceil_div_usize;
use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::cross_table_lookup::Column;
use crate::logic::columns::NUM_COLUMNS;
use crate::stark::Stark;
use crate::util::{limb_from_bits_le, limb_from_bits_le_recursive, trace_rows_to_poly_values};
use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars};
@@ -101,7 +104,25 @@ impl Operation {
}
impl<F: RichField, const D: usize> LogicStark<F, D> {
pub(crate) fn generate_trace(&self, operations: Vec<Operation>) -> Vec<PolynomialValues<F>> {
pub(crate) fn generate_trace(
&self,
operations: Vec<Operation>,
timing: &mut TimingTree,
) -> Vec<PolynomialValues<F>> {
let trace_rows = timed!(
timing,
"generate trace rows",
self.generate_trace_rows(operations)
);
let trace_polys = timed!(
timing,
"convert to PolynomialValues",
trace_rows_to_poly_values(trace_rows)
);
trace_polys
}
fn generate_trace_rows(&self, operations: Vec<Operation>) -> Vec<[F; NUM_COLUMNS]> {
let len = operations.len();
let padded_len = len.next_power_of_two();
@@ -115,7 +136,7 @@ impl<F: RichField, const D: usize> LogicStark<F, D> {
rows.push([F::ZERO; columns::NUM_COLUMNS]);
}
trace_rows_to_poly_values(rows)
rows
}
fn generate_row(operation: Operation) -> [F; columns::NUM_COLUMNS] {

View File

@@ -187,12 +187,14 @@ impl<F: RichField + Extendable<D>, const D: usize> MemoryStark<F, D> {
}
}
pub(crate) fn generate_trace(&self, memory_ops: Vec<MemoryOp>) -> Vec<PolynomialValues<F>> {
let mut timing = TimingTree::new("generate trace", log::Level::Debug);
pub(crate) fn generate_trace(
&self,
memory_ops: Vec<MemoryOp>,
timing: &mut TimingTree,
) -> Vec<PolynomialValues<F>> {
// Generate most of the trace in row-major form.
let trace_rows = timed!(
&mut timing,
timing,
"generate trace rows",
self.generate_trace_row_major(memory_ops)
);
@@ -204,13 +206,10 @@ impl<F: RichField + Extendable<D>, const D: usize> MemoryStark<F, D> {
// A few final generation steps, which work better in column-major form.
Self::generate_trace_col_major(&mut trace_col_vecs);
let trace_polys = trace_col_vecs
trace_col_vecs
.into_iter()
.map(|column| PolynomialValues::new(column))
.collect();
timing.print();
trace_polys
.collect()
}
}

View File

@@ -53,7 +53,7 @@ where
[(); LogicStark::<F, D>::COLUMNS]:,
[(); MemoryStark::<F, D>::COLUMNS]:,
{
let (traces, public_values) = generate_traces(all_stark, inputs, config);
let (traces, public_values) = generate_traces(all_stark, inputs, config, timing);
prove_with_traces(all_stark, config, traces, public_values, timing)
}
@@ -175,7 +175,7 @@ where
}
/// Compute proof for a single STARK table.
fn prove_single_table<F, C, S, const D: usize>(
pub(crate) fn prove_single_table<F, C, S, const D: usize>(
stark: &S,
config: &StarkConfig,
trace_poly_values: &[PolynomialValues<F>],
@@ -210,7 +210,11 @@ where
)
});
let permutation_zs = permutation_challenges.as_ref().map(|challenges| {
compute_permutation_z_polys::<F, C, S, D>(stark, config, trace_poly_values, challenges)
timed!(
timing,
"compute permutation Z(x) polys",
compute_permutation_z_polys::<F, C, S, D>(stark, config, trace_poly_values, challenges)
)
});
let num_permutation_zs = permutation_zs.as_ref().map(|v| v.len()).unwrap_or(0);
@@ -223,13 +227,17 @@ where
};
assert!(!z_polys.is_empty(), "No CTL?");
let permutation_ctl_zs_commitment = PolynomialBatch::from_values(
z_polys,
rate_bits,
false,
config.fri_config.cap_height,
let permutation_ctl_zs_commitment = timed!(
timing,
None,
"compute Zs commitment",
PolynomialBatch::from_values(
z_polys,
rate_bits,
false,
config.fri_config.cap_height,
timing,
None,
)
);
let permutation_ctl_zs_cap = permutation_ctl_zs_commitment.merkle_tree.cap.clone();
@@ -249,27 +257,37 @@ where
config,
);
}
let quotient_polys = compute_quotient_polys::<F, <F as Packable>::Packing, C, S, D>(
stark,
trace_commitment,
&permutation_ctl_zs_commitment,
permutation_challenges.as_ref(),
ctl_data,
alphas,
degree_bits,
num_permutation_zs,
config,
let quotient_polys = timed!(
timing,
"compute quotient polys",
compute_quotient_polys::<F, <F as Packable>::Packing, C, S, D>(
stark,
trace_commitment,
&permutation_ctl_zs_commitment,
permutation_challenges.as_ref(),
ctl_data,
alphas,
degree_bits,
num_permutation_zs,
config,
)
);
let all_quotient_chunks = timed!(
timing,
"split quotient polys",
quotient_polys
.into_par_iter()
.flat_map(|mut quotient_poly| {
quotient_poly
.trim_to_len(degree * stark.quotient_degree_factor())
.expect(
"Quotient has failed, the vanishing polynomial is not divisible by Z_H",
);
// Split quotient into degree-n chunks.
quotient_poly.chunks(degree)
})
.collect()
);
let all_quotient_chunks = quotient_polys
.into_par_iter()
.flat_map(|mut quotient_poly| {
quotient_poly
.trim_to_len(degree * stark.quotient_degree_factor())
.expect("Quotient has failed, the vanishing polynomial is not divisible by Z_H");
// Split quotient into degree-n chunks.
quotient_poly.chunks(degree)
})
.collect();
let quotient_commitment = timed!(
timing,
"compute quotient commitment",

View File

@@ -21,9 +21,9 @@ impl StarkConfig {
fri_config: FriConfig {
rate_bits: 1,
cap_height: 4,
proof_of_work_bits: 10,
proof_of_work_bits: 16,
reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5),
num_query_rounds: 90,
num_query_rounds: 84,
},
}
}