diff --git a/src/fri/commitment.rs b/src/fri/commitment.rs index 5cba002c..1fc33055 100644 --- a/src/fri/commitment.rs +++ b/src/fri/commitment.rs @@ -17,6 +17,7 @@ use crate::plonk::proof::{OpeningSet, OpeningSetTarget}; use crate::polynomial::polynomial::{PolynomialCoeffs, PolynomialValues}; use crate::timed; use crate::util::reducing::ReducingFactor; +use crate::util::timing::TimingTree; use crate::util::{log2_ceil, log2_strict, reverse_bits, reverse_index_bits_in_place, transpose}; use crate::with_context; @@ -35,30 +36,38 @@ pub struct PolynomialBatchCommitment { impl PolynomialBatchCommitment { /// Creates a list polynomial commitment for the polynomials interpolating the values in `values`. - pub fn new(values: Vec>, rate_bits: usize, blinding: bool) -> Self { + pub(crate) fn new( + values: Vec>, + rate_bits: usize, + blinding: bool, + timing: &mut TimingTree, + ) -> Self { let degree = values[0].len(); let polynomials = values.par_iter().map(|v| v.ifft()).collect::>(); let lde_values = timed!( - Self::lde_values(&polynomials, rate_bits, blinding), - "to compute LDE" + timing, + "compute LDE", + Self::lde_values(&polynomials, rate_bits, blinding) ); - Self::new_from_data(polynomials, lde_values, degree, rate_bits, blinding) + Self::new_from_data(polynomials, lde_values, degree, rate_bits, blinding, timing) } /// Creates a list polynomial commitment for the polynomials `polynomials`. - pub fn new_from_polys( + pub(crate) fn new_from_polys( polynomials: Vec>, rate_bits: usize, blinding: bool, + timing: &mut TimingTree, ) -> Self { let degree = polynomials[0].len(); let lde_values = timed!( - Self::lde_values(&polynomials, rate_bits, blinding), - "to compute LDE" + timing, + "compute LDE", + Self::lde_values(&polynomials, rate_bits, blinding) ); - Self::new_from_data(polynomials, lde_values, degree, rate_bits, blinding) + Self::new_from_data(polynomials, lde_values, degree, rate_bits, blinding, timing) } fn new_from_data( @@ -67,12 +76,13 @@ impl PolynomialBatchCommitment { degree: usize, rate_bits: usize, blinding: bool, + timing: &mut TimingTree, ) -> Self { // TODO: Could try parallelizing the transpose, or not doing it explicitly, instead having // MerkleTree do it implicitly. - let mut leaves = timed!(transpose(&lde_values), "to transpose LDEs"); + let mut leaves = timed!(timing, "transpose LDEs", transpose(&lde_values)); reverse_index_bits_in_place(&mut leaves); - let merkle_tree = timed!(MerkleTree::new(leaves, false), "to build Merkle tree"); + let merkle_tree = timed!(timing, "build Merkle tree", MerkleTree::new(leaves, false)); Self { polynomials, @@ -115,11 +125,12 @@ impl PolynomialBatchCommitment { /// Takes the commitments to the constants - sigmas - wires - zs - quotient — polynomials, /// and an opening point `zeta` and produces a batched opening proof + opening set. - pub fn open_plonk( + pub(crate) fn open_plonk( commitments: &[&Self; 4], zeta: F::Extension, challenger: &mut Challenger, common_data: &CommonCircuitData, + timing: &mut TimingTree, ) -> (OpeningProof, OpeningSet) where F: Extendable, @@ -195,6 +206,7 @@ impl PolynomialBatchCommitment { lde_final_values, challenger, &config.fri_config, + timing, ); ( @@ -366,6 +378,7 @@ mod tests { gen_random_test_case(ks[i], degree_log), common_data.config.rate_bits, PlonkPolynomials::polynomials(i).blinding, + &mut TimingTree::default(), ) }) .collect::>(); @@ -376,6 +389,7 @@ mod tests { zeta, &mut Challenger::new(), &common_data, + &mut TimingTree::default(), ); proof.verify( diff --git a/src/fri/prover.rs b/src/fri/prover.rs index 04c29205..2170b377 100644 --- a/src/fri/prover.rs +++ b/src/fri/prover.rs @@ -12,6 +12,7 @@ use crate::plonk::plonk_common::reduce_with_powers; use crate::polynomial::polynomial::{PolynomialCoeffs, PolynomialValues}; use crate::timed; use crate::util::reverse_index_bits_in_place; +use crate::util::timing::TimingTree; /// Builds a FRI proof. pub fn fri_proof, const D: usize>( @@ -22,6 +23,7 @@ pub fn fri_proof, const D: usize>( lde_polynomial_values: PolynomialValues, challenger: &mut Challenger, config: &FriConfig, + timing: &mut TimingTree, ) -> FriProof { let n = lde_polynomial_values.values.len(); assert_eq!(lde_polynomial_coeffs.coeffs.len(), n); @@ -37,8 +39,9 @@ pub fn fri_proof, const D: usize>( // PoW phase let current_hash = challenger.get_hash(); let pow_witness = timed!( - fri_proof_of_work(current_hash, config), - "to find for proof-of-work witness" + timing, + "find for proof-of-work witness", + fri_proof_of_work(current_hash, config) ); // Query phase diff --git a/src/plonk/circuit_builder.rs b/src/plonk/circuit_builder.rs index 8a0eadc0..e869963e 100644 --- a/src/plonk/circuit_builder.rs +++ b/src/plonk/circuit_builder.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use std::convert::TryInto; use std::time::Instant; -use log::info; +use log::{info, Level}; use crate::field::cosets::get_unique_coset_shifts; use crate::field::extension_field::target::ExtensionTarget; @@ -29,6 +29,7 @@ use crate::polynomial::polynomial::PolynomialValues; use crate::util::context_tree::ContextTree; use crate::util::marking::{Markable, MarkedTargets}; use crate::util::partial_products::num_partial_products; +use crate::util::timing::TimingTree; use crate::util::{log2_ceil, log2_strict, transpose, transpose_poly_values}; pub struct CircuitBuilder, const D: usize> { @@ -504,6 +505,7 @@ impl, const D: usize> CircuitBuilder { /// Builds a "full circuit", with both prover and verifier data. pub fn build(mut self) -> CircuitData { + let mut timing = TimingTree::new("preprocess", Level::Trace); let quotient_degree_factor = 7; // TODO: add this as a parameter. let start = Instant::now(); @@ -557,6 +559,7 @@ impl, const D: usize> CircuitBuilder { constants_sigmas_vecs, self.config.rate_bits, self.config.zero_knowledge & PlonkPolynomials::CONSTANTS_SIGMAS.blinding, + &mut timing, ); let constants_sigmas_root = constants_sigmas_commitment.merkle_tree.root; diff --git a/src/plonk/prover.rs b/src/plonk/prover.rs index 8e233a71..21427a0c 100644 --- a/src/plonk/prover.rs +++ b/src/plonk/prover.rs @@ -1,7 +1,4 @@ -use std::time::Instant; - use anyhow::Result; -use log::info; use rayon::prelude::*; use crate::field::extension_field::Extendable; @@ -20,26 +17,27 @@ use crate::plonk::vars::EvaluationVarsBase; use crate::polynomial::polynomial::{PolynomialCoeffs, PolynomialValues}; use crate::timed; use crate::util::partial_products::partial_products; +use crate::util::timing::TimingTree; use crate::util::{log2_ceil, transpose}; +use log::Level; pub(crate) fn prove, const D: usize>( prover_data: &ProverOnlyCircuitData, common_data: &CommonCircuitData, inputs: PartialWitness, ) -> Result> { + let mut timing = TimingTree::new("prove", Level::Debug); let config = &common_data.config; let num_wires = config.num_wires; let num_challenges = config.num_challenges; let quotient_degree = common_data.quotient_degree(); let degree = common_data.degree(); - let start_proof_gen = Instant::now(); - let mut partial_witness = inputs; - info!("Running {} generators", prover_data.generators.len()); timed!( - generate_partial_witness(&mut partial_witness, &prover_data.generators), - "to generate witness" + timing, + &format!("run {} generators", prover_data.generators.len()), + generate_partial_witness(&mut partial_witness, &prover_data.generators) ); let public_inputs = partial_witness.get_targets(&prover_data.public_inputs); @@ -51,32 +49,37 @@ pub(crate) fn prove, const D: usize>( } timed!( + timing, + "check copy constraints", partial_witness - .check_copy_constraints(&prover_data.copy_constraints, &prover_data.gate_instances)?, - "to check copy constraints" + .check_copy_constraints(&prover_data.copy_constraints, &prover_data.gate_instances)? ); let witness = timed!( - partial_witness.full_witness(degree, num_wires), - "to compute full witness" + timing, + "compute full witness", + partial_witness.full_witness(degree, num_wires) ); let wires_values: Vec> = timed!( + timing, + "compute wire polynomials", witness .wire_values .par_iter() .map(|column| PolynomialValues::new(column.clone())) - .collect(), - "to compute wire polynomials" + .collect() ); let wires_commitment = timed!( + timing, + "compute wires commitment", PolynomialBatchCommitment::new( wires_values, config.rate_bits, - config.zero_knowledge & PlonkPolynomials::WIRES.blinding - ), - "to compute wires commitment" + config.zero_knowledge & PlonkPolynomials::WIRES.blinding, + &mut timing, + ) ); let mut challenger = Challenger::new(); @@ -94,11 +97,16 @@ pub(crate) fn prove, const D: usize>( "When the number of routed wires is smaller that the degree, we should change the logic to avoid computing partial products." ); let mut partial_products = timed!( - all_wires_permutation_partial_products(&witness, &betas, &gammas, prover_data, common_data), - "to compute partial products" + timing, + "compute partial products", + all_wires_permutation_partial_products(&witness, &betas, &gammas, prover_data, common_data) ); - let plonk_z_vecs = timed!(compute_zs(&partial_products, common_data), "to compute Z's"); + let plonk_z_vecs = timed!( + timing, + "compute Z's", + compute_zs(&partial_products, common_data) + ); // The first polynomial in `partial_products` represent the final product used in the // computation of `Z`. It isn't needed anymore so we discard it. @@ -108,12 +116,14 @@ pub(crate) fn prove, const D: usize>( let zs_partial_products = [plonk_z_vecs, partial_products.concat()].concat(); let zs_partial_products_commitment = timed!( + timing, + "commit to Z's", PolynomialBatchCommitment::new( zs_partial_products, config.rate_bits, - config.zero_knowledge & PlonkPolynomials::ZS_PARTIAL_PRODUCTS.blinding - ), - "to commit to Z's" + config.zero_knowledge & PlonkPolynomials::ZS_PARTIAL_PRODUCTS.blinding, + &mut timing, + ) ); challenger.observe_hash(&zs_partial_products_commitment.merkle_tree.root); @@ -121,6 +131,8 @@ pub(crate) fn prove, const D: usize>( let alphas = challenger.get_n_challenges(num_challenges); let quotient_polys = timed!( + timing, + "compute quotient polys", compute_quotient_polys( common_data, prover_data, @@ -130,12 +142,13 @@ pub(crate) fn prove, const D: usize>( &betas, &gammas, &alphas, - ), - "to compute quotient polys" + ) ); // Compute the quotient polynomials, aka `t` in the Plonk paper. let all_quotient_poly_chunks = timed!( + timing, + "split up quotient polys", quotient_polys .into_par_iter() .flat_map(|mut quotient_poly| { @@ -147,17 +160,18 @@ pub(crate) fn prove, const D: usize>( // Split t into degree-n chunks. quotient_poly.chunks(degree) }) - .collect(), - "to split up quotient polys" + .collect() ); let quotient_polys_commitment = timed!( + timing, + "commit to quotient polys", PolynomialBatchCommitment::new_from_polys( all_quotient_poly_chunks, config.rate_bits, - config.zero_knowledge & PlonkPolynomials::QUOTIENT.blinding - ), - "to commit to quotient polys" + config.zero_knowledge & PlonkPolynomials::QUOTIENT.blinding, + &mut timing + ) ); challenger.observe_hash("ient_polys_commitment.merkle_tree.root); @@ -165,6 +179,8 @@ pub(crate) fn prove, const D: usize>( let zeta = challenger.get_extension_challenge(); let (opening_proof, openings) = timed!( + timing, + "compute opening proofs", PolynomialBatchCommitment::open_plonk( &[ &prover_data.constants_sigmas_commitment, @@ -175,14 +191,11 @@ pub(crate) fn prove, const D: usize>( zeta, &mut challenger, common_data, - ), - "to compute opening proofs" + &mut timing, + ) ); - info!( - "{:.3}s for overall witness & proof generation", - start_proof_gen.elapsed().as_secs_f32() - ); + timing.print(); let proof = Proof { wires_root: wires_commitment.merkle_tree.root, diff --git a/src/util/timing.rs b/src/util/timing.rs index 6f27a9d1..cd9ea731 100644 --- a/src/util/timing.rs +++ b/src/util/timing.rs @@ -1,13 +1,150 @@ +use std::time::{Duration, Instant}; + +use log::{log, Level}; + +/// The hierarchy of scopes, and the time consumed by each one. Useful for profiling. +pub struct TimingTree { + /// The name of this scope. + name: String, + /// The level at which to log this scope and its children. + level: log::Level, + /// The time when this scope was created. + enter_time: Instant, + /// The time when this scope was destroyed, or None if it has not yet been destroyed. + exit_time: Option, + /// Any child scopes. + children: Vec, +} + +impl Default for TimingTree { + fn default() -> Self { + TimingTree::new("root", Level::Debug) + } +} + +impl TimingTree { + pub fn new(root_name: &str, level: Level) -> Self { + Self { + name: root_name.to_string(), + level, + enter_time: Instant::now(), + exit_time: None, + children: vec![], + } + } + + /// Whether this scope is still in scope. + fn is_open(&self) -> bool { + self.exit_time.is_none() + } + + /// A description of the stack of currently-open scopes. + pub fn open_stack(&self) -> String { + let mut stack = Vec::new(); + self.open_stack_helper(&mut stack); + stack.join(" > ") + } + + fn open_stack_helper(&self, stack: &mut Vec) { + if self.is_open() { + stack.push(self.name.clone()); + if let Some(last_child) = self.children.last() { + last_child.open_stack_helper(stack); + } + } + } + + pub fn push(&mut self, ctx: &str, mut level: log::Level) { + assert!(self.is_open()); + + // We don't want a scope's log level to be stronger than that of its parent. + level = level.max(self.level); + + if let Some(last_child) = self.children.last_mut() { + if last_child.is_open() { + last_child.push(ctx, level); + return; + } + } + + self.children.push(TimingTree { + name: ctx.to_string(), + level, + enter_time: Instant::now(), + exit_time: None, + children: vec![], + }) + } + + /// Close the deepest open scope from this tree. + pub fn pop(&mut self) { + assert!(self.is_open()); + + if let Some(last_child) = self.children.last_mut() { + if last_child.is_open() { + last_child.pop(); + return; + } + } + + self.exit_time = Some(Instant::now()); + } + + fn duration(&self) -> Duration { + self.exit_time + .unwrap_or(Instant::now()) + .duration_since(self.enter_time) + } + + /// Filter out children with a low duration. + pub fn filter(&self, min_delta: Duration) -> Self { + Self { + name: self.name.clone(), + level: self.level, + enter_time: self.enter_time, + exit_time: self.exit_time, + children: self + .children + .iter() + .filter(|c| c.duration() >= min_delta) + .map(|c| c.filter(min_delta)) + .collect(), + } + } + + pub fn print(&self) { + self.print_helper(0); + } + + fn print_helper(&self, depth: usize) { + let prefix = "| ".repeat(depth); + log!( + self.level, + "{}{:.4}s to {}", + prefix, + self.duration().as_secs_f64(), + self.name + ); + for child in &self.children { + child.print_helper(depth + 1); + } + } +} + +/// Creates a named scope; useful for debugging. #[macro_export] macro_rules! timed { - ($a:expr, $msg:expr) => {{ - use std::time::Instant; - - use log::info; - - let timer = Instant::now(); - let res = $a; - info!("{:.4}s {}", timer.elapsed().as_secs_f32(), $msg); + ($timing_tree:expr, $level:expr, $ctx:expr, $exp:expr) => {{ + $timing_tree.push($ctx, $level); + let res = $exp; + $timing_tree.pop(); + res + }}; + // If no context is specified, default to Debug. + ($timing_tree:expr, $ctx:expr, $exp:expr) => {{ + $timing_tree.push($ctx, log::Level::Debug); + let res = $exp; + $timing_tree.pop(); res }}; }