Add a tree of scopes for proving times (#141)

* Add a tree of scopes for proving times

To replace the old `timed!` macro, which had no hierarchy.

It's similar to `ContextTree`, which tracks gate counts of circuit operations.

This gives a more organized output, with indentation levels based on scope depth, parent durations listed before child durations, etc.

* PR feedback
This commit is contained in:
Daniel Lubarov 2021-08-02 10:38:09 -07:00 committed by GitHub
parent 08b018fc02
commit 1229d90fae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 228 additions and 58 deletions

View File

@ -17,6 +17,7 @@ use crate::plonk::proof::{OpeningSet, OpeningSetTarget};
use crate::polynomial::polynomial::{PolynomialCoeffs, PolynomialValues};
use crate::timed;
use crate::util::reducing::ReducingFactor;
use crate::util::timing::TimingTree;
use crate::util::{log2_ceil, log2_strict, reverse_bits, reverse_index_bits_in_place, transpose};
use crate::with_context;
@ -35,30 +36,38 @@ pub struct PolynomialBatchCommitment<F: Field> {
impl<F: Field> PolynomialBatchCommitment<F> {
/// Creates a list polynomial commitment for the polynomials interpolating the values in `values`.
pub fn new(values: Vec<PolynomialValues<F>>, rate_bits: usize, blinding: bool) -> Self {
pub(crate) fn new(
values: Vec<PolynomialValues<F>>,
rate_bits: usize,
blinding: bool,
timing: &mut TimingTree,
) -> Self {
let degree = values[0].len();
let polynomials = values.par_iter().map(|v| v.ifft()).collect::<Vec<_>>();
let lde_values = timed!(
Self::lde_values(&polynomials, rate_bits, blinding),
"to compute LDE"
timing,
"compute LDE",
Self::lde_values(&polynomials, rate_bits, blinding)
);
Self::new_from_data(polynomials, lde_values, degree, rate_bits, blinding)
Self::new_from_data(polynomials, lde_values, degree, rate_bits, blinding, timing)
}
/// Creates a list polynomial commitment for the polynomials `polynomials`.
pub fn new_from_polys(
pub(crate) fn new_from_polys(
polynomials: Vec<PolynomialCoeffs<F>>,
rate_bits: usize,
blinding: bool,
timing: &mut TimingTree,
) -> Self {
let degree = polynomials[0].len();
let lde_values = timed!(
Self::lde_values(&polynomials, rate_bits, blinding),
"to compute LDE"
timing,
"compute LDE",
Self::lde_values(&polynomials, rate_bits, blinding)
);
Self::new_from_data(polynomials, lde_values, degree, rate_bits, blinding)
Self::new_from_data(polynomials, lde_values, degree, rate_bits, blinding, timing)
}
fn new_from_data(
@ -67,12 +76,13 @@ impl<F: Field> PolynomialBatchCommitment<F> {
degree: usize,
rate_bits: usize,
blinding: bool,
timing: &mut TimingTree,
) -> Self {
// TODO: Could try parallelizing the transpose, or not doing it explicitly, instead having
// MerkleTree do it implicitly.
let mut leaves = timed!(transpose(&lde_values), "to transpose LDEs");
let mut leaves = timed!(timing, "transpose LDEs", transpose(&lde_values));
reverse_index_bits_in_place(&mut leaves);
let merkle_tree = timed!(MerkleTree::new(leaves, false), "to build Merkle tree");
let merkle_tree = timed!(timing, "build Merkle tree", MerkleTree::new(leaves, false));
Self {
polynomials,
@ -115,11 +125,12 @@ impl<F: Field> PolynomialBatchCommitment<F> {
/// Takes the commitments to the constants - sigmas - wires - zs - quotient — polynomials,
/// and an opening point `zeta` and produces a batched opening proof + opening set.
pub fn open_plonk<const D: usize>(
pub(crate) fn open_plonk<const D: usize>(
commitments: &[&Self; 4],
zeta: F::Extension,
challenger: &mut Challenger<F>,
common_data: &CommonCircuitData<F, D>,
timing: &mut TimingTree,
) -> (OpeningProof<F, D>, OpeningSet<F, D>)
where
F: Extendable<D>,
@ -195,6 +206,7 @@ impl<F: Field> PolynomialBatchCommitment<F> {
lde_final_values,
challenger,
&config.fri_config,
timing,
);
(
@ -366,6 +378,7 @@ mod tests {
gen_random_test_case(ks[i], degree_log),
common_data.config.rate_bits,
PlonkPolynomials::polynomials(i).blinding,
&mut TimingTree::default(),
)
})
.collect::<Vec<_>>();
@ -376,6 +389,7 @@ mod tests {
zeta,
&mut Challenger::new(),
&common_data,
&mut TimingTree::default(),
);
proof.verify(

View File

@ -12,6 +12,7 @@ use crate::plonk::plonk_common::reduce_with_powers;
use crate::polynomial::polynomial::{PolynomialCoeffs, PolynomialValues};
use crate::timed;
use crate::util::reverse_index_bits_in_place;
use crate::util::timing::TimingTree;
/// Builds a FRI proof.
pub fn fri_proof<F: Field + Extendable<D>, const D: usize>(
@ -22,6 +23,7 @@ pub fn fri_proof<F: Field + Extendable<D>, const D: usize>(
lde_polynomial_values: PolynomialValues<F::Extension>,
challenger: &mut Challenger<F>,
config: &FriConfig,
timing: &mut TimingTree,
) -> FriProof<F, D> {
let n = lde_polynomial_values.values.len();
assert_eq!(lde_polynomial_coeffs.coeffs.len(), n);
@ -37,8 +39,9 @@ pub fn fri_proof<F: Field + Extendable<D>, const D: usize>(
// PoW phase
let current_hash = challenger.get_hash();
let pow_witness = timed!(
fri_proof_of_work(current_hash, config),
"to find for proof-of-work witness"
timing,
"find for proof-of-work witness",
fri_proof_of_work(current_hash, config)
);
// Query phase

View File

@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet};
use std::convert::TryInto;
use std::time::Instant;
use log::info;
use log::{info, Level};
use crate::field::cosets::get_unique_coset_shifts;
use crate::field::extension_field::target::ExtensionTarget;
@ -29,6 +29,7 @@ use crate::polynomial::polynomial::PolynomialValues;
use crate::util::context_tree::ContextTree;
use crate::util::marking::{Markable, MarkedTargets};
use crate::util::partial_products::num_partial_products;
use crate::util::timing::TimingTree;
use crate::util::{log2_ceil, log2_strict, transpose, transpose_poly_values};
pub struct CircuitBuilder<F: Extendable<D>, const D: usize> {
@ -504,6 +505,7 @@ impl<F: Extendable<D>, const D: usize> CircuitBuilder<F, D> {
/// Builds a "full circuit", with both prover and verifier data.
pub fn build(mut self) -> CircuitData<F, D> {
let mut timing = TimingTree::new("preprocess", Level::Trace);
let quotient_degree_factor = 7; // TODO: add this as a parameter.
let start = Instant::now();
@ -557,6 +559,7 @@ impl<F: Extendable<D>, const D: usize> CircuitBuilder<F, D> {
constants_sigmas_vecs,
self.config.rate_bits,
self.config.zero_knowledge & PlonkPolynomials::CONSTANTS_SIGMAS.blinding,
&mut timing,
);
let constants_sigmas_root = constants_sigmas_commitment.merkle_tree.root;

View File

@ -1,7 +1,4 @@
use std::time::Instant;
use anyhow::Result;
use log::info;
use rayon::prelude::*;
use crate::field::extension_field::Extendable;
@ -20,26 +17,27 @@ use crate::plonk::vars::EvaluationVarsBase;
use crate::polynomial::polynomial::{PolynomialCoeffs, PolynomialValues};
use crate::timed;
use crate::util::partial_products::partial_products;
use crate::util::timing::TimingTree;
use crate::util::{log2_ceil, transpose};
use log::Level;
pub(crate) fn prove<F: Extendable<D>, const D: usize>(
prover_data: &ProverOnlyCircuitData<F, D>,
common_data: &CommonCircuitData<F, D>,
inputs: PartialWitness<F>,
) -> Result<ProofWithPublicInputs<F, D>> {
let mut timing = TimingTree::new("prove", Level::Debug);
let config = &common_data.config;
let num_wires = config.num_wires;
let num_challenges = config.num_challenges;
let quotient_degree = common_data.quotient_degree();
let degree = common_data.degree();
let start_proof_gen = Instant::now();
let mut partial_witness = inputs;
info!("Running {} generators", prover_data.generators.len());
timed!(
generate_partial_witness(&mut partial_witness, &prover_data.generators),
"to generate witness"
timing,
&format!("run {} generators", prover_data.generators.len()),
generate_partial_witness(&mut partial_witness, &prover_data.generators)
);
let public_inputs = partial_witness.get_targets(&prover_data.public_inputs);
@ -51,32 +49,37 @@ pub(crate) fn prove<F: Extendable<D>, const D: usize>(
}
timed!(
timing,
"check copy constraints",
partial_witness
.check_copy_constraints(&prover_data.copy_constraints, &prover_data.gate_instances)?,
"to check copy constraints"
.check_copy_constraints(&prover_data.copy_constraints, &prover_data.gate_instances)?
);
let witness = timed!(
partial_witness.full_witness(degree, num_wires),
"to compute full witness"
timing,
"compute full witness",
partial_witness.full_witness(degree, num_wires)
);
let wires_values: Vec<PolynomialValues<F>> = timed!(
timing,
"compute wire polynomials",
witness
.wire_values
.par_iter()
.map(|column| PolynomialValues::new(column.clone()))
.collect(),
"to compute wire polynomials"
.collect()
);
let wires_commitment = timed!(
timing,
"compute wires commitment",
PolynomialBatchCommitment::new(
wires_values,
config.rate_bits,
config.zero_knowledge & PlonkPolynomials::WIRES.blinding
),
"to compute wires commitment"
config.zero_knowledge & PlonkPolynomials::WIRES.blinding,
&mut timing,
)
);
let mut challenger = Challenger::new();
@ -94,11 +97,16 @@ pub(crate) fn prove<F: Extendable<D>, const D: usize>(
"When the number of routed wires is smaller that the degree, we should change the logic to avoid computing partial products."
);
let mut partial_products = timed!(
all_wires_permutation_partial_products(&witness, &betas, &gammas, prover_data, common_data),
"to compute partial products"
timing,
"compute partial products",
all_wires_permutation_partial_products(&witness, &betas, &gammas, prover_data, common_data)
);
let plonk_z_vecs = timed!(compute_zs(&partial_products, common_data), "to compute Z's");
let plonk_z_vecs = timed!(
timing,
"compute Z's",
compute_zs(&partial_products, common_data)
);
// The first polynomial in `partial_products` represent the final product used in the
// computation of `Z`. It isn't needed anymore so we discard it.
@ -108,12 +116,14 @@ pub(crate) fn prove<F: Extendable<D>, const D: usize>(
let zs_partial_products = [plonk_z_vecs, partial_products.concat()].concat();
let zs_partial_products_commitment = timed!(
timing,
"commit to Z's",
PolynomialBatchCommitment::new(
zs_partial_products,
config.rate_bits,
config.zero_knowledge & PlonkPolynomials::ZS_PARTIAL_PRODUCTS.blinding
),
"to commit to Z's"
config.zero_knowledge & PlonkPolynomials::ZS_PARTIAL_PRODUCTS.blinding,
&mut timing,
)
);
challenger.observe_hash(&zs_partial_products_commitment.merkle_tree.root);
@ -121,6 +131,8 @@ pub(crate) fn prove<F: Extendable<D>, const D: usize>(
let alphas = challenger.get_n_challenges(num_challenges);
let quotient_polys = timed!(
timing,
"compute quotient polys",
compute_quotient_polys(
common_data,
prover_data,
@ -130,12 +142,13 @@ pub(crate) fn prove<F: Extendable<D>, const D: usize>(
&betas,
&gammas,
&alphas,
),
"to compute quotient polys"
)
);
// Compute the quotient polynomials, aka `t` in the Plonk paper.
let all_quotient_poly_chunks = timed!(
timing,
"split up quotient polys",
quotient_polys
.into_par_iter()
.flat_map(|mut quotient_poly| {
@ -147,17 +160,18 @@ pub(crate) fn prove<F: Extendable<D>, const D: usize>(
// Split t into degree-n chunks.
quotient_poly.chunks(degree)
})
.collect(),
"to split up quotient polys"
.collect()
);
let quotient_polys_commitment = timed!(
timing,
"commit to quotient polys",
PolynomialBatchCommitment::new_from_polys(
all_quotient_poly_chunks,
config.rate_bits,
config.zero_knowledge & PlonkPolynomials::QUOTIENT.blinding
),
"to commit to quotient polys"
config.zero_knowledge & PlonkPolynomials::QUOTIENT.blinding,
&mut timing
)
);
challenger.observe_hash(&quotient_polys_commitment.merkle_tree.root);
@ -165,6 +179,8 @@ pub(crate) fn prove<F: Extendable<D>, const D: usize>(
let zeta = challenger.get_extension_challenge();
let (opening_proof, openings) = timed!(
timing,
"compute opening proofs",
PolynomialBatchCommitment::open_plonk(
&[
&prover_data.constants_sigmas_commitment,
@ -175,14 +191,11 @@ pub(crate) fn prove<F: Extendable<D>, const D: usize>(
zeta,
&mut challenger,
common_data,
),
"to compute opening proofs"
&mut timing,
)
);
info!(
"{:.3}s for overall witness & proof generation",
start_proof_gen.elapsed().as_secs_f32()
);
timing.print();
let proof = Proof {
wires_root: wires_commitment.merkle_tree.root,

View File

@ -1,13 +1,150 @@
use std::time::{Duration, Instant};
use log::{log, Level};
/// The hierarchy of scopes, and the time consumed by each one. Useful for profiling.
pub struct TimingTree {
/// The name of this scope.
name: String,
/// The level at which to log this scope and its children.
level: log::Level,
/// The time when this scope was created.
enter_time: Instant,
/// The time when this scope was destroyed, or None if it has not yet been destroyed.
exit_time: Option<Instant>,
/// Any child scopes.
children: Vec<TimingTree>,
}
impl Default for TimingTree {
fn default() -> Self {
TimingTree::new("root", Level::Debug)
}
}
impl TimingTree {
pub fn new(root_name: &str, level: Level) -> Self {
Self {
name: root_name.to_string(),
level,
enter_time: Instant::now(),
exit_time: None,
children: vec![],
}
}
/// Whether this scope is still in scope.
fn is_open(&self) -> bool {
self.exit_time.is_none()
}
/// A description of the stack of currently-open scopes.
pub fn open_stack(&self) -> String {
let mut stack = Vec::new();
self.open_stack_helper(&mut stack);
stack.join(" > ")
}
fn open_stack_helper(&self, stack: &mut Vec<String>) {
if self.is_open() {
stack.push(self.name.clone());
if let Some(last_child) = self.children.last() {
last_child.open_stack_helper(stack);
}
}
}
pub fn push(&mut self, ctx: &str, mut level: log::Level) {
assert!(self.is_open());
// We don't want a scope's log level to be stronger than that of its parent.
level = level.max(self.level);
if let Some(last_child) = self.children.last_mut() {
if last_child.is_open() {
last_child.push(ctx, level);
return;
}
}
self.children.push(TimingTree {
name: ctx.to_string(),
level,
enter_time: Instant::now(),
exit_time: None,
children: vec![],
})
}
/// Close the deepest open scope from this tree.
pub fn pop(&mut self) {
assert!(self.is_open());
if let Some(last_child) = self.children.last_mut() {
if last_child.is_open() {
last_child.pop();
return;
}
}
self.exit_time = Some(Instant::now());
}
fn duration(&self) -> Duration {
self.exit_time
.unwrap_or(Instant::now())
.duration_since(self.enter_time)
}
/// Filter out children with a low duration.
pub fn filter(&self, min_delta: Duration) -> Self {
Self {
name: self.name.clone(),
level: self.level,
enter_time: self.enter_time,
exit_time: self.exit_time,
children: self
.children
.iter()
.filter(|c| c.duration() >= min_delta)
.map(|c| c.filter(min_delta))
.collect(),
}
}
pub fn print(&self) {
self.print_helper(0);
}
fn print_helper(&self, depth: usize) {
let prefix = "| ".repeat(depth);
log!(
self.level,
"{}{:.4}s to {}",
prefix,
self.duration().as_secs_f64(),
self.name
);
for child in &self.children {
child.print_helper(depth + 1);
}
}
}
/// Creates a named scope; useful for debugging.
#[macro_export]
macro_rules! timed {
($a:expr, $msg:expr) => {{
use std::time::Instant;
use log::info;
let timer = Instant::now();
let res = $a;
info!("{:.4}s {}", timer.elapsed().as_secs_f32(), $msg);
($timing_tree:expr, $level:expr, $ctx:expr, $exp:expr) => {{
$timing_tree.push($ctx, $level);
let res = $exp;
$timing_tree.pop();
res
}};
// If no context is specified, default to Debug.
($timing_tree:expr, $ctx:expr, $exp:expr) => {{
$timing_tree.push($ctx, log::Level::Debug);
let res = $exp;
$timing_tree.pop();
res
}};
}