Da: cache toeplitz1 (#667)

* Added toeplitz1 cache to fk20

* Added toeplitz1 cache benches

* Use toeplitz cache in encoder

* Use cache in bench

* Clippy happy
This commit is contained in:
Daniel Sanchez 2024-06-26 16:30:38 +02:00 committed by GitHub
parent c3375b1c48
commit fdc242fa48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 125 additions and 29 deletions

View File

@ -1,6 +1,7 @@
use divan::counter::BytesCount; use divan::counter::BytesCount;
use divan::Bencher; use divan::Bencher;
use kzgrs_backend::encoder::{DaEncoder, DaEncoderParams}; use kzgrs_backend::encoder::{DaEncoder, DaEncoderParams};
use once_cell::sync::Lazy;
use rand::RngCore; use rand::RngCore;
use std::hint::black_box; use std::hint::black_box;
@ -8,8 +9,11 @@ fn main() {
divan::main() divan::main()
} }
const PARAMS: DaEncoderParams = DaEncoderParams::default_with(4096); static ENCODER: Lazy<DaEncoder> = Lazy::new(|| {
const ENCODER: DaEncoder = DaEncoder::new(PARAMS); let params = DaEncoderParams::new(4096, true);
DaEncoder::new(params)
});
const KB: usize = 1024; const KB: usize = 1024;
pub fn rand_data(elements_count: usize) -> Vec<u8> { pub fn rand_data(elements_count: usize) -> Vec<u8> {

View File

@ -5,7 +5,7 @@ use std::ops::Div;
use ark_ff::{BigInteger, PrimeField}; use ark_ff::{BigInteger, PrimeField};
use ark_poly::EvaluationDomain; use ark_poly::EvaluationDomain;
use kzgrs::common::bytes_to_polynomial_unchecked; use kzgrs::common::bytes_to_polynomial_unchecked;
use kzgrs::fk20::fk20_batch_generate_elements_proofs; use kzgrs::fk20::{fk20_batch_generate_elements_proofs, Toeplitz1Cache};
use kzgrs::{ use kzgrs::{
bytes_to_polynomial, commit_polynomial, encode, Commitment, Evaluations, KzgRsError, bytes_to_polynomial, commit_polynomial, encode, Commitment, Evaluations, KzgRsError,
Polynomial, PolynomialEvaluationDomain, Proof, BYTES_PER_FIELD_ELEMENT, Polynomial, PolynomialEvaluationDomain, Proof, BYTES_PER_FIELD_ELEMENT,
@ -17,16 +17,29 @@ use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use crate::common::{hash_column_and_commitment, Chunk, ChunksMatrix, Row}; use crate::common::{hash_column_and_commitment, Chunk, ChunksMatrix, Row};
use crate::global::GLOBAL_PARAMETERS; use crate::global::GLOBAL_PARAMETERS;
#[derive(Copy, Clone)] #[derive(Clone)]
pub struct DaEncoderParams { pub struct DaEncoderParams {
column_count: usize, column_count: usize,
toeplitz1cache: Option<Toeplitz1Cache>,
} }
impl DaEncoderParams { impl DaEncoderParams {
pub const MAX_BLS12_381_ENCODING_CHUNK_SIZE: usize = 31; pub const MAX_BLS12_381_ENCODING_CHUNK_SIZE: usize = 31;
/// Builds encoder parameters for `column_count` columns.
///
/// When `with_cache` is `true`, a [`Toeplitz1Cache`] sized to `column_count` is
/// computed up front from `GLOBAL_PARAMETERS` and stored alongside the column
/// count; when it is `false`, no cache is stored and `toeplitz1cache` is `None`.
pub fn new(column_count: usize, with_cache: bool) -> Self {
    // Only pay for the precomputation when the caller explicitly asks for it.
    let toeplitz1cache = if with_cache {
        Some(Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, column_count))
    } else {
        None
    };
    Self {
        column_count,
        toeplitz1cache,
    }
}
pub const fn default_with(column_count: usize) -> Self { pub const fn default_with(column_count: usize) -> Self {
Self { column_count } Self {
column_count,
toeplitz1cache: None,
}
} }
} }
@ -122,7 +135,10 @@ impl DaEncoder {
.collect() .collect()
} }
fn compute_rows_proofs(polynomials: &[Polynomial]) -> Result<Vec<Vec<Proof>>, KzgRsError> { fn compute_rows_proofs(
polynomials: &[Polynomial],
toeplitz1cache: Option<&Toeplitz1Cache>,
) -> Result<Vec<Vec<Proof>>, KzgRsError> {
Ok({ Ok({
#[cfg(not(feature = "parallel"))] #[cfg(not(feature = "parallel"))]
{ {
@ -133,7 +149,7 @@ impl DaEncoder {
polynomials.par_iter() polynomials.par_iter()
} }
} }
.map(|poly| fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS)) .map(|poly| fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS, toeplitz1cache))
.collect()) .collect())
} }
@ -167,10 +183,14 @@ impl DaEncoder {
Ok(((evals, poly), commitment)) Ok(((evals, poly), commitment))
} }
fn compute_aggregated_column_proofs(polynomial: &Polynomial) -> Result<Vec<Proof>, KzgRsError> { fn compute_aggregated_column_proofs(
polynomial: &Polynomial,
toeplitz1cache: Option<&Toeplitz1Cache>,
) -> Result<Vec<Proof>, KzgRsError> {
Ok(fk20_batch_generate_elements_proofs( Ok(fk20_batch_generate_elements_proofs(
polynomial, polynomial,
&GLOBAL_PARAMETERS, &GLOBAL_PARAMETERS,
toeplitz1cache,
)) ))
} }
@ -189,7 +209,7 @@ impl DaEncoder {
) )
} }
pub fn encode(&self, data: &[u8]) -> Result<EncodedData, kzgrs::KzgRsError> { pub fn encode(&self, data: &[u8]) -> Result<EncodedData, KzgRsError> {
let chunked_data = self.chunkify(data); let chunked_data = self.chunkify(data);
let row_domain = PolynomialEvaluationDomain::new(self.params.column_count) let row_domain = PolynomialEvaluationDomain::new(self.params.column_count)
.expect("Domain should be able to build"); .expect("Domain should be able to build");
@ -202,7 +222,8 @@ impl DaEncoder {
let (_, row_polynomials): (Vec<_>, Vec<_>) = row_polynomials.into_iter().unzip(); let (_, row_polynomials): (Vec<_>, Vec<_>) = row_polynomials.into_iter().unzip();
let encoded_evaluations = Self::rs_encode_rows(&row_polynomials, row_domain); let encoded_evaluations = Self::rs_encode_rows(&row_polynomials, row_domain);
let extended_data = Self::evals_to_chunk_matrix(&encoded_evaluations); let extended_data = Self::evals_to_chunk_matrix(&encoded_evaluations);
let rows_proofs = Self::compute_rows_proofs(&row_polynomials)?; let rows_proofs =
Self::compute_rows_proofs(&row_polynomials, self.params.toeplitz1cache.as_ref())?;
let (_column_polynomials, column_commitments): (Vec<_>, Vec<_>) = let (_column_polynomials, column_commitments): (Vec<_>, Vec<_>) =
Self::compute_kzg_column_commitments(&extended_data, column_domain)? Self::compute_kzg_column_commitments(&extended_data, column_domain)?
.into_iter() .into_iter()
@ -213,8 +234,10 @@ impl DaEncoder {
&column_commitments, &column_commitments,
row_domain, row_domain,
)?; )?;
let aggregated_column_proofs = let aggregated_column_proofs = Self::compute_aggregated_column_proofs(
Self::compute_aggregated_column_proofs(&aggregated_polynomial)?; &aggregated_polynomial,
self.params.toeplitz1cache.as_ref(),
)?;
Ok(EncodedData { Ok(EncodedData {
data: data.to_vec(), data: data.to_vec(),
chunked_data, chunked_data,
@ -258,7 +281,7 @@ pub mod test {
let params = DaEncoderParams::default_with(2); let params = DaEncoderParams::default_with(2);
let elements = 10usize; let elements = 10usize;
let data = rand_data(elements); let data = rand_data(elements);
let encoder = DaEncoder::new(params); let encoder = DaEncoder::new(params.clone());
let matrix = encoder.chunkify(&data); let matrix = encoder.chunkify(&data);
assert_eq!(matrix.len(), elements.div(params.column_count.div(2))); assert_eq!(matrix.len(), elements.div(params.column_count.div(2)));
for row in matrix.rows() { for row in matrix.rows() {
@ -343,7 +366,7 @@ pub mod test {
let (_evals, polynomials): (Vec<_>, Vec<_>) = poly_data.into_iter().unzip(); let (_evals, polynomials): (Vec<_>, Vec<_>) = poly_data.into_iter().unzip();
let extended_evaluations = DaEncoder::rs_encode_rows(&polynomials, domain); let extended_evaluations = DaEncoder::rs_encode_rows(&polynomials, domain);
let extended_matrix = DaEncoder::evals_to_chunk_matrix(&extended_evaluations); let extended_matrix = DaEncoder::evals_to_chunk_matrix(&extended_evaluations);
let proofs = DaEncoder::compute_rows_proofs(&polynomials).unwrap(); let proofs = DaEncoder::compute_rows_proofs(&polynomials, None).unwrap();
let checks = izip!(matrix.iter(), &commitments, &proofs); let checks = izip!(matrix.iter(), &commitments, &proofs);
for (row, commitment, proofs) in checks { for (row, commitment, proofs) in checks {
@ -412,7 +435,7 @@ pub mod test {
.unzip(); .unzip();
let ((_evals, polynomial), _aggregated_commitment) = let ((_evals, polynomial), _aggregated_commitment) =
DaEncoder::compute_aggregated_column_commitment(&matrix, &commitments, domain).unwrap(); DaEncoder::compute_aggregated_column_commitment(&matrix, &commitments, domain).unwrap();
DaEncoder::compute_aggregated_column_proofs(&polynomial).unwrap(); DaEncoder::compute_aggregated_column_proofs(&polynomial, None).unwrap();
} }
#[test] #[test]

View File

@ -6,7 +6,7 @@ use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
use ark_poly_commit::kzg10::KZG10; use ark_poly_commit::kzg10::KZG10;
use divan::counter::ItemsCount; use divan::counter::ItemsCount;
use divan::Bencher; use divan::Bencher;
use kzgrs::fk20::fk20_batch_generate_elements_proofs; use kzgrs::fk20::{fk20_batch_generate_elements_proofs, Toeplitz1Cache};
use kzgrs::{bytes_to_polynomial, GlobalParameters, BYTES_PER_FIELD_ELEMENT}; use kzgrs::{bytes_to_polynomial, GlobalParameters, BYTES_PER_FIELD_ELEMENT};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use rand::SeedableRng; use rand::SeedableRng;
@ -23,7 +23,7 @@ static GLOBAL_PARAMETERS: Lazy<GlobalParameters> = Lazy::new(|| {
KZG10::<Bls12_381, DensePolynomial<Fr>>::setup(4096, true, &mut rng).unwrap() KZG10::<Bls12_381, DensePolynomial<Fr>>::setup(4096, true, &mut rng).unwrap()
}); });
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])] #[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
fn compute_fk20_proofs_for_size(bencher: Bencher, size: usize) { fn compute_fk20_proofs_for_size(bencher: Bencher, size: usize) {
bencher bencher
.with_inputs(|| { .with_inputs(|| {
@ -40,12 +40,13 @@ fn compute_fk20_proofs_for_size(bencher: Bencher, size: usize) {
black_box(fk20_batch_generate_elements_proofs( black_box(fk20_batch_generate_elements_proofs(
poly, poly,
&GLOBAL_PARAMETERS, &GLOBAL_PARAMETERS,
None,
)) ))
}); });
} }
#[cfg(feature = "parallel")] #[cfg(feature = "parallel")]
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])] #[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
fn compute_parallel_fk20_proofs_for_size(bencher: Bencher, size: usize) { fn compute_parallel_fk20_proofs_for_size(bencher: Bencher, size: usize) {
let thread_count: usize = rayon::max_num_threads().min(rayon::current_num_threads()); let thread_count: usize = rayon::max_num_threads().min(rayon::current_num_threads());
bencher bencher
@ -59,9 +60,55 @@ fn compute_parallel_fk20_proofs_for_size(bencher: Bencher, size: usize) {
poly poly
}) })
.input_counter(move |_| ItemsCount::new(size * thread_count)) .input_counter(move |_| ItemsCount::new(size * thread_count))
.bench_refs(|(poly)| { .bench_refs(|poly| {
black_box((0..thread_count).into_par_iter().for_each(|_| { black_box((0..thread_count).into_par_iter().for_each(|_| {
fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS); fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS, None);
}))
});
}
/// Benchmarks `fk20_batch_generate_elements_proofs` when a precomputed
/// `Toeplitz1Cache` is supplied, once for every `size` listed in `args`.
///
/// Each generated input is a `(polynomial, cache)` pair: the polynomial is
/// decoded from a deterministic pattern of `BYTES_PER_FIELD_ELEMENT * size`
/// bytes, and the cache is built for the same `size` from `GLOBAL_PARAMETERS`.
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
fn compute_fk20_proofs_for_size_with_cache(bencher: Bencher, size: usize) {
bencher
.with_inputs(|| {
// Deterministic filler bytes (`i % 255`, emitted in reverse order), so the
// same `size` always yields the same polynomial.
let buff: Vec<_> = (0..BYTES_PER_FIELD_ELEMENT * size)
.map(|i| (i % 255) as u8)
.rev()
.collect();
let domain = GeneralEvaluationDomain::new(size).unwrap();
let (_, poly) = bytes_to_polynomial::<BYTES_PER_FIELD_ELEMENT>(&buff, domain).unwrap();
// Built in the input generator rather than in the benched closure.
// NOTE(review): presumably this keeps cache construction out of the
// measured time — confirm against divan's `with_inputs` documentation.
let cache = Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, size);
(poly, cache)
})
// Each input is counted as `size` items.
.input_counter(move |_| ItemsCount::new(size))
.bench_refs(|(poly, cache)| {
// The result is passed through `black_box`; presumably so the optimizer
// cannot discard the proof computation.
black_box(fk20_batch_generate_elements_proofs(
&poly,
&GLOBAL_PARAMETERS,
Some(cache),
))
});
}
#[cfg(feature = "parallel")]
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
fn compute_parallel_fk20_proofs_for_size_with_cache(bencher: Bencher, size: usize) {
let thread_count: usize = rayon::max_num_threads().min(rayon::current_num_threads());
bencher
.with_inputs(|| {
let buff: Vec<_> = (0..BYTES_PER_FIELD_ELEMENT * size)
.map(|i| (i % 255) as u8)
.rev()
.collect();
let domain = GeneralEvaluationDomain::new(size).unwrap();
let (_, poly) = bytes_to_polynomial::<BYTES_PER_FIELD_ELEMENT>(&buff, domain).unwrap();
let cache = Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, size);
(poly, cache)
})
.input_counter(move |_| ItemsCount::new(size * thread_count))
.bench_refs(|(poly, cache)| {
black_box((0..thread_count).into_par_iter().for_each(|_| {
fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS, Some(cache));
})) }))
}); });
} }

View File

@ -4,6 +4,7 @@ use ark_ec::CurveGroup;
use ark_ff::Field; use ark_ff::Field;
use ark_poly::{EvaluationDomain, GeneralEvaluationDomain}; use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
use num_traits::Zero; use num_traits::Zero;
use std::borrow::Cow;
fn toeplitz1(global_parameters: &[G1Affine], polynomial_degree: usize) -> Vec<G1Projective> { fn toeplitz1(global_parameters: &[G1Affine], polynomial_degree: usize) -> Vec<G1Projective> {
debug_assert_eq!(global_parameters.len(), polynomial_degree); debug_assert_eq!(global_parameters.len(), polynomial_degree);
@ -41,12 +42,17 @@ fn toeplitz3(h_extended_fft: &[G1Projective]) -> Vec<G1Projective> {
pub fn fk20_batch_generate_elements_proofs( pub fn fk20_batch_generate_elements_proofs(
polynomial: &Polynomial, polynomial: &Polynomial,
global_parameters: &GlobalParameters, global_parameters: &GlobalParameters,
toeplitz1_cache: Option<&Toeplitz1Cache>,
) -> Vec<Proof> { ) -> Vec<Proof> {
let polynomial_degree = polynomial.len(); let polynomial_degree = polynomial.len();
debug_assert!(polynomial_degree <= global_parameters.powers_of_g.len()); debug_assert!(polynomial_degree <= global_parameters.powers_of_g.len());
debug_assert!(polynomial_degree.is_power_of_two()); debug_assert!(polynomial_degree.is_power_of_two());
let domain: GeneralEvaluationDomain<Fr> = let domain: GeneralEvaluationDomain<Fr> =
GeneralEvaluationDomain::new(polynomial_degree).expect("Domain should be able to build"); GeneralEvaluationDomain::new(polynomial_degree).expect("Domain should be able to build");
let extended_vector = if let Some(Toeplitz1Cache(v)) = toeplitz1_cache {
Cow::Borrowed(v)
} else {
let global_parameters: Vec<G1Affine> = global_parameters let global_parameters: Vec<G1Affine> = global_parameters
.powers_of_g .powers_of_g
.iter() .iter()
@ -54,8 +60,8 @@ pub fn fk20_batch_generate_elements_proofs(
.take(polynomial_degree) .take(polynomial_degree)
.rev() .rev()
.collect(); .collect();
Cow::Owned(toeplitz1(&global_parameters, polynomial_degree))
let extended_vector = toeplitz1(&global_parameters, polynomial_degree); };
let toeplitz_coefficients: Vec<Fr> = std::iter::repeat(Fr::ZERO) let toeplitz_coefficients: Vec<Fr> = std::iter::repeat(Fr::ZERO)
.take(polynomial_degree) .take(polynomial_degree)
.chain(polynomial.coeffs.iter().copied()) .chain(polynomial.coeffs.iter().copied())
@ -72,6 +78,22 @@ pub fn fk20_batch_generate_elements_proofs(
.collect() .collect()
} }
/// Precomputed result of the `toeplitz1` step for one fixed polynomial size.
///
/// `fk20_batch_generate_elements_proofs` borrows the wrapped vector instead of
/// recomputing it when a cache is passed in.
#[derive(Clone)]
pub struct Toeplitz1Cache(Vec<G1Projective>);

impl Toeplitz1Cache {
    /// Runs `toeplitz1` over the first `polynomial_degree` entries of
    /// `global_parameters.powers_of_g`, taken in reverse order, and stores the
    /// result.
    ///
    /// `toeplitz1` debug-asserts that it receives exactly `polynomial_degree`
    /// points, so in debug builds asking for more entries than `powers_of_g`
    /// holds trips that assertion.
    pub fn with_size(global_parameters: &GlobalParameters, polynomial_degree: usize) -> Self {
        // Truncate first, then walk backwards: same points, same order as
        // copying every element before `take`/`rev`.
        let reversed_powers: Vec<G1Affine> = global_parameters
            .powers_of_g
            .iter()
            .take(polynomial_degree)
            .rev()
            .copied()
            .collect();
        let extended_vector = toeplitz1(&reversed_powers, polynomial_degree);
        Self(extended_vector)
    }
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::fk20::fk20_batch_generate_elements_proofs; use crate::fk20::fk20_batch_generate_elements_proofs;
@ -107,7 +129,7 @@ mod test {
generate_element_proof(i, &poly, &evals, &GLOBAL_PARAMETERS, domain).unwrap() generate_element_proof(i, &poly, &evals, &GLOBAL_PARAMETERS, domain).unwrap()
}) })
.collect(); .collect();
let fk20_proofs = fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS); let fk20_proofs = fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS, None);
assert_eq!(slow_proofs, fk20_proofs); assert_eq!(slow_proofs, fk20_proofs);
} }
} }