Da: cache toeplitz1 (#667)

* Added toeplitz1 cache to fk20

* Added toeplitz1 cache benches

* Use toeplitz cache in encoder

* Use cache in bench

* Clippy happy
This commit is contained in:
Daniel Sanchez 2024-06-26 16:30:38 +02:00 committed by GitHub
parent c3375b1c48
commit fdc242fa48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 125 additions and 29 deletions

View File

@ -1,6 +1,7 @@
use divan::counter::BytesCount;
use divan::Bencher;
use kzgrs_backend::encoder::{DaEncoder, DaEncoderParams};
use once_cell::sync::Lazy;
use rand::RngCore;
use std::hint::black_box;
@ -8,8 +9,11 @@ fn main() {
divan::main()
}
const PARAMS: DaEncoderParams = DaEncoderParams::default_with(4096);
const ENCODER: DaEncoder = DaEncoder::new(PARAMS);
/// Shared encoder used by the benchmarks in this file.
///
/// Built on first access (via `once_cell::sync::Lazy`) with 4096 columns and
/// the Toeplitz cache enabled (`with_cache = true`).
static ENCODER: Lazy<DaEncoder> =
    Lazy::new(|| DaEncoder::new(DaEncoderParams::new(4096, true)));
const KB: usize = 1024;
pub fn rand_data(elements_count: usize) -> Vec<u8> {

View File

@ -5,7 +5,7 @@ use std::ops::Div;
use ark_ff::{BigInteger, PrimeField};
use ark_poly::EvaluationDomain;
use kzgrs::common::bytes_to_polynomial_unchecked;
use kzgrs::fk20::fk20_batch_generate_elements_proofs;
use kzgrs::fk20::{fk20_batch_generate_elements_proofs, Toeplitz1Cache};
use kzgrs::{
bytes_to_polynomial, commit_polynomial, encode, Commitment, Evaluations, KzgRsError,
Polynomial, PolynomialEvaluationDomain, Proof, BYTES_PER_FIELD_ELEMENT,
@ -17,16 +17,29 @@ use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use crate::common::{hash_column_and_commitment, Chunk, ChunksMatrix, Row};
use crate::global::GLOBAL_PARAMETERS;
#[derive(Copy, Clone)]
#[derive(Clone)]
pub struct DaEncoderParams {
column_count: usize,
toeplitz1cache: Option<Toeplitz1Cache>,
}
impl DaEncoderParams {
pub const MAX_BLS12_381_ENCODING_CHUNK_SIZE: usize = 31;
/// Creates encoder parameters for `column_count` columns.
///
/// When `with_cache` is `true`, a [`Toeplitz1Cache`] sized for `column_count`
/// is built right away from `GLOBAL_PARAMETERS` and stored in the parameters;
/// when it is `false`, no cache is stored (`None`).
pub fn new(column_count: usize, with_cache: bool) -> Self {
    let toeplitz1cache = if with_cache {
        Some(Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, column_count))
    } else {
        None
    };
    Self {
        column_count,
        toeplitz1cache,
    }
}
pub const fn default_with(column_count: usize) -> Self {
Self { column_count }
Self {
column_count,
toeplitz1cache: None,
}
}
}
@ -122,7 +135,10 @@ impl DaEncoder {
.collect()
}
fn compute_rows_proofs(polynomials: &[Polynomial]) -> Result<Vec<Vec<Proof>>, KzgRsError> {
fn compute_rows_proofs(
polynomials: &[Polynomial],
toeplitz1cache: Option<&Toeplitz1Cache>,
) -> Result<Vec<Vec<Proof>>, KzgRsError> {
Ok({
#[cfg(not(feature = "parallel"))]
{
@ -133,7 +149,7 @@ impl DaEncoder {
polynomials.par_iter()
}
}
.map(|poly| fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS))
.map(|poly| fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS, toeplitz1cache))
.collect())
}
@ -167,10 +183,14 @@ impl DaEncoder {
Ok(((evals, poly), commitment))
}
fn compute_aggregated_column_proofs(polynomial: &Polynomial) -> Result<Vec<Proof>, KzgRsError> {
fn compute_aggregated_column_proofs(
polynomial: &Polynomial,
toeplitz1cache: Option<&Toeplitz1Cache>,
) -> Result<Vec<Proof>, KzgRsError> {
Ok(fk20_batch_generate_elements_proofs(
polynomial,
&GLOBAL_PARAMETERS,
toeplitz1cache,
))
}
@ -189,7 +209,7 @@ impl DaEncoder {
)
}
pub fn encode(&self, data: &[u8]) -> Result<EncodedData, kzgrs::KzgRsError> {
pub fn encode(&self, data: &[u8]) -> Result<EncodedData, KzgRsError> {
let chunked_data = self.chunkify(data);
let row_domain = PolynomialEvaluationDomain::new(self.params.column_count)
.expect("Domain should be able to build");
@ -202,7 +222,8 @@ impl DaEncoder {
let (_, row_polynomials): (Vec<_>, Vec<_>) = row_polynomials.into_iter().unzip();
let encoded_evaluations = Self::rs_encode_rows(&row_polynomials, row_domain);
let extended_data = Self::evals_to_chunk_matrix(&encoded_evaluations);
let rows_proofs = Self::compute_rows_proofs(&row_polynomials)?;
let rows_proofs =
Self::compute_rows_proofs(&row_polynomials, self.params.toeplitz1cache.as_ref())?;
let (_column_polynomials, column_commitments): (Vec<_>, Vec<_>) =
Self::compute_kzg_column_commitments(&extended_data, column_domain)?
.into_iter()
@ -213,8 +234,10 @@ impl DaEncoder {
&column_commitments,
row_domain,
)?;
let aggregated_column_proofs =
Self::compute_aggregated_column_proofs(&aggregated_polynomial)?;
let aggregated_column_proofs = Self::compute_aggregated_column_proofs(
&aggregated_polynomial,
self.params.toeplitz1cache.as_ref(),
)?;
Ok(EncodedData {
data: data.to_vec(),
chunked_data,
@ -258,7 +281,7 @@ pub mod test {
let params = DaEncoderParams::default_with(2);
let elements = 10usize;
let data = rand_data(elements);
let encoder = DaEncoder::new(params);
let encoder = DaEncoder::new(params.clone());
let matrix = encoder.chunkify(&data);
assert_eq!(matrix.len(), elements.div(params.column_count.div(2)));
for row in matrix.rows() {
@ -343,7 +366,7 @@ pub mod test {
let (_evals, polynomials): (Vec<_>, Vec<_>) = poly_data.into_iter().unzip();
let extended_evaluations = DaEncoder::rs_encode_rows(&polynomials, domain);
let extended_matrix = DaEncoder::evals_to_chunk_matrix(&extended_evaluations);
let proofs = DaEncoder::compute_rows_proofs(&polynomials).unwrap();
let proofs = DaEncoder::compute_rows_proofs(&polynomials, None).unwrap();
let checks = izip!(matrix.iter(), &commitments, &proofs);
for (row, commitment, proofs) in checks {
@ -412,7 +435,7 @@ pub mod test {
.unzip();
let ((_evals, polynomial), _aggregated_commitment) =
DaEncoder::compute_aggregated_column_commitment(&matrix, &commitments, domain).unwrap();
DaEncoder::compute_aggregated_column_proofs(&polynomial).unwrap();
DaEncoder::compute_aggregated_column_proofs(&polynomial, None).unwrap();
}
#[test]

View File

@ -6,7 +6,7 @@ use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
use ark_poly_commit::kzg10::KZG10;
use divan::counter::ItemsCount;
use divan::Bencher;
use kzgrs::fk20::fk20_batch_generate_elements_proofs;
use kzgrs::fk20::{fk20_batch_generate_elements_proofs, Toeplitz1Cache};
use kzgrs::{bytes_to_polynomial, GlobalParameters, BYTES_PER_FIELD_ELEMENT};
use once_cell::sync::Lazy;
use rand::SeedableRng;
@ -23,7 +23,7 @@ static GLOBAL_PARAMETERS: Lazy<GlobalParameters> = Lazy::new(|| {
KZG10::<Bls12_381, DensePolynomial<Fr>>::setup(4096, true, &mut rng).unwrap()
});
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])]
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
fn compute_fk20_proofs_for_size(bencher: Bencher, size: usize) {
bencher
.with_inputs(|| {
@ -40,12 +40,13 @@ fn compute_fk20_proofs_for_size(bencher: Bencher, size: usize) {
black_box(fk20_batch_generate_elements_proofs(
poly,
&GLOBAL_PARAMETERS,
None,
))
});
}
#[cfg(feature = "parallel")]
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])]
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
fn compute_parallel_fk20_proofs_for_size(bencher: Bencher, size: usize) {
let thread_count: usize = rayon::max_num_threads().min(rayon::current_num_threads());
bencher
@ -59,9 +60,55 @@ fn compute_parallel_fk20_proofs_for_size(bencher: Bencher, size: usize) {
poly
})
.input_counter(move |_| ItemsCount::new(size * thread_count))
.bench_refs(|(poly)| {
.bench_refs(|poly| {
black_box((0..thread_count).into_par_iter().for_each(|_| {
fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS);
fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS, None);
}))
});
}
/// Benchmarks `fk20_batch_generate_elements_proofs` for a polynomial of
/// `size` field elements while passing a pre-built `Toeplitz1Cache`.
///
/// The polynomial and the cache are both created inside `with_inputs`; only
/// the closure given to `bench_refs` calls the proof generation.
/// NOTE(review): divan presumably excludes `with_inputs` from the measured
/// time, so the cache construction cost should not be counted — confirm.
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
fn compute_fk20_proofs_for_size_with_cache(bencher: Bencher, size: usize) {
    bencher
        .with_inputs(|| {
            // Deterministic payload: byte `i % 255` for every index, emitted
            // from the highest index down to 0.
            let byte_len = BYTES_PER_FIELD_ELEMENT * size;
            let payload: Vec<_> = (0..byte_len).rev().map(|i| (i % 255) as u8).collect();
            let domain = GeneralEvaluationDomain::new(size).unwrap();
            let (_, polynomial) =
                bytes_to_polynomial::<BYTES_PER_FIELD_ELEMENT>(&payload, domain).unwrap();
            let toeplitz_cache = Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, size);
            (polynomial, toeplitz_cache)
        })
        .input_counter(move |_| ItemsCount::new(size))
        .bench_refs(|(poly, cache)| {
            let proofs =
                fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS, Some(cache));
            black_box(proofs)
        });
}
/// Parallel variant of the cached FK20 benchmark (only built with the
/// `parallel` feature).
///
/// Runs `fk20_batch_generate_elements_proofs` once per rayon task, for
/// `thread_count` tasks, all sharing the same polynomial and the same
/// pre-built `Toeplitz1Cache`. The reported item count is
/// `size * thread_count`.
#[cfg(feature = "parallel")]
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
fn compute_parallel_fk20_proofs_for_size_with_cache(bencher: Bencher, size: usize) {
    // Never schedule more tasks than either rayon limit reports.
    let thread_count: usize = usize::min(rayon::max_num_threads(), rayon::current_num_threads());
    bencher
        .with_inputs(|| {
            // Deterministic payload: byte `i % 255` for every index, emitted
            // from the highest index down to 0.
            let byte_len = BYTES_PER_FIELD_ELEMENT * size;
            let payload: Vec<_> = (0..byte_len).rev().map(|i| (i % 255) as u8).collect();
            let domain = GeneralEvaluationDomain::new(size).unwrap();
            let (_, polynomial) =
                bytes_to_polynomial::<BYTES_PER_FIELD_ELEMENT>(&payload, domain).unwrap();
            let toeplitz_cache = Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, size);
            (polynomial, toeplitz_cache)
        })
        .input_counter(move |_| ItemsCount::new(size * thread_count))
        .bench_refs(|(poly, cache)| {
            black_box((0..thread_count).into_par_iter().for_each(|_| {
                fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS, Some(cache));
            }))
        });
}

View File

@ -4,6 +4,7 @@ use ark_ec::CurveGroup;
use ark_ff::Field;
use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
use num_traits::Zero;
use std::borrow::Cow;
fn toeplitz1(global_parameters: &[G1Affine], polynomial_degree: usize) -> Vec<G1Projective> {
debug_assert_eq!(global_parameters.len(), polynomial_degree);
@ -41,21 +42,26 @@ fn toeplitz3(h_extended_fft: &[G1Projective]) -> Vec<G1Projective> {
pub fn fk20_batch_generate_elements_proofs(
polynomial: &Polynomial,
global_parameters: &GlobalParameters,
toeplitz1_cache: Option<&Toeplitz1Cache>,
) -> Vec<Proof> {
let polynomial_degree = polynomial.len();
debug_assert!(polynomial_degree <= global_parameters.powers_of_g.len());
debug_assert!(polynomial_degree.is_power_of_two());
let domain: GeneralEvaluationDomain<Fr> =
GeneralEvaluationDomain::new(polynomial_degree).expect("Domain should be able to build");
let global_parameters: Vec<G1Affine> = global_parameters
.powers_of_g
.iter()
.copied()
.take(polynomial_degree)
.rev()
.collect();
let extended_vector = toeplitz1(&global_parameters, polynomial_degree);
let extended_vector = if let Some(Toeplitz1Cache(v)) = toeplitz1_cache {
Cow::Borrowed(v)
} else {
let global_parameters: Vec<G1Affine> = global_parameters
.powers_of_g
.iter()
.copied()
.take(polynomial_degree)
.rev()
.collect();
Cow::Owned(toeplitz1(&global_parameters, polynomial_degree))
};
let toeplitz_coefficients: Vec<Fr> = std::iter::repeat(Fr::ZERO)
.take(polynomial_degree)
.chain(polynomial.coeffs.iter().copied())
@ -72,6 +78,22 @@ pub fn fk20_batch_generate_elements_proofs(
.collect()
}
/// Precomputed result of `toeplitz1` for a fixed polynomial degree.
///
/// `fk20_batch_generate_elements_proofs` borrows the wrapped vector when a
/// cache is supplied instead of recomputing it on every call. Its fallback
/// path derives the same value from `polynomial.len()`, so a cache should be
/// built with the degree of the polynomials it will be used with.
#[derive(Clone)]
pub struct Toeplitz1Cache(Vec<G1Projective>);

impl Toeplitz1Cache {
    /// Builds the cache for polynomials of `polynomial_degree` coefficients.
    ///
    /// Takes the first `polynomial_degree` entries of
    /// `global_parameters.powers_of_g`, reverses their order and feeds them
    /// to `toeplitz1`.
    ///
    /// # Panics
    ///
    /// `toeplitz1` checks with `debug_assert_eq!` that it received exactly
    /// `polynomial_degree` points, so debug builds panic when
    /// `powers_of_g` holds fewer entries than requested.
    pub fn with_size(global_parameters: &GlobalParameters, polynomial_degree: usize) -> Self {
        let mut reversed_powers: Vec<G1Affine> = global_parameters
            .powers_of_g
            .iter()
            .take(polynomial_degree)
            .copied()
            .collect();
        reversed_powers.reverse();
        let extended_vector = toeplitz1(&reversed_powers, polynomial_degree);
        Self(extended_vector)
    }
}
#[cfg(test)]
mod test {
use crate::fk20::fk20_batch_generate_elements_proofs;
@ -107,7 +129,7 @@ mod test {
generate_element_proof(i, &poly, &evals, &GLOBAL_PARAMETERS, domain).unwrap()
})
.collect();
let fk20_proofs = fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS);
let fk20_proofs = fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS, None);
assert_eq!(slow_proofs, fk20_proofs);
}
}