Da: cache toeplitz1 (#667)

* Added toeplitz1 cache to fk20
* Added toeplitz1 cache benches
* Use toeplitz1 cache in encoder
* Use cache in bench
* Make Clippy happy
2025-02-23 21:18:20 +00:00 · 2024-06-26 16:30:38 +02:00 · 2024-06-26 16:30:38 +02:00 · fdc242fa48
commit fdc242fa48
parent c3375b1c48
4 changed files with 125 additions and 29 deletions
--- a/nomos-da/kzgrs-backend/benches/encoder.rs
+++ b/nomos-da/kzgrs-backend/benches/encoder.rs
@ -1,6 +1,7 @@
 use divan::counter::BytesCount;
 use divan::Bencher;
 use kzgrs_backend::encoder::{DaEncoder, DaEncoderParams};
+use once_cell::sync::Lazy;
 use rand::RngCore;
 use std::hint::black_box;

@ -8,8 +9,11 @@ fn main() {
    divan::main()
 }

-const PARAMS: DaEncoderParams = DaEncoderParams::default_with(4096);
-const ENCODER: DaEncoder = DaEncoder::new(PARAMS);
+static ENCODER: Lazy<DaEncoder> = Lazy::new(|| {
+    let params = DaEncoderParams::new(4096, true);
+    DaEncoder::new(params)
+});
+
 const KB: usize = 1024;

 pub fn rand_data(elements_count: usize) -> Vec<u8> {
--- a/nomos-da/kzgrs-backend/src/encoder.rs
+++ b/nomos-da/kzgrs-backend/src/encoder.rs
@ -5,7 +5,7 @@ use std::ops::Div;
 use ark_ff::{BigInteger, PrimeField};
 use ark_poly::EvaluationDomain;
 use kzgrs::common::bytes_to_polynomial_unchecked;
-use kzgrs::fk20::fk20_batch_generate_elements_proofs;
+use kzgrs::fk20::{fk20_batch_generate_elements_proofs, Toeplitz1Cache};
 use kzgrs::{
    bytes_to_polynomial, commit_polynomial, encode, Commitment, Evaluations, KzgRsError,
    Polynomial, PolynomialEvaluationDomain, Proof, BYTES_PER_FIELD_ELEMENT,
@ -17,16 +17,29 @@ use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
 use crate::common::{hash_column_and_commitment, Chunk, ChunksMatrix, Row};
 use crate::global::GLOBAL_PARAMETERS;

-#[derive(Copy, Clone)]
+#[derive(Clone)]
 pub struct DaEncoderParams {
    column_count: usize,
+    toeplitz1cache: Option<Toeplitz1Cache>,
 }

 impl DaEncoderParams {
    pub const MAX_BLS12_381_ENCODING_CHUNK_SIZE: usize = 31;

+    pub fn new(column_count: usize, with_cache: bool) -> Self {
+        let toeplitz1cache =
+            with_cache.then(|| Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, column_count));
+        Self {
+            column_count,
+            toeplitz1cache,
+        }
+    }
+
    pub const fn default_with(column_count: usize) -> Self {
-        Self { column_count }
+        Self {
+            column_count,
+            toeplitz1cache: None,
+        }
    }
 }

@ -122,7 +135,10 @@ impl DaEncoder {
        .collect()
    }

-    fn compute_rows_proofs(polynomials: &[Polynomial]) -> Result<Vec<Vec<Proof>>, KzgRsError> {
+    fn compute_rows_proofs(
+        polynomials: &[Polynomial],
+        toeplitz1cache: Option<&Toeplitz1Cache>,
+    ) -> Result<Vec<Vec<Proof>>, KzgRsError> {
        Ok({
            #[cfg(not(feature = "parallel"))]
            {
@ -133,7 +149,7 @@ impl DaEncoder {
                polynomials.par_iter()
            }
        }
-        .map(|poly| fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS))
+        .map(|poly| fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS, toeplitz1cache))
        .collect())
    }

@ -167,10 +183,14 @@ impl DaEncoder {
        Ok(((evals, poly), commitment))
    }

-    fn compute_aggregated_column_proofs(polynomial: &Polynomial) -> Result<Vec<Proof>, KzgRsError> {
+    fn compute_aggregated_column_proofs(
+        polynomial: &Polynomial,
+        toeplitz1cache: Option<&Toeplitz1Cache>,
+    ) -> Result<Vec<Proof>, KzgRsError> {
        Ok(fk20_batch_generate_elements_proofs(
            polynomial,
            &GLOBAL_PARAMETERS,
+            toeplitz1cache,
        ))
    }

@ -189,7 +209,7 @@ impl DaEncoder {
        )
    }

-    pub fn encode(&self, data: &[u8]) -> Result<EncodedData, kzgrs::KzgRsError> {
+    pub fn encode(&self, data: &[u8]) -> Result<EncodedData, KzgRsError> {
        let chunked_data = self.chunkify(data);
        let row_domain = PolynomialEvaluationDomain::new(self.params.column_count)
            .expect("Domain should be able to build");
@ -202,7 +222,8 @@ impl DaEncoder {
        let (_, row_polynomials): (Vec<_>, Vec<_>) = row_polynomials.into_iter().unzip();
        let encoded_evaluations = Self::rs_encode_rows(&row_polynomials, row_domain);
        let extended_data = Self::evals_to_chunk_matrix(&encoded_evaluations);
-        let rows_proofs = Self::compute_rows_proofs(&row_polynomials)?;
+        let rows_proofs =
+            Self::compute_rows_proofs(&row_polynomials, self.params.toeplitz1cache.as_ref())?;
        let (_column_polynomials, column_commitments): (Vec<_>, Vec<_>) =
            Self::compute_kzg_column_commitments(&extended_data, column_domain)?
                .into_iter()
@ -213,8 +234,10 @@ impl DaEncoder {
                &column_commitments,
                row_domain,
            )?;
-        let aggregated_column_proofs =
-            Self::compute_aggregated_column_proofs(&aggregated_polynomial)?;
+        let aggregated_column_proofs = Self::compute_aggregated_column_proofs(
+            &aggregated_polynomial,
+            self.params.toeplitz1cache.as_ref(),
+        )?;
        Ok(EncodedData {
            data: data.to_vec(),
            chunked_data,
@ -258,7 +281,7 @@ pub mod test {
        let params = DaEncoderParams::default_with(2);
        let elements = 10usize;
        let data = rand_data(elements);
-        let encoder = DaEncoder::new(params);
+        let encoder = DaEncoder::new(params.clone());
        let matrix = encoder.chunkify(&data);
        assert_eq!(matrix.len(), elements.div(params.column_count.div(2)));
        for row in matrix.rows() {
@ -343,7 +366,7 @@ pub mod test {
        let (_evals, polynomials): (Vec<_>, Vec<_>) = poly_data.into_iter().unzip();
        let extended_evaluations = DaEncoder::rs_encode_rows(&polynomials, domain);
        let extended_matrix = DaEncoder::evals_to_chunk_matrix(&extended_evaluations);
-        let proofs = DaEncoder::compute_rows_proofs(&polynomials).unwrap();
+        let proofs = DaEncoder::compute_rows_proofs(&polynomials, None).unwrap();

        let checks = izip!(matrix.iter(), &commitments, &proofs);
        for (row, commitment, proofs) in checks {
@ -412,7 +435,7 @@ pub mod test {
                .unzip();
        let ((_evals, polynomial), _aggregated_commitment) =
            DaEncoder::compute_aggregated_column_commitment(&matrix, &commitments, domain).unwrap();
-        DaEncoder::compute_aggregated_column_proofs(&polynomial).unwrap();
+        DaEncoder::compute_aggregated_column_proofs(&polynomial, None).unwrap();
    }

    #[test]
--- a/nomos-da/kzgrs/benches/fk20.rs
+++ b/nomos-da/kzgrs/benches/fk20.rs
@ -6,7 +6,7 @@ use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
 use ark_poly_commit::kzg10::KZG10;
 use divan::counter::ItemsCount;
 use divan::Bencher;
-use kzgrs::fk20::fk20_batch_generate_elements_proofs;
+use kzgrs::fk20::{fk20_batch_generate_elements_proofs, Toeplitz1Cache};
 use kzgrs::{bytes_to_polynomial, GlobalParameters, BYTES_PER_FIELD_ELEMENT};
 use once_cell::sync::Lazy;
 use rand::SeedableRng;
@ -23,7 +23,7 @@ static GLOBAL_PARAMETERS: Lazy<GlobalParameters> = Lazy::new(|| {
    KZG10::<Bls12_381, DensePolynomial<Fr>>::setup(4096, true, &mut rng).unwrap()
 });

-#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])]
+#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
 fn compute_fk20_proofs_for_size(bencher: Bencher, size: usize) {
    bencher
        .with_inputs(|| {
@ -40,12 +40,13 @@ fn compute_fk20_proofs_for_size(bencher: Bencher, size: usize) {
            black_box(fk20_batch_generate_elements_proofs(
                poly,
                &GLOBAL_PARAMETERS,
+                None,
            ))
        });
 }

 #[cfg(feature = "parallel")]
-#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])]
+#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
 fn compute_parallel_fk20_proofs_for_size(bencher: Bencher, size: usize) {
    let thread_count: usize = rayon::max_num_threads().min(rayon::current_num_threads());
    bencher
@ -59,9 +60,55 @@ fn compute_parallel_fk20_proofs_for_size(bencher: Bencher, size: usize) {
            poly
        })
        .input_counter(move |_| ItemsCount::new(size * thread_count))
-        .bench_refs(|(poly)| {
+        .bench_refs(|poly| {
            black_box((0..thread_count).into_par_iter().for_each(|_| {
-                fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS);
+                fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS, None);
+            }))
+        });
+}
+
+#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
+fn compute_fk20_proofs_for_size_with_cache(bencher: Bencher, size: usize) {
+    bencher
+        .with_inputs(|| {
+            let buff: Vec<_> = (0..BYTES_PER_FIELD_ELEMENT * size)
+                .map(|i| (i % 255) as u8)
+                .rev()
+                .collect();
+            let domain = GeneralEvaluationDomain::new(size).unwrap();
+            let (_, poly) = bytes_to_polynomial::<BYTES_PER_FIELD_ELEMENT>(&buff, domain).unwrap();
+            let cache = Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, size);
+            (poly, cache)
+        })
+        .input_counter(move |_| ItemsCount::new(size))
+        .bench_refs(|(poly, cache)| {
+            black_box(fk20_batch_generate_elements_proofs(
+                &poly,
+                &GLOBAL_PARAMETERS,
+                Some(cache),
+            ))
+        });
+}
+
+#[cfg(feature = "parallel")]
+#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096], sample_count = 10, sample_size = 10)]
+fn compute_parallel_fk20_proofs_for_size_with_cache(bencher: Bencher, size: usize) {
+    let thread_count: usize = rayon::max_num_threads().min(rayon::current_num_threads());
+    bencher
+        .with_inputs(|| {
+            let buff: Vec<_> = (0..BYTES_PER_FIELD_ELEMENT * size)
+                .map(|i| (i % 255) as u8)
+                .rev()
+                .collect();
+            let domain = GeneralEvaluationDomain::new(size).unwrap();
+            let (_, poly) = bytes_to_polynomial::<BYTES_PER_FIELD_ELEMENT>(&buff, domain).unwrap();
+            let cache = Toeplitz1Cache::with_size(&GLOBAL_PARAMETERS, size);
+            (poly, cache)
+        })
+        .input_counter(move |_| ItemsCount::new(size * thread_count))
+        .bench_refs(|(poly, cache)| {
+            black_box((0..thread_count).into_par_iter().for_each(|_| {
+                fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS, Some(cache));
            }))
        });
 }
--- a/nomos-da/kzgrs/src/fk20.rs
+++ b/nomos-da/kzgrs/src/fk20.rs
@ -4,6 +4,7 @@ use ark_ec::CurveGroup;
 use ark_ff::Field;
 use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
 use num_traits::Zero;
+use std::borrow::Cow;

 fn toeplitz1(global_parameters: &[G1Affine], polynomial_degree: usize) -> Vec<G1Projective> {
    debug_assert_eq!(global_parameters.len(), polynomial_degree);
@ -41,21 +42,26 @@ fn toeplitz3(h_extended_fft: &[G1Projective]) -> Vec<G1Projective> {
 pub fn fk20_batch_generate_elements_proofs(
    polynomial: &Polynomial,
    global_parameters: &GlobalParameters,
+    toeplitz1_cache: Option<&Toeplitz1Cache>,
 ) -> Vec<Proof> {
    let polynomial_degree = polynomial.len();
    debug_assert!(polynomial_degree <= global_parameters.powers_of_g.len());
    debug_assert!(polynomial_degree.is_power_of_two());
    let domain: GeneralEvaluationDomain<Fr> =
        GeneralEvaluationDomain::new(polynomial_degree).expect("Domain should be able to build");
-    let global_parameters: Vec<G1Affine> = global_parameters
-        .powers_of_g
-        .iter()
-        .copied()
-        .take(polynomial_degree)
-        .rev()
-        .collect();

-    let extended_vector = toeplitz1(&global_parameters, polynomial_degree);
+    let extended_vector = if let Some(Toeplitz1Cache(v)) = toeplitz1_cache {
+        Cow::Borrowed(v)
+    } else {
+        let global_parameters: Vec<G1Affine> = global_parameters
+            .powers_of_g
+            .iter()
+            .copied()
+            .take(polynomial_degree)
+            .rev()
+            .collect();
+        Cow::Owned(toeplitz1(&global_parameters, polynomial_degree))
+    };
    let toeplitz_coefficients: Vec<Fr> = std::iter::repeat(Fr::ZERO)
        .take(polynomial_degree)
        .chain(polynomial.coeffs.iter().copied())
@ -72,6 +78,22 @@ pub fn fk20_batch_generate_elements_proofs(
        .collect()
 }

+#[derive(Clone)]
+pub struct Toeplitz1Cache(Vec<G1Projective>);
+
+impl Toeplitz1Cache {
+    pub fn with_size(global_parameters: &GlobalParameters, polynomial_degree: usize) -> Self {
+        let global_parameters: Vec<G1Affine> = global_parameters
+            .powers_of_g
+            .iter()
+            .copied()
+            .take(polynomial_degree)
+            .rev()
+            .collect();
+        Self(toeplitz1(&global_parameters, polynomial_degree))
+    }
+}
+
 #[cfg(test)]
 mod test {
    use crate::fk20::fk20_batch_generate_elements_proofs;
@ -107,7 +129,7 @@ mod test {
                    generate_element_proof(i, &poly, &evals, &GLOBAL_PARAMETERS, domain).unwrap()
                })
                .collect();
-            let fk20_proofs = fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS);
+            let fk20_proofs = fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS, None);
            assert_eq!(slow_proofs, fk20_proofs);
        }
    }