From 9adfbdddc628d1dceb4624d3af39fed777ffc8bb Mon Sep 17 00:00:00 2001 From: Daniel Sanchez Date: Fri, 21 Jun 2024 12:43:17 +0200 Subject: [PATCH] Da: fk20 implementation (#663) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Implement fft_g1 * Fix ifft_g1 Added fft/ifft g1 test * Parallelize i/fft * Add i/fft benches * Fix parallel feature tagging * Fix bench * Fix parallelization * Implement fk20 * Add compute roots of unity method * Fix ranges * Remove unnecessary collect in fft_g1 * Use domain and ark fft * Remove custom fft * Add fk20 benches * Clippy happy * Use threadcount for parallelization --------- Co-authored-by: mgonen --- nomos-da/kzgrs/Cargo.toml | 13 +++- nomos-da/kzgrs/benches/fft.rs | 23 +++++++ nomos-da/kzgrs/benches/fk20.rs | 67 +++++++++++++++++++ nomos-da/kzgrs/benches/kzg.rs | 8 ++- nomos-da/kzgrs/src/common.rs | 6 ++ nomos-da/kzgrs/src/fk20.rs | 114 +++++++++++++++++++++++++++++++++ nomos-da/kzgrs/src/lib.rs | 1 + 7 files changed, 229 insertions(+), 3 deletions(-) create mode 100644 nomos-da/kzgrs/benches/fft.rs create mode 100644 nomos-da/kzgrs/benches/fk20.rs create mode 100644 nomos-da/kzgrs/src/fk20.rs diff --git a/nomos-da/kzgrs/Cargo.toml b/nomos-da/kzgrs/Cargo.toml index ba7dfb07..f4690e6b 100644 --- a/nomos-da/kzgrs/Cargo.toml +++ b/nomos-da/kzgrs/Cargo.toml @@ -15,23 +15,34 @@ ark-poly = { version = "0.4.2" } ark-poly-commit = { version = "0.4.0" } ark-serialize = { version = "0.4" } blst = "0.3.11" +derive_more = "0.99" num-bigint = "0.4.4" thiserror = "1.0.58" num-traits = "0.2.18" rand = "0.8.5" +rayon = { version = "1.10", optional = true } + [dev-dependencies] divan = "0.1" -rayon = "1.10" [[bench]] name = "kzg" harness = false +[[bench]] +name = "fft" +harness = false + +[[bench]] +name = "fk20" +harness = false + [features] default = ["single"] single = [] parallel = [ + "rayon", "ark-ff/parallel", "ark-ff/asm", "ark-ff/rayon", diff --git a/nomos-da/kzgrs/benches/fft.rs 
b/nomos-da/kzgrs/benches/fft.rs new file mode 100644 index 00000000..016a6a16 --- /dev/null +++ b/nomos-da/kzgrs/benches/fft.rs @@ -0,0 +1,23 @@ +use ark_bls12_381::{Fr, G1Affine, G1Projective}; +use ark_ec::AffineRepr; +use ark_ff::BigInt; +use ark_poly::{EvaluationDomain, GeneralEvaluationDomain}; +use divan::counter::ItemsCount; +use divan::{black_box, Bencher}; +fn main() { + divan::main() +} + +#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])] +fn compute_ark_fft_for_size(bencher: Bencher, size: usize) { + bencher + .with_inputs(|| { + let domain = GeneralEvaluationDomain::::new(size).unwrap(); + let buff: Vec = (0..size) + .map(|i| G1Affine::identity().mul_bigint(BigInt::<4>::from(i as u64))) + .collect(); + (buff, domain) + }) + .input_counter(move |_| ItemsCount::new(size)) + .bench_refs(|(buff, domain)| black_box(domain.fft(buff))); +} diff --git a/nomos-da/kzgrs/benches/fk20.rs b/nomos-da/kzgrs/benches/fk20.rs new file mode 100644 index 00000000..21c5aa58 --- /dev/null +++ b/nomos-da/kzgrs/benches/fk20.rs @@ -0,0 +1,67 @@ +use ark_bls12_381::{Bls12_381, Fr, G1Affine, G1Projective}; +use ark_ec::AffineRepr; +use ark_ff::BigInt; +use ark_poly::univariate::DensePolynomial; +use ark_poly::{EvaluationDomain, GeneralEvaluationDomain}; +use ark_poly_commit::kzg10::KZG10; +use divan::counter::ItemsCount; +use divan::Bencher; +use kzgrs::fk20::fk20_batch_generate_elements_proofs; +use kzgrs::{bytes_to_polynomial, GlobalParameters, BYTES_PER_FIELD_ELEMENT}; +use once_cell::sync::Lazy; +use rand::SeedableRng; +#[cfg(feature = "parallel")] +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use std::hint::black_box; + +fn main() { + divan::main() +} + +static GLOBAL_PARAMETERS: Lazy = Lazy::new(|| { + let mut rng = rand::rngs::StdRng::seed_from_u64(1987); + KZG10::>::setup(4096, true, &mut rng).unwrap() +}); + +#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])] +fn compute_fk20_proofs_for_size(bencher: Bencher, size: 
usize) { + bencher + .with_inputs(|| { + let buff: Vec<_> = (0..BYTES_PER_FIELD_ELEMENT * size) + .map(|i| (i % 255) as u8) + .rev() + .collect(); + let domain = GeneralEvaluationDomain::new(size).unwrap(); + let (_, poly) = bytes_to_polynomial::(&buff, domain).unwrap(); + poly + }) + .input_counter(move |_| ItemsCount::new(size)) + .bench_refs(|(poly)| { + black_box(fk20_batch_generate_elements_proofs( + poly, + &GLOBAL_PARAMETERS, + )) + }); +} + +#[cfg(feature = "parallel")] +#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])] +fn compute_parallel_fk20_proofs_for_size(bencher: Bencher, size: usize) { + let thread_count: usize = rayon::max_num_threads().min(rayon::current_num_threads()); + bencher + .with_inputs(|| { + let buff: Vec<_> = (0..BYTES_PER_FIELD_ELEMENT * size) + .map(|i| (i % 255) as u8) + .rev() + .collect(); + let domain = GeneralEvaluationDomain::new(size).unwrap(); + let (_, poly) = bytes_to_polynomial::(&buff, domain).unwrap(); + poly + }) + .input_counter(move |_| ItemsCount::new(size * thread_count)) + .bench_refs(|(poly)| { + black_box((0..thread_count).into_par_iter().for_each(|_| { + fk20_batch_generate_elements_proofs(poly, &GLOBAL_PARAMETERS); + })) + }); +} diff --git a/nomos-da/kzgrs/benches/kzg.rs b/nomos-da/kzgrs/benches/kzg.rs index 18c26eb6..cacc8dfb 100644 --- a/nomos-da/kzgrs/benches/kzg.rs +++ b/nomos-da/kzgrs/benches/kzg.rs @@ -3,10 +3,12 @@ use ark_poly::univariate::DensePolynomial; use ark_poly::{EvaluationDomain, GeneralEvaluationDomain}; use ark_poly_commit::kzg10::{UniversalParams, KZG10}; use divan::counter::ItemsCount; -use divan::{black_box, counter::BytesCount, AllocProfiler, Bencher}; +use divan::{black_box, Bencher}; use once_cell::sync::Lazy; use rand::RngCore; +#[cfg(feature = "parallel")] use rayon::iter::IntoParallelIterator; +#[cfg(feature = "parallel")] use rayon::iter::ParallelIterator; use kzgrs::{common::bytes_to_polynomial_unchecked, kzg::*}; @@ -46,6 +48,7 @@ fn 
commit_single_polynomial_with_element_count(bencher: Bencher, element_count: .bench_refs(|(_evals, poly)| black_box(commit_polynomial(poly, &GLOBAL_PARAMETERS))); } +#[cfg(feature = "parallel")] #[allow(non_snake_case)] #[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])] fn commit_polynomial_with_element_count_parallelized(bencher: Bencher, element_count: usize) { @@ -58,7 +61,7 @@ fn commit_polynomial_with_element_count_parallelized(bencher: Bencher, element_c }) .input_counter(move |(_evals, _poly)| ItemsCount::new(threads)) .bench_refs(|(_evals, poly)| { - let commitments: Vec<_> = (0..threads) + let _commitments: Vec<_> = (0..threads) .into_par_iter() .map(|_| commit_polynomial(poly, &GLOBAL_PARAMETERS)) .collect(); @@ -114,6 +117,7 @@ fn compute_batch_proofs(bencher: Bencher, element_count: usize) { // This is a test on how will perform by having a wrapping rayon on top of the proof computation // ark libraries already use rayon underneath so no great improvements are probably come up from this. // But it should help reusing the same thread pool for all jobs saving a little time. 
+#[cfg(feature = "parallel")] #[allow(non_snake_case)] #[divan::bench(args = [128, 256, 512, 1024], sample_count = 3, sample_size = 5)] fn compute_parallelize_batch_proofs(bencher: Bencher, element_count: usize) { diff --git a/nomos-da/kzgrs/src/common.rs b/nomos-da/kzgrs/src/common.rs index 35f7bc28..2eee670c 100644 --- a/nomos-da/kzgrs/src/common.rs +++ b/nomos-da/kzgrs/src/common.rs @@ -8,6 +8,7 @@ use ark_ff::Zero; use ark_poly::domain::general::GeneralEvaluationDomain; use ark_poly::evaluations::univariate::Evaluations; use ark_poly::univariate::DensePolynomial; +use ark_poly::EvaluationDomain; use blst::BLST_ERROR; use num_bigint::BigUint; use thiserror::Error; @@ -122,6 +123,11 @@ pub fn field_element_from_bytes_le(b: &[u8]) -> FieldElement { FieldElement::from(BigUint::from_bytes_le(b)) } +pub fn compute_roots_of_unity(size: usize) -> Vec { + let domain = GeneralEvaluationDomain::new(size).unwrap(); + domain.elements().take(size).collect() +} + #[cfg(test)] mod test { use super::{bytes_to_evaluations, bytes_to_polynomial, KzgRsError}; diff --git a/nomos-da/kzgrs/src/fk20.rs b/nomos-da/kzgrs/src/fk20.rs new file mode 100644 index 00000000..e3667da3 --- /dev/null +++ b/nomos-da/kzgrs/src/fk20.rs @@ -0,0 +1,114 @@ +use crate::{GlobalParameters, Polynomial, Proof}; +use ark_bls12_381::{Fr, G1Affine, G1Projective}; +use ark_ec::CurveGroup; +use ark_ff::Field; +use ark_poly::{EvaluationDomain, GeneralEvaluationDomain}; +use num_traits::Zero; + +fn toeplitz1(global_parameters: &[G1Affine], polynomial_degree: usize) -> Vec { + debug_assert_eq!(global_parameters.len(), polynomial_degree); + debug_assert!(polynomial_degree.is_power_of_two()); + let domain: GeneralEvaluationDomain = GeneralEvaluationDomain::new(polynomial_degree * 2) + .expect("Domain should be able to build"); + let vector_extended: Vec = global_parameters + .iter() + .copied() + .map(G1Projective::from) + .chain(std::iter::repeat_with(G1Projective::zero).take(polynomial_degree)) + .collect(); + 
domain.fft(&vector_extended) +} + +fn toeplitz2(coefficients: &[Fr], extended_vector: &[G1Projective]) -> Vec { + debug_assert!(coefficients.len().is_power_of_two()); + let domain: GeneralEvaluationDomain = + GeneralEvaluationDomain::new(coefficients.len()).expect("Domain should be able to build"); + let toeplitz_coefficients_fft = domain.fft(coefficients); + extended_vector + .iter() + .copied() + .zip(toeplitz_coefficients_fft) + .map(|(v, c)| (v * c)) + .collect() +} + +fn toeplitz3(h_extended_fft: &[G1Projective]) -> Vec { + let domain: GeneralEvaluationDomain = + GeneralEvaluationDomain::new(h_extended_fft.len()).expect("Domain should be able to build"); + domain.ifft(h_extended_fft) +} + +pub fn fk20_batch_generate_elements_proofs( + polynomial: &Polynomial, + global_parameters: &GlobalParameters, +) -> Vec { + let polynomial_degree = polynomial.len(); + debug_assert!(polynomial_degree <= global_parameters.powers_of_g.len()); + debug_assert!(polynomial_degree.is_power_of_two()); + let domain: GeneralEvaluationDomain = + GeneralEvaluationDomain::new(polynomial_degree).expect("Domain should be able to build"); + let global_parameters: Vec = global_parameters + .powers_of_g + .iter() + .copied() + .take(polynomial_degree) + .rev() + .collect(); + + let extended_vector = toeplitz1(&global_parameters, polynomial_degree); + let toeplitz_coefficients: Vec = std::iter::repeat(Fr::ZERO) + .take(polynomial_degree) + .chain(polynomial.coeffs.iter().copied()) + .collect(); + let h_extended_vector = toeplitz2(&toeplitz_coefficients, &extended_vector); + let h_vector = toeplitz3(&h_extended_vector); + domain + .fft(&h_vector) + .into_iter() + .map(|g1| Proof { + w: g1.into_affine(), + random_v: None, + }) + .collect() +} + +#[cfg(test)] +mod test { + use crate::fk20::fk20_batch_generate_elements_proofs; + use crate::{ + common::bytes_to_polynomial, kzg::generate_element_proof, GlobalParameters, Proof, + BYTES_PER_FIELD_ELEMENT, + }; + use ark_bls12_381::{Bls12_381, Fr}; + 
use ark_poly::univariate::DensePolynomial; + use ark_poly::{EvaluationDomain, GeneralEvaluationDomain}; + use ark_poly_commit::kzg10::KZG10; + use once_cell::sync::Lazy; + use rand::SeedableRng; + + static GLOBAL_PARAMETERS: Lazy = Lazy::new(|| { + let mut rng = rand::rngs::StdRng::seed_from_u64(1987); + KZG10::>::setup(4096, true, &mut rng).unwrap() + }); + + #[test] + fn test_generate_proofs() { + for size in [16, 32, 64, 128, 256] { + let buff: Vec<_> = (0..BYTES_PER_FIELD_ELEMENT * size) + .map(|i| (i % 255) as u8) + .rev() + .collect(); + let domain = GeneralEvaluationDomain::new(size).unwrap(); + let (evals, poly) = + bytes_to_polynomial::(&buff, domain).unwrap(); + let polynomial_degree = poly.len(); + let slow_proofs: Vec = (0..polynomial_degree) + .map(|i| { + generate_element_proof(i, &poly, &evals, &GLOBAL_PARAMETERS, domain).unwrap() + }) + .collect(); + let fk20_proofs = fk20_batch_generate_elements_proofs(&poly, &GLOBAL_PARAMETERS); + assert_eq!(slow_proofs, fk20_proofs); + } + } +} diff --git a/nomos-da/kzgrs/src/lib.rs b/nomos-da/kzgrs/src/lib.rs index 188bee30..97b0c486 100644 --- a/nomos-da/kzgrs/src/lib.rs +++ b/nomos-da/kzgrs/src/lib.rs @@ -1,4 +1,5 @@ pub mod common; +pub mod fk20; pub mod global_parameters; pub mod kzg; pub mod rs;