Compare commits

..

5 Commits

Author SHA1 Message Date
danielsanchezq bf83bc4403 Fix parallelization 2024-06-14 15:24:41 +02:00
danielsanchezq c83638dd3a Fix bench 2024-06-14 15:04:16 +02:00
danielsanchezq a760e436ed Fix parallel feature tagging 2024-06-14 15:04:09 +02:00
danielsanchezq 911505f5e3 Add i/fft benches 2024-06-14 13:35:38 +02:00
danielsanchezq 5eeb96271f Parallelize i/fft 2024-06-14 12:01:06 +02:00
5 changed files with 150 additions and 45 deletions

View File

@ -20,19 +20,25 @@ num-bigint = "0.4.4"
thiserror = "1.0.58" thiserror = "1.0.58"
num-traits = "0.2.18" num-traits = "0.2.18"
rand = "0.8.5" rand = "0.8.5"
rayon = { version = "1.10", optional = true }
[dev-dependencies] [dev-dependencies]
divan = "0.1" divan = "0.1"
rayon = "1.10"
[[bench]] [[bench]]
name = "kzg" name = "kzg"
harness = false harness = false
[[bench]]
name = "fft"
harness = false
[features] [features]
default = ["single"] default = ["single"]
single = [] single = []
parallel = [ parallel = [
"rayon",
"ark-ff/parallel", "ark-ff/parallel",
"ark-ff/asm", "ark-ff/asm",
"ark-ff/rayon", "ark-ff/rayon",

View File

@ -0,0 +1,52 @@
use ark_bls12_381::{Fr, G1Affine};
use ark_ec::{AffineRepr, CurveGroup};
use ark_ff::{BigInt, FftField, Field};
use divan::counter::ItemsCount;
use divan::{black_box, counter::BytesCount, AllocProfiler, Bencher};
use kzgrs::fft::{fft_g1, ifft_g1};
/// Benchmark binary entry point: hands control to divan's runner.
fn main() {
    divan::main();
}
/// Benchmarks `fft_g1` over `size` G1 points for each size listed in `args`.
///
/// Each sample gets a fresh `(points, roots_of_unity)` pair from the input
/// generator, and throughput is reported as `size` items per call.
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])]
fn compute_fft_for_size(bencher: Bencher, size: usize) {
    bencher
        .with_inputs(|| {
            let primitive_root = <Fr as FftField>::get_root_of_unity(size as u64)
                .expect("no root of unity available for the requested size");
            // Powers 1..=size of the root, one per input point.
            let roots_of_unity: Vec<_> = (1..=size)
                .map(|i| primitive_root.pow::<ark_ff::BigInt<4>>(BigInt::from(i as u64)))
                .collect();
            // Multiples of the generator. Scaling the identity would yield the
            // identity for every `i`, so the benchmark would only ever see a
            // buffer of points at infinity.
            let buff: Vec<G1Affine> = (0..size)
                .map(|i| {
                    G1Affine::generator()
                        .mul_bigint(BigInt::<4>::from(i as u64))
                        .into_affine()
                })
                .collect();
            (buff, roots_of_unity)
        })
        .input_counter(move |_| ItemsCount::new(size))
        .bench_refs(|(buff, roots_of_unity)| black_box(fft_g1(buff, roots_of_unity)));
}
/// Benchmarks `ifft_g1` over `size` G1 points for each size listed in `args`.
///
/// The input generator builds `size` points, runs `fft_g1` over them, and
/// hands the transformed buffer plus the roots of unity to the benchmarked
/// closure. Throughput is reported as `size` items per call.
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])]
fn compute_ifft_for_size(bencher: Bencher, size: usize) {
    bencher
        .with_inputs(|| {
            let primitive_root = <Fr as FftField>::get_root_of_unity(size as u64)
                .expect("no root of unity available for the requested size");
            // Powers 1..=size of the root, one per input point.
            let roots_of_unity: Vec<_> = (1..=size)
                .map(|i| primitive_root.pow::<ark_ff::BigInt<4>>(BigInt::from(i as u64)))
                .collect();
            // Multiples of the generator. Scaling the identity would yield the
            // identity for every `i`, so the benchmark would only ever see a
            // buffer of points at infinity.
            let buff: Vec<G1Affine> = (0..size)
                .map(|i| {
                    G1Affine::generator()
                        .mul_bigint(BigInt::<4>::from(i as u64))
                        .into_affine()
                })
                .collect();
            // Forward transform happens here, in the input generator, so the
            // benchmarked closure below only calls `ifft_g1`.
            let buff = fft_g1(&buff, &roots_of_unity);
            (buff, roots_of_unity)
        })
        .input_counter(move |_| ItemsCount::new(size))
        .bench_refs(|(buff, roots_of_unity)| black_box(ifft_g1(buff, roots_of_unity)));
}

View File

@ -6,7 +6,9 @@ use divan::counter::ItemsCount;
use divan::{black_box, counter::BytesCount, AllocProfiler, Bencher}; use divan::{black_box, counter::BytesCount, AllocProfiler, Bencher};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use rand::RngCore; use rand::RngCore;
#[cfg(feature = "parallel")]
use rayon::iter::IntoParallelIterator; use rayon::iter::IntoParallelIterator;
#[cfg(feature = "parallel")]
use rayon::iter::ParallelIterator; use rayon::iter::ParallelIterator;
use kzgrs::{common::bytes_to_polynomial_unchecked, kzg::*}; use kzgrs::{common::bytes_to_polynomial_unchecked, kzg::*};
@ -46,6 +48,7 @@ fn commit_single_polynomial_with_element_count(bencher: Bencher, element_count:
.bench_refs(|(_evals, poly)| black_box(commit_polynomial(poly, &GLOBAL_PARAMETERS))); .bench_refs(|(_evals, poly)| black_box(commit_polynomial(poly, &GLOBAL_PARAMETERS)));
} }
#[cfg(feature = "parallel")]
#[allow(non_snake_case)] #[allow(non_snake_case)]
#[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])] #[divan::bench(args = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096])]
fn commit_polynomial_with_element_count_parallelized(bencher: Bencher, element_count: usize) { fn commit_polynomial_with_element_count_parallelized(bencher: Bencher, element_count: usize) {
@ -114,6 +117,7 @@ fn compute_batch_proofs(bencher: Bencher, element_count: usize) {
// This is a test on how will perform by having a wrapping rayon on top of the proof computation // This is a test on how will perform by having a wrapping rayon on top of the proof computation
// ark libraries already use rayon underneath so no great improvements are probably come up from this. // ark libraries already use rayon underneath so no great improvements are probably come up from this.
// But it should help reusing the same thread pool for all jobs saving a little time. // But it should help reusing the same thread pool for all jobs saving a little time.
#[cfg(feature = "parallel")]
#[allow(non_snake_case)] #[allow(non_snake_case)]
#[divan::bench(args = [128, 256, 512, 1024], sample_count = 3, sample_size = 5)] #[divan::bench(args = [128, 256, 512, 1024], sample_count = 3, sample_size = 5)]
fn compute_parallelize_batch_proofs(bencher: Bencher, element_count: usize) { fn compute_parallelize_batch_proofs(bencher: Bencher, element_count: usize) {

View File

@ -2,54 +2,89 @@ use ark_bls12_381::{Bls12_381, Fr, G1Affine};
use ark_ec::pairing::Pairing; use ark_ec::pairing::Pairing;
use ark_ec::{AffineRepr, CurveGroup}; use ark_ec::{AffineRepr, CurveGroup};
use ark_ff::{BigInt, BigInteger, FftField, Field, PrimeField}; use ark_ff::{BigInt, BigInteger, FftField, Field, PrimeField};
use blst::BLS12_381_G1; #[cfg(feature = "parallel")]
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
pub fn fft_g1(vals: &[G1Affine], roots_of_unity: &[Fr]) -> Vec<G1Affine> { pub fn fft_g1(vals: &[G1Affine], roots_of_unity: &[Fr]) -> Vec<G1Affine> {
debug_assert_eq!(vals.len(), roots_of_unity.len()); debug_assert_eq!(vals.len(), roots_of_unity.len());
if vals.len() == 1 { let original_len = vals.len();
if original_len == 1 {
return vals.to_vec(); return vals.to_vec();
} }
let half_roots: Vec<_> = roots_of_unity.iter().step_by(2).copied().collect(); let half_roots: Vec<_> = roots_of_unity.iter().step_by(2).copied().collect();
let l = fft_g1( let l = || {
vals.iter() fft_g1(
.step_by(2) vals.iter()
.copied() .step_by(2)
.collect::<Vec<_>>() .copied()
.as_slice(), .collect::<Vec<_>>()
half_roots.as_slice(), .as_slice(),
); half_roots.as_slice(),
)
};
let r = fft_g1( let r = || {
vals.iter() fft_g1(
.skip(1) vals.iter()
.step_by(2) .skip(1)
.copied() .step_by(2)
.collect::<Vec<_>>() .copied()
.as_slice(), .collect::<Vec<_>>()
half_roots.as_slice(), .as_slice(),
); half_roots.as_slice(),
)
};
let y_times_root = r let [l, r]: [Vec<G1Affine>; 2] = {
.into_iter() #[cfg(feature = "parallel")]
.cycle() {
.enumerate() let (l, r) = rayon::join(l, r);
.map(|(i, y)| (y * roots_of_unity[i % vals.len()]).into_affine()); [l, r]
}
#[cfg(not(feature = "parallel"))]
{
[l(), r()]
}
};
// Double sized so we can use iterator later on
let l: Vec<_> = l.into_iter().cycle().take(original_len).collect();
let r: Vec<_> = r.into_iter().cycle().take(original_len).collect();
l.into_iter() let y_times_root = {
.cycle() #[cfg(feature = "parallel")]
.take(vals.len()) {
.zip(y_times_root) r.into_par_iter()
.enumerate() }
.map(|(i, (x, y_times_root))| { #[cfg(not(feature = "parallel"))]
if i < vals.len() / 2 { {
x + y_times_root r.into_iter()
} else { }
x - y_times_root }
} .enumerate()
.into_affine() .map(|(i, y)| (y * roots_of_unity[i % vals.len()]).into_affine());
})
.collect() {
#[cfg(feature = "parallel")]
{
l.into_par_iter()
}
#[cfg(not(feature = "parallel"))]
{
l.into_iter()
}
}
.zip(y_times_root)
.enumerate()
.map(|(i, (x, y_times_root))| {
if i < vals.len() / 2 {
x + y_times_root
} else {
x - y_times_root
}
.into_affine()
})
.collect()
} }
pub fn ifft_g1(vals: &[G1Affine], roots_of_unity: &[Fr]) -> Vec<G1Affine> { pub fn ifft_g1(vals: &[G1Affine], roots_of_unity: &[Fr]) -> Vec<G1Affine> {
@ -57,10 +92,18 @@ pub fn ifft_g1(vals: &[G1Affine], roots_of_unity: &[Fr]) -> Vec<G1Affine> {
let mut mod_min_2 = BigInt::new(<Fr as PrimeField>::MODULUS.0); let mut mod_min_2 = BigInt::new(<Fr as PrimeField>::MODULUS.0);
mod_min_2.sub_with_borrow(&BigInt::<4>::from(2u64)); mod_min_2.sub_with_borrow(&BigInt::<4>::from(2u64));
let invlen = Fr::from(vals.len() as u64).pow(mod_min_2).into_bigint(); let invlen = Fr::from(vals.len() as u64).pow(mod_min_2).into_bigint();
fft_g1(vals, roots_of_unity) {
.into_iter() #[cfg(feature = "parallel")]
.map(|g| g.mul_bigint(invlen).into_affine()) {
.collect() fft_g1(vals, roots_of_unity).into_par_iter()
}
#[cfg(not(feature = "parallel"))]
{
fft_g1(vals, roots_of_unity).into_iter()
}
}
.map(|g| g.mul_bigint(invlen).into_affine())
.collect()
} }
#[cfg(test)] #[cfg(test)]

View File

@ -1,6 +1,6 @@
pub mod common; pub mod common;
mod fft; pub mod fft;
mod fk20; pub mod fk20;
pub mod global_parameters; pub mod global_parameters;
pub mod kzg; pub mod kzg;
pub mod rs; pub mod rs;