From d6630869e124bf929a0d523952cb0460ea5ae8ab Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 9 Nov 2021 16:23:22 -0800 Subject: [PATCH] msm (outside circuit) --- src/curve/curve_msm.rs | 263 +++++++++++++++++++++++++++++++++++++++++ src/curve/mod.rs | 1 + src/gadgets/curve.rs | 2 +- 3 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 src/curve/curve_msm.rs diff --git a/src/curve/curve_msm.rs b/src/curve/curve_msm.rs new file mode 100644 index 00000000..d2cb8049 --- /dev/null +++ b/src/curve/curve_msm.rs @@ -0,0 +1,263 @@ +use itertools::Itertools; +use rayon::prelude::*; + +use crate::curve::curve_summation::affine_multisummation_best; +use crate::curve::curve_types::{AffinePoint, Curve, ProjectivePoint}; +use crate::field::field_types::Field; + +/// In Yao's method, we compute an affine summation for each digit. In a parallel setting, it would +/// be easiest to assign individual summations to threads, but this would be sub-optimal because +/// multi-summations can be more efficient than repeating individual summations (see +/// `affine_multisummation_best`). Thus we divide digits into large chunks, and assign chunks of +/// digits to threads. Note that there is a delicate balance here, as large chunks can result in +/// uneven distributions of work among threads. +const DIGITS_PER_CHUNK: usize = 80; + +#[derive(Clone, Debug)] +pub struct MsmPrecomputation { + /// For each generator (in the order they were passed to `msm_precompute`), contains a vector + /// of powers, i.e. [(2^w)^i] for i < DIGITS. + // TODO: Use compressed coordinates here. + powers_per_generator: Vec>>, + + /// The window size. + w: usize, +} + +pub fn msm_precompute( + generators: &[ProjectivePoint], + w: usize, +) -> MsmPrecomputation { + MsmPrecomputation { + powers_per_generator: generators + .into_par_iter() + .map(|&g| precompute_single_generator(g, w)) + .collect(), + w, + } +} + +fn precompute_single_generator(g: ProjectivePoint, w: usize) -> Vec> { + let digits = (C::ScalarField::BITS + w - 1) / w; + let mut powers: Vec> = Vec::with_capacity(digits); + powers.push(g); + for i in 1..digits { + let mut power_i_proj = powers[i - 1]; + for _j in 0..w { + power_i_proj = power_i_proj.double(); + } + powers.push(power_i_proj); + } + ProjectivePoint::batch_to_affine(&powers) +} + +pub fn msm_parallel( + scalars: &[C::ScalarField], + generators: &[ProjectivePoint], + w: usize, +) -> ProjectivePoint { + let precomputation = msm_precompute(generators, w); + msm_execute_parallel(&precomputation, scalars) +} + +pub fn msm_execute( + precomputation: &MsmPrecomputation, + scalars: &[C::ScalarField], +) -> ProjectivePoint { + assert_eq!(precomputation.powers_per_generator.len(), scalars.len()); + let w = precomputation.w; + let digits = (C::ScalarField::BITS + w - 1) / w; + let base = 1 << w; + + // This is a variant of Yao's method, adapted to the multi-scalar setting. Because we use + // extremely large windows, the repeated scans in Yao's method could be more expensive than the + // actual group operations. To avoid this, we store a multimap from each possible digit to the + // positions in which that digit occurs in the scalars. These positions have the form (i, j), + // where i is the index of the generator and j is an index into the digits of the scalar + // associated with that generator. + let mut digit_occurrences: Vec> = Vec::with_capacity(digits); + for _i in 0..base { + digit_occurrences.push(Vec::new()); + } + for (i, scalar) in scalars.iter().enumerate() { + let digits = to_digits::(scalar, w); + for (j, &digit) in digits.iter().enumerate() { + digit_occurrences[digit].push((i, j)); + } + } + + let mut y = ProjectivePoint::ZERO; + let mut u = ProjectivePoint::ZERO; + + for digit in (1..base).rev() { + for &(i, j) in &digit_occurrences[digit] { + u = u + precomputation.powers_per_generator[i][j]; + } + y = y + u; + } + + y +} + +pub fn msm_execute_parallel( + precomputation: &MsmPrecomputation, + scalars: &[C::ScalarField], +) -> ProjectivePoint { + assert_eq!(precomputation.powers_per_generator.len(), scalars.len()); + let w = precomputation.w; + let digits = (C::ScalarField::BITS + w - 1) / w; + let base = 1 << w; + + // This is a variant of Yao's method, adapted to the multi-scalar setting. Because we use + // extremely large windows, the repeated scans in Yao's method could be more expensive than the + // actual group operations. To avoid this, we store a multimap from each possible digit to the + // positions in which that digit occurs in the scalars. These positions have the form (i, j), + // where i is the index of the generator and j is an index into the digits of the scalar + // associated with that generator. + let mut digit_occurrences: Vec> = Vec::with_capacity(digits); + for _i in 0..base { + digit_occurrences.push(Vec::new()); + } + for (i, scalar) in scalars.iter().enumerate() { + let digits = to_digits::(scalar, w); + for (j, &digit) in digits.iter().enumerate() { + digit_occurrences[digit].push((i, j)); + } + } + + // For each digit, we add up the powers associated with all occurrences that digit. + let digits: Vec = (0..base).collect(); + let digit_acc: Vec> = digits + .par_chunks(DIGITS_PER_CHUNK) + .flat_map(|chunk| { + let summations: Vec>> = chunk + .iter() + .map(|&digit| { + digit_occurrences[digit] + .iter() + .map(|&(i, j)| precomputation.powers_per_generator[i][j]) + .collect() + }) + .collect(); + affine_multisummation_best(summations) + }) + .collect(); + // println!("Computing the per-digit summations (in parallel) took {}s", start.elapsed().as_secs_f64()); + + let mut y = ProjectivePoint::ZERO; + let mut u = ProjectivePoint::ZERO; + for digit in (1..base).rev() { + u = u + digit_acc[digit]; + y = y + u; + } + // println!("Final summation (sequential) {}s", start.elapsed().as_secs_f64()); + y +} + +pub(crate) fn to_digits(x: &C::ScalarField, w: usize) -> Vec { + let scalar_bits = C::ScalarField::BITS; + let num_digits = (scalar_bits + w - 1) / w; + + // Convert x to a bool array. + let x_canonical: Vec<_> = x + .to_biguint() + .to_u64_digits() + .iter() + .cloned() + .pad_using(scalar_bits / 64, |_| 0) + .collect(); + let mut x_bits = Vec::with_capacity(scalar_bits); + for i in 0..scalar_bits { + x_bits.push((x_canonical[i / 64] >> (i as u64 % 64) & 1) != 0); + } + + let mut digits = Vec::with_capacity(num_digits); + for i in 0..num_digits { + let mut digit = 0; + for j in ((i * w)..((i + 1) * w).min(scalar_bits)).rev() { + digit <<= 1; + digit |= x_bits[j] as usize; + } + digits.push(digit); + } + digits +} + +#[cfg(test)] +mod tests { + use num::BigUint; + + use crate::curve::curve_msm::{msm_execute, msm_precompute, to_digits}; + use crate::curve::curve_types::Curve; + use crate::curve::secp256k1::Secp256K1; + use crate::field::field_types::Field; + use crate::field::secp256k1_scalar::Secp256K1Scalar; + + #[test] + fn test_to_digits() { + let x_canonical = [ + 0b10101010101010101010101010101010, + 0b10101010101010101010101010101010, + 0b11001100110011001100110011001100, + 0b11001100110011001100110011001100, + 0b11110000111100001111000011110000, + 0b11110000111100001111000011110000, + 0b00001111111111111111111111111111, + 0b11111111111111111111111111111111, + ]; + let x = Secp256K1Scalar::from_biguint(BigUint::from_slice(&x_canonical)); + assert_eq!(x.to_biguint().to_u32_digits(), x_canonical); + assert_eq!( + to_digits::(&x, 17), + vec![ + 0b01010101010101010, + 0b10101010101010101, + 0b01010101010101010, + 0b11001010101010101, + 0b01100110011001100, + 0b00110011001100110, + 0b10011001100110011, + 0b11110000110011001, + 0b01111000011110000, + 0b00111100001111000, + 0b00011110000111100, + 0b11111111111111110, + 0b01111111111111111, + 0b11111111111111000, + 0b11111111111111111, + 0b1, + ] + ); + } + + #[test] + fn test_msm() { + let w = 5; + + let generator_1 = Secp256K1::GENERATOR_PROJECTIVE; + let generator_2 = generator_1 + generator_1; + let generator_3 = generator_1 + generator_2; + + let scalar_1 = Secp256K1Scalar::from_biguint(BigUint::from_slice(&[ + 11111111, 22222222, 33333333, 44444444, + ])); + let scalar_2 = Secp256K1Scalar::from_biguint(BigUint::from_slice(&[ + 22222222, 22222222, 33333333, 44444444, + ])); + let scalar_3 = Secp256K1Scalar::from_biguint(BigUint::from_slice(&[ + 33333333, 22222222, 33333333, 44444444, + ])); + + let generators = vec![generator_1, generator_2, generator_3]; + let scalars = vec![scalar_1, scalar_2, scalar_3]; + + let precomputation = msm_precompute(&generators, w); + let result_msm = msm_execute(&precomputation, &scalars); + + let result_naive = Secp256K1::convert(scalar_1) * generator_1 + + Secp256K1::convert(scalar_2) * generator_2 + + Secp256K1::convert(scalar_3) * generator_3; + + assert_eq!(result_msm, result_naive); + } +} diff --git a/src/curve/mod.rs b/src/curve/mod.rs index 6555404e..d31e373e 100644 --- a/src/curve/mod.rs +++ b/src/curve/mod.rs @@ -1,4 +1,5 @@ pub mod curve_adds; +pub mod curve_msm; pub mod curve_multiplication; pub mod curve_summation; pub mod curve_types; diff --git a/src/gadgets/curve.rs b/src/gadgets/curve.rs index a3fd5b90..fe6ae306 100644 --- a/src/gadgets/curve.rs +++ b/src/gadgets/curve.rs @@ -88,7 +88,7 @@ impl, const D: usize> CircuitBuilder { AffinePointTarget { x: x3, y: y3 } } - pub fn curve_add_two_affine( + pub fn curve_add( &mut self, p1: &AffinePointTarget, p2: &AffinePointTarget,