From b28cd55326dc618e7596c97a9dccb1c38886ca6b Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Wed, 16 Feb 2022 13:37:01 +0100 Subject: [PATCH 01/32] Fix reduction strategy --- plonky2/src/fri/mod.rs | 2 +- plonky2/src/fri/reduction_strategies.rs | 21 +++++++++++---------- plonky2/src/plonk/circuit_builder.rs | 2 +- plonky2/src/plonk/circuit_data.rs | 2 +- starky/src/config.rs | 2 +- starky/src/prover.rs | 6 +++++- 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/plonky2/src/fri/mod.rs b/plonky2/src/fri/mod.rs index c491f8f0..5792444e 100644 --- a/plonky2/src/fri/mod.rs +++ b/plonky2/src/fri/mod.rs @@ -67,7 +67,7 @@ pub struct FriParams { } impl FriParams { - pub(crate) fn total_arities(&self) -> usize { + pub fn total_arities(&self) -> usize { self.reduction_arity_bits.iter().sum() } diff --git a/plonky2/src/fri/reduction_strategies.rs b/plonky2/src/fri/reduction_strategies.rs index 49eda3ba..84505ec2 100644 --- a/plonky2/src/fri/reduction_strategies.rs +++ b/plonky2/src/fri/reduction_strategies.rs @@ -8,11 +8,12 @@ pub enum FriReductionStrategy { /// Specifies the exact sequence of arities (expressed in bits) to use. Fixed(Vec), - /// `ConstantArityBits(arity_bits, final_poly_bits)` applies reductions of arity `2^arity_bits` - /// until the polynomial degree is `2^final_poly_bits` or less. This tends to work well in the - /// recursive setting, as it avoids needing multiple configurations of gates used in FRI - /// verification, such as `InterpolationGate`. - ConstantArityBits(usize, usize), + /// `ConstantArityBits(arity_bits, final_poly_bits, cap_height)` applies reductions of arity `2^arity_bits` + /// until the polynomial degree is less than or equal to `2^final_poly_bits` or until any further + /// `arity_bits`-reduction makes the polynomial degree smaller than `2^cap_height` (which would make FRI fail). 
+ /// This tends to work well in the recursive setting, as it avoids needing multiple configurations + /// of gates used in FRI verification, such as `InterpolationGate`. + ConstantArityBits(usize, usize, usize), /// `MinSize(opt_max_arity_bits)` searches for an optimal sequence of reduction arities, with an /// optional max `arity_bits`. If this proof will have recursive proofs on top of it, a max @@ -31,12 +32,12 @@ impl FriReductionStrategy { match self { FriReductionStrategy::Fixed(reduction_arity_bits) => reduction_arity_bits.to_vec(), - FriReductionStrategy::ConstantArityBits(arity_bits, final_poly_bits) => { + &FriReductionStrategy::ConstantArityBits(arity_bits, final_poly_bits, cap_height) => { let mut result = Vec::new(); - while degree_bits > *final_poly_bits { - result.push(*arity_bits); - assert!(degree_bits >= *arity_bits); - degree_bits -= *arity_bits; + while degree_bits > final_poly_bits && degree_bits - arity_bits >= cap_height { + result.push(arity_bits); + assert!(degree_bits >= arity_bits); + degree_bits -= arity_bits; } result.shrink_to_fit(); result diff --git a/plonky2/src/plonk/circuit_builder.rs b/plonky2/src/plonk/circuit_builder.rs index bd216389..d045aa6e 100644 --- a/plonky2/src/plonk/circuit_builder.rs +++ b/plonky2/src/plonk/circuit_builder.rs @@ -664,7 +664,7 @@ impl, const D: usize> CircuitBuilder { let degree_bits = log2_strict(degree); let fri_params = self.fri_params(degree_bits); assert!( - fri_params.total_arities() <= degree_bits, + fri_params.total_arities() <= degree_bits - self.config.fri_config.cap_height, "FRI total reduction arity is too large.", ); diff --git a/plonky2/src/plonk/circuit_data.rs b/plonky2/src/plonk/circuit_data.rs index 3d4ee2df..fdec495e 100644 --- a/plonky2/src/plonk/circuit_data.rs +++ b/plonky2/src/plonk/circuit_data.rs @@ -73,7 +73,7 @@ impl CircuitConfig { rate_bits: 3, cap_height: 4, proof_of_work_bits: 16, - reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5), + reduction_strategy: 
FriReductionStrategy::ConstantArityBits(4, 5, 4), num_query_rounds: 28, }, } diff --git a/starky/src/config.rs b/starky/src/config.rs index 500cd957..2e2cced7 100644 --- a/starky/src/config.rs +++ b/starky/src/config.rs @@ -22,7 +22,7 @@ impl StarkConfig { rate_bits: 1, cap_height: 4, proof_of_work_bits: 10, - reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5), + reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5, 4), num_query_rounds: 90, }, } diff --git a/starky/src/prover.rs b/starky/src/prover.rs index 2d57a60a..902fd1f9 100644 --- a/starky/src/prover.rs +++ b/starky/src/prover.rs @@ -37,6 +37,11 @@ where { let degree = trace.len(); let degree_bits = log2_strict(degree); + let fri_params = config.fri_params(degree_bits); + assert!( + fri_params.total_arities() <= degree_bits - config.fri_config.cap_height, + "FRI total reduction arity is too large.", + ); let trace_vecs = trace.into_iter().map(|row| row.to_vec()).collect_vec(); let trace_col_major: Vec> = transpose(&trace_vecs); @@ -117,7 +122,6 @@ where // TODO: Add permutation checks let initial_merkle_trees = &[&trace_commitment, "ient_commitment]; - let fri_params = config.fri_params(degree_bits); let opening_proof = timed!( timing, From ea9006f52eb98a96ff706167ea9b67cfe0f033a5 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Wed, 16 Feb 2022 13:51:10 +0100 Subject: [PATCH 02/32] Add rate_bits --- plonky2/src/plonk/circuit_builder.rs | 5 +++-- starky/src/prover.rs | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/plonky2/src/plonk/circuit_builder.rs b/plonky2/src/plonk/circuit_builder.rs index d045aa6e..8e2f2e10 100644 --- a/plonky2/src/plonk/circuit_builder.rs +++ b/plonky2/src/plonk/circuit_builder.rs @@ -639,6 +639,7 @@ impl, const D: usize> CircuitBuilder { let mut timing = TimingTree::new("preprocess", Level::Trace); let start = Instant::now(); let rate_bits = self.config.fri_config.rate_bits; + let cap_height = self.config.fri_config.cap_height; // 
Hash the public inputs, and route them to a `PublicInputGate` which will enforce that // those hash wires match the claimed public inputs. @@ -664,7 +665,7 @@ impl, const D: usize> CircuitBuilder { let degree_bits = log2_strict(degree); let fri_params = self.fri_params(degree_bits); assert!( - fri_params.total_arities() <= degree_bits - self.config.fri_config.cap_height, + fri_params.total_arities() <= degree_bits + rate_bits - cap_height, "FRI total reduction arity is too large.", ); @@ -705,7 +706,7 @@ impl, const D: usize> CircuitBuilder { constants_sigmas_vecs, rate_bits, PlonkOracle::CONSTANTS_SIGMAS.blinding, - self.config.fri_config.cap_height, + cap_height, &mut timing, Some(&fft_root_table), ); diff --git a/starky/src/prover.rs b/starky/src/prover.rs index 4fef0b4a..be1f198b 100644 --- a/starky/src/prover.rs +++ b/starky/src/prover.rs @@ -41,8 +41,10 @@ where let degree = trace.len(); let degree_bits = log2_strict(degree); let fri_params = config.fri_params(degree_bits); + let rate_bits = config.fri_config.rate_bits; + let cap_height = config.fri_config.cap_height; assert!( - fri_params.total_arities() <= degree_bits - config.fri_config.cap_height, + fri_params.total_arities() <= degree_bits + rate_bits - cap_height, "FRI total reduction arity is too large.", ); @@ -58,8 +60,6 @@ where .collect() ); - let rate_bits = config.fri_config.rate_bits; - let cap_height = config.fri_config.cap_height; let trace_commitment = timed!( timing, "compute trace commitment", From 56336e396d40f0d6fe221334a6ef761a2bb0394b Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Wed, 16 Feb 2022 14:17:14 +0100 Subject: [PATCH 03/32] Fix --- plonky2/src/fri/reduction_strategies.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plonky2/src/fri/reduction_strategies.rs b/plonky2/src/fri/reduction_strategies.rs index 84505ec2..d81ac2ae 100644 --- a/plonky2/src/fri/reduction_strategies.rs +++ b/plonky2/src/fri/reduction_strategies.rs @@ -10,7 +10,7 @@ pub enum 
FriReductionStrategy { /// `ConstantArityBits(arity_bits, final_poly_bits, cap_height)` applies reductions of arity `2^arity_bits` /// until the polynomial degree is less than or equal to `2^final_poly_bits` or until any further - /// `arity_bits`-reduction makes the polynomial degree smaller than `2^cap_height` (which would make FRI fail). + /// `arity_bits`-reduction makes the last FRI tree have height less than `cap_height`. /// This tends to work well in the recursive setting, as it avoids needing multiple configurations /// of gates used in FRI verification, such as `InterpolationGate`. ConstantArityBits(usize, usize, usize), @@ -34,7 +34,9 @@ impl FriReductionStrategy { &FriReductionStrategy::ConstantArityBits(arity_bits, final_poly_bits, cap_height) => { let mut result = Vec::new(); - while degree_bits > final_poly_bits && degree_bits - arity_bits >= cap_height { + while degree_bits > final_poly_bits + && degree_bits + rate_bits - arity_bits >= cap_height + { result.push(arity_bits); assert!(degree_bits >= arity_bits); degree_bits -= arity_bits; From 431faccbdbee989300992a1d1d04a42bc2602b7e Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Wed, 16 Feb 2022 22:37:20 -0800 Subject: [PATCH 04/32] Change `compute_permutation_z_polys` to batch permutation checks (#492) * Change `compute_permutation_z_polys` to batch permutation checks * feedback --- starky/src/permutation.rs | 77 +++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index 1f7655b4..01cfa8bf 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -67,48 +67,46 @@ where // Before batching, each permutation pair leads to `num_challenges` permutation arguments, so we // start with the cartesian product of `permutation_pairs` and `0..num_challenges`. Then we // chunk these arguments based on our batch size. 
- let permutation_instances = permutation_pairs + let permutation_batches = permutation_pairs .iter() .cartesian_product(0..config.num_challenges) .chunks(stark.permutation_batch_size()) .into_iter() - .flat_map(|batch| { - batch.enumerate().map(|(i, (pair, chal))| { - let challenge = permutation_challenge_sets[i].challenges[chal]; - PermutationInstance { pair, challenge } - }) + .map(|batch| { + batch + .enumerate() + .map(|(i, (pair, chal))| { + let challenge = permutation_challenge_sets[i].challenges[chal]; + PermutationInstance { pair, challenge } + }) + .collect_vec() }) .collect_vec(); - permutation_instances + permutation_batches .into_par_iter() - .map(|instance| compute_permutation_z_poly(instance, trace_poly_values)) + .map(|instances| compute_permutation_z_poly(&instances, trace_poly_values)) .collect() } /// Compute a single Z polynomial. -// TODO: Change this to handle a batch of `PermutationInstance`s. fn compute_permutation_z_poly( - instance: PermutationInstance, + instances: &[PermutationInstance], trace_poly_values: &[PolynomialValues], ) -> PolynomialValues { - let PermutationInstance { pair, challenge } = instance; - let PermutationPair { column_pairs } = pair; - let PermutationChallenge { beta, gamma } = challenge; - let degree = trace_poly_values[0].len(); - let mut reduced_lhs = PolynomialValues::constant(gamma, degree); - let mut reduced_rhs = PolynomialValues::constant(gamma, degree); + let (reduced_lhs_polys, reduced_rhs_polys): (Vec<_>, Vec<_>) = instances + .iter() + .map(|instance| permutation_reduced_polys(instance, trace_poly_values, degree)) + .unzip(); - for ((lhs, rhs), weight) in column_pairs.iter().zip(beta.powers()) { - reduced_lhs.add_assign_scaled(&trace_poly_values[*lhs], weight); - reduced_rhs.add_assign_scaled(&trace_poly_values[*rhs], weight); - } + let numerator = poly_product_elementwise(reduced_lhs_polys.into_iter()); + let denominator = poly_product_elementwise(reduced_rhs_polys.into_iter()); // Compute the quotients. 
- let reduced_rhs_inverses = F::batch_multiplicative_inverse(&reduced_rhs.values); - let mut quotients = reduced_lhs.values; - batch_multiply_inplace(&mut quotients, &reduced_rhs_inverses); + let denominator_inverses = F::batch_multiplicative_inverse(&denominator.values); + let mut quotients = numerator.values; + batch_multiply_inplace(&mut quotients, &denominator_inverses); // Compute Z, which contains partial products of the quotients. let mut partial_products = Vec::with_capacity(degree); @@ -120,6 +118,39 @@ fn compute_permutation_z_poly( PolynomialValues::new(partial_products) } +/// Computes the reduced polynomial, `\sum beta^i f_i(x) + gamma`, for both the "left" and "right" +/// sides of a given `PermutationPair`. +fn permutation_reduced_polys( + instance: &PermutationInstance, + trace_poly_values: &[PolynomialValues], + degree: usize, +) -> (PolynomialValues, PolynomialValues) { + let PermutationInstance { + pair: PermutationPair { column_pairs }, + challenge: PermutationChallenge { beta, gamma }, + } = instance; + + let mut reduced_lhs = PolynomialValues::constant(*gamma, degree); + let mut reduced_rhs = PolynomialValues::constant(*gamma, degree); + for ((lhs, rhs), weight) in column_pairs.iter().zip(beta.powers()) { + reduced_lhs.add_assign_scaled(&trace_poly_values[*lhs], weight); + reduced_rhs.add_assign_scaled(&trace_poly_values[*rhs], weight); + } + (reduced_lhs, reduced_rhs) +} + +/// Computes the elementwise product of a set of polynomials. Assumes that the set is non-empty and +/// that each polynomial has the same length. 
+fn poly_product_elementwise( + mut polys: impl Iterator>, +) -> PolynomialValues { + let mut product = polys.next().expect("Expected at least one polynomial"); + for poly in polys { + batch_multiply_inplace(&mut product.values, &poly.values) + } + product +} + fn get_permutation_challenge>( challenger: &mut Challenger, ) -> PermutationChallenge { From 67cb5dfd5880ffc8a080bd22f63c62f0d83fd7e4 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Thu, 17 Feb 2022 08:26:23 +0100 Subject: [PATCH 05/32] PR feedback --- plonky2/src/fri/mod.rs | 1 + plonky2/src/fri/reduction_strategies.rs | 7 ++++--- plonky2/src/plonk/circuit_data.rs | 2 +- starky/src/config.rs | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/plonky2/src/fri/mod.rs b/plonky2/src/fri/mod.rs index 5792444e..4ed2ea3b 100644 --- a/plonky2/src/fri/mod.rs +++ b/plonky2/src/fri/mod.rs @@ -35,6 +35,7 @@ impl FriConfig { let reduction_arity_bits = self.reduction_strategy.reduction_arity_bits( degree_bits, self.rate_bits, + self.cap_height, self.num_query_rounds, ); FriParams { diff --git a/plonky2/src/fri/reduction_strategies.rs b/plonky2/src/fri/reduction_strategies.rs index d81ac2ae..4252564e 100644 --- a/plonky2/src/fri/reduction_strategies.rs +++ b/plonky2/src/fri/reduction_strategies.rs @@ -8,12 +8,12 @@ pub enum FriReductionStrategy { /// Specifies the exact sequence of arities (expressed in bits) to use. Fixed(Vec), - /// `ConstantArityBits(arity_bits, final_poly_bits, cap_height)` applies reductions of arity `2^arity_bits` + /// `ConstantArityBits(arity_bits, final_poly_bits)` applies reductions of arity `2^arity_bits` /// until the polynomial degree is less than or equal to `2^final_poly_bits` or until any further /// `arity_bits`-reduction makes the last FRI tree have height less than `cap_height`. /// This tends to work well in the recursive setting, as it avoids needing multiple configurations /// of gates used in FRI verification, such as `InterpolationGate`. 
- ConstantArityBits(usize, usize, usize), + ConstantArityBits(usize, usize), /// `MinSize(opt_max_arity_bits)` searches for an optimal sequence of reduction arities, with an /// optional max `arity_bits`. If this proof will have recursive proofs on top of it, a max @@ -27,12 +27,13 @@ impl FriReductionStrategy { &self, mut degree_bits: usize, rate_bits: usize, + cap_height: usize, num_queries: usize, ) -> Vec { match self { FriReductionStrategy::Fixed(reduction_arity_bits) => reduction_arity_bits.to_vec(), - &FriReductionStrategy::ConstantArityBits(arity_bits, final_poly_bits, cap_height) => { + &FriReductionStrategy::ConstantArityBits(arity_bits, final_poly_bits) => { let mut result = Vec::new(); while degree_bits > final_poly_bits && degree_bits + rate_bits - arity_bits >= cap_height diff --git a/plonky2/src/plonk/circuit_data.rs b/plonky2/src/plonk/circuit_data.rs index fdec495e..3d4ee2df 100644 --- a/plonky2/src/plonk/circuit_data.rs +++ b/plonky2/src/plonk/circuit_data.rs @@ -73,7 +73,7 @@ impl CircuitConfig { rate_bits: 3, cap_height: 4, proof_of_work_bits: 16, - reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5, 4), + reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5), num_query_rounds: 28, }, } diff --git a/starky/src/config.rs b/starky/src/config.rs index 2e2cced7..500cd957 100644 --- a/starky/src/config.rs +++ b/starky/src/config.rs @@ -22,7 +22,7 @@ impl StarkConfig { rate_bits: 1, cap_height: 4, proof_of_work_bits: 10, - reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5, 4), + reduction_strategy: FriReductionStrategy::ConstantArityBits(4, 5), num_query_rounds: 90, }, } From a736aa8e705314ba5b061556946129ae838ffa76 Mon Sep 17 00:00:00 2001 From: Jakub Nabaglo Date: Thu, 17 Feb 2022 22:01:07 -0800 Subject: [PATCH 06/32] Update MDS matrix and round consts in Poseidon; disable vectorization (#493) --- plonky2/src/bin/generate_constants.rs | 11 +- plonky2/src/gates/poseidon_mds.rs | 14 +- 
plonky2/src/hash/arch/aarch64/mod.rs | 4 +- plonky2/src/hash/arch/x86_64/mod.rs | 10 +- plonky2/src/hash/poseidon.rs | 218 ++++++----- plonky2/src/hash/poseidon_goldilocks.rs | 501 ++++++++++++------------ 6 files changed, 376 insertions(+), 382 deletions(-) diff --git a/plonky2/src/bin/generate_constants.rs b/plonky2/src/bin/generate_constants.rs index d2744991..6527b361 100644 --- a/plonky2/src/bin/generate_constants.rs +++ b/plonky2/src/bin/generate_constants.rs @@ -7,22 +7,15 @@ use plonky2_field::goldilocks_field::GoldilocksField; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha8Rng; -// For historical reasons, we sample from 0..0xffffffff70000001, which is slightly larger than the -// range of GoldilocksField, then verify that each constant also fits in GoldilocksField. -const SAMPLE_RANGE_END: u64 = 0xffffffff70000001; +const SAMPLE_RANGE_END: u64 = GoldilocksField::ORDER; -// const N: usize = 8 * 30; // For Posiedon-8 -const N: usize = 12 * 30; // For Posiedon-12 +const N: usize = 12 * 30; // For Poseidon-12 pub(crate) fn main() { let mut rng = ChaCha8Rng::seed_from_u64(0); let mut constants = [0u64; N]; for i in 0..N { constants[i] = rng.gen_range(0..SAMPLE_RANGE_END); - // Make sure the constant fits in Goldilocks. If so, we also have random numbers in - // GoldilocksField::ORDER. This may be viewed as rejection sampling, except that we never - // encounter a rejection in practice, so we don't bother handling it. - assert!(constants[i] < GoldilocksField::ORDER); } // Print the constants in the format we prefer in our code. 
diff --git a/plonky2/src/gates/poseidon_mds.rs b/plonky2/src/gates/poseidon_mds.rs index 81583f88..8a989078 100644 --- a/plonky2/src/gates/poseidon_mds.rs +++ b/plonky2/src/gates/poseidon_mds.rs @@ -51,9 +51,13 @@ impl + Poseidon, const D: usize> PoseidonMdsGate::MDS_MATRIX_EXPS[i]); + let coeff = F::Extension::from_canonical_u64(::MDS_MATRIX_CIRC[i]); res += v[(i + r) % SPONGE_WIDTH].scalar_mul(coeff); } + { + let coeff = F::Extension::from_canonical_u64(::MDS_MATRIX_DIAG[r]); + res += v[r].scalar_mul(coeff); + } res } @@ -69,10 +73,16 @@ impl + Poseidon, const D: usize> PoseidonMdsGate::MDS_MATRIX_EXPS[i], + ::MDS_MATRIX_CIRC[i], )); res = builder.scalar_mul_add_ext_algebra(coeff, v[(i + r) % SPONGE_WIDTH], res); } + { + let coeff = builder.constant_extension(F::Extension::from_canonical_u64( + ::MDS_MATRIX_DIAG[r], + )); + res = builder.scalar_mul_add_ext_algebra(coeff, v[r], res); + } res } diff --git a/plonky2/src/hash/arch/aarch64/mod.rs b/plonky2/src/hash/arch/aarch64/mod.rs index b8ae14af..ba86797d 100644 --- a/plonky2/src/hash/arch/aarch64/mod.rs +++ b/plonky2/src/hash/arch/aarch64/mod.rs @@ -1,2 +1,2 @@ -#[cfg(target_feature = "neon")] -pub(crate) mod poseidon_goldilocks_neon; +// #[cfg(target_feature = "neon")] +// pub(crate) mod poseidon_goldilocks_neon; diff --git a/plonky2/src/hash/arch/x86_64/mod.rs b/plonky2/src/hash/arch/x86_64/mod.rs index fa3681d0..0730b626 100644 --- a/plonky2/src/hash/arch/x86_64/mod.rs +++ b/plonky2/src/hash/arch/x86_64/mod.rs @@ -1,5 +1,5 @@ -// Requires: -// - AVX2 -// - BMI2 (for MULX and SHRX) -#[cfg(all(target_feature = "avx2", target_feature = "bmi2"))] -pub(crate) mod poseidon_goldilocks_avx2_bmi2; +// // Requires: +// // - AVX2 +// // - BMI2 (for MULX and SHRX) +// #[cfg(all(target_feature = "avx2", target_feature = "bmi2"))] +// pub(crate) mod poseidon_goldilocks_avx2_bmi2; diff --git a/plonky2/src/hash/poseidon.rs b/plonky2/src/hash/poseidon.rs index 09c5d2fc..9c202834 100644 --- a/plonky2/src/hash/poseidon.rs +++ 
b/plonky2/src/hash/poseidon.rs @@ -55,96 +55,96 @@ pub const ALL_ROUND_CONSTANTS: [u64; MAX_WIDTH * N_ROUNDS] = [ // WARNING: If these are changed in any way, then all the // implementations of Poseidon must be regenerated. See comments // in `poseidon_goldilocks.rs`. - 0xb585f767417ee042, 0x7746a55f77c10331, 0xb2fb0d321d356f7a, 0x0f6760a486f1621f, - 0xe10d6666b36abcdf, 0x8cae14cb455cc50b, 0xd438539cf2cee334, 0xef781c7d4c1fd8b4, - 0xcdc4a23a0aca4b1f, 0x277fa208d07b52e3, 0xe17653a300493d38, 0xc54302f27c287dc1, - 0x8628782231d47d10, 0x59cd1a8a690b49f2, 0xc3b919ad9efec0b0, 0xa484c4c637641d97, - 0x308bbd23f191398b, 0x6e4a40c1bf713cf1, 0x9a2eedb7510414fb, 0xe360c6e111c2c63b, - 0xd5c771901d4d89aa, 0xc35eae076e7d6b2f, 0x849c2656d0a09cad, 0xc0572c8c5cf1df2b, - 0xe9fa634a883b8bf3, 0xf56f6d4900fb1fdd, 0xf7d713e872a72a1b, 0x8297132b6ba47612, - 0xad6805e12ee8af1c, 0xac51d9f6485c22b9, 0x502ad7dc3bd56bf8, 0x57a1550c3761c577, - 0x66bbd30e99d311da, 0x0da2abef5e948f87, 0xf0612750443f8e94, 0x28b8ec3afb937d8c, - 0x92a756e6be54ca18, 0x70e741ec304e925d, 0x019d5ee2b037c59f, 0x6f6f2ed7a30707d1, - 0x7cf416d01e8c169c, 0x61df517bb17617df, 0x85dc499b4c67dbaa, 0x4b959b48dad27b23, - 0xe8be3e5e0dd779a0, 0xf5c0bc1e525ed8e6, 0x40b12cbf263cf853, 0xa637093f13e2ea3c, - 0x3cc3f89232e3b0c8, 0x2e479dc16bfe86c0, 0x6f49de07d6d39469, 0x213ce7beecc232de, - 0x5b043134851fc00a, 0xa2de45784a861506, 0x7103aaf97bed8dd5, 0x5326fc0dbb88a147, - 0xa9ceb750364cb77a, 0x27f8ec88cc9e991f, 0xfceb4fda8c93fb83, 0xfac6ff13b45b260e, - 0x7131aa455813380b, 0x93510360d5d68119, 0xad535b24fb96e3db, 0x4627f5c6b7efc045, - 0x645cf794e4da78a9, 0x241c70ed1ac2877f, 0xacb8e076b009e825, 0x3737e9db6477bd9d, - 0xe7ea5e344cd688ed, 0x90dee4a009214640, 0xd1b1edf7c77e74af, 0x0b65481bab42158e, - 0x99ad1aab4b4fe3e7, 0x438a7c91f1a360cd, 0xb60de3bd159088bf, 0xc99cab6b47a3e3bb, - 0x69a5ed92d5677cef, 0x5e7b329c482a9396, 0x5fc0ac0829f893c9, 0x32db82924fb757ea, - 0x0ade699c5cf24145, 0x7cc5583b46d7b5bb, 0x85df9ed31bf8abcb, 0x6604df501ad4de64, - 
0xeb84f60941611aec, 0xda60883523989bd4, 0x8f97fe40bf3470bf, 0xa93f485ce0ff2b32, - 0x6704e8eebc2afb4b, 0xcee3e9ac788ad755, 0x510d0e66062a270d, 0xf6323f48d74634a0, - 0x0b508cdf04990c90, 0xf241708a4ef7ddf9, 0x60e75c28bb368f82, 0xa6217d8c3f0f9989, - 0x7159cd30f5435b53, 0x839b4e8fe97ec79f, 0x0d3f3e5e885db625, 0x8f7d83be1daea54b, - 0x780f22441e8dbc04, 0xeb9158465aedacd3, 0xd19e120d826c1b6c, 0x016ee53a7f007110, - 0xcb5fd54ed22dd1ca, 0xacb84178c58de144, 0x9c22190c2c463227, 0x5d693c1bcc98406d, - 0xdcef0798235f321a, 0x3d639263f55e0b1e, 0xe273fd977edb8fda, 0x418f027049d10fe7, - 0x8c25fda3f253a284, 0x2cbaed4dc25a884e, 0x5f58e6aff78dc2af, 0x284650ac6fb9d206, - 0x635b337f1391c13c, 0x9f9a036f1ac6361f, 0xb93e260cff6747b4, 0xb0a7eae8c7272e33, - 0xd0762cbce7da0a9f, 0x34c6efb829c754d6, 0x40bf0ab6166855c1, 0xb6b570fccc46a242, - 0x5a27b90055549545, 0xb1a5b166048b306f, 0x8722e0ad24f1006d, 0x788ee3b3b315049a, - 0x14a726661e5b0351, 0x98b7672fe1c3f13e, 0xbb93ae77bdc3aa8f, 0x28fd3b04756fc222, - 0x30a46805a86d7109, 0x337dc00c7844a0e7, 0xd5eca245253c861b, 0x77626382990d8546, - 0xc1e434bf33c3ae7a, 0x0299351a54dbf35e, 0xb2d456e4fb620184, 0x3e9ed1fdc00265ea, - 0x2972a92bb672e8db, 0x20216dd789f333ec, 0xadffe8cf746494a1, 0x1c4dbb1c5889d420, - 0x15a16a8a8c9972f5, 0x388a128b98960e26, 0x2300e5d6ca3e5589, 0x2f63aa865c9ceb9f, - 0xf1c36ce8d894420f, 0x271811252953f84a, 0xe5840293d5466a8e, 0x4d9bbc3e24e5f20e, - 0xea35bc29cfa2794b, 0x18e21b4bf59e2d28, 0x1e3b9fc632ef6adb, 0x25d643627a05e678, - 0x5a3f1bb1ecb63263, 0xdb7f0238ca031e31, 0xb462065960bfc4c4, 0x49c24ae463c280f4, - 0xd793862c6f7b901a, 0xaadd1106bdce475e, 0xc43b6e0eed8ad58f, 0xe29024c1f2060cb7, - 0x5e50c2755efbe17a, 0x10383f20ac183625, 0x38e8ee9d8a8a435d, 0xdd511837bcc52452, - 0x7750059861a7da6a, 0x86ab99b518d1dbef, 0xb1204f608ccfe33b, 0xef61ac84d8dfca49, - 0x1bbcd90f1f4eff36, 0x0cd1dabd9be9850a, 0x11a3ae5bf354bb11, 0xf755bfef11bb5516, - 0xa3b832506e2f3adb, 0x516306f4b617e6ba, 0xddb4ac4a2aeead3a, 0x64bb6dec62af4430, - 0xf9cc95c29895a152, 
0x08d37f75632771b9, 0xeec49b619cee6b56, 0xf143933b56b3711a, - 0xe4c5dd82b9f6570c, 0xe7ad775756eefdc4, 0x92c2318bc834ef78, 0x739c25f93007aa0a, - 0x5636caca1725f788, 0xdd8f909af47cd0b6, 0xc6401fe16bc24d4e, 0x8ad97b342e6b3a3c, - 0x0c49366bb7be8ce2, 0x0784d3d2f4b39fb5, 0x530fb67ec5d77a58, 0x41049229b8221f3b, - 0x139542347cb606a3, 0x9cb0bd5ee62e6438, 0x02e3f615c4d3054a, 0x985d4f4adefb64a0, - 0x775b9feb32053cde, 0x304265a64d6c1ba6, 0x593664c3be7acd42, 0x4f0a2e5fd2bd6718, - 0xdd611f10619bf1da, 0xd8185f9b3e74f9a4, 0xef87139d126ec3b3, 0x3ba71336dd67f99b, - 0x7d3a455d8d808091, 0x660d32e15cbdecc7, 0x297a863f5af2b9ff, 0x90e0a736e6b434df, - 0x549f80ce7a12182e, 0x0f73b29235fb5b84, 0x16bf1f74056e3a01, 0x6d1f5a593019a39f, - 0x02ff876fa73f6305, 0xc5cb72a2fb9a5bd7, 0x8470f39d674dfaa3, 0x25abb3f1e41aea30, - 0x23eb8cc9c32951c7, 0xd687ba56242ac4ea, 0xda8d9e915d2de6b7, 0xe3cbdc7d938d8f1e, - 0xb9a8c9b4001efad6, 0xc0d28a5c64f2285c, 0x45d7ac9b878575b8, 0xeeb76e39d8da283e, - 0x3d06c8bd2fc7daac, 0x9c9c9820c13589f5, 0x65700b51db40bae3, 0x911f451579044242, - 0x7ae6849ff1fee8cc, 0x3bb340ebba896ae5, 0xb46e9d8bb71f0b4b, 0x8dcf22f9e1bde2a3, - 0x77bdaeda8cc55427, 0xf19e400ababa0e12, 0xc368a34939eb5c7f, 0x9ef1cd612c03bc5e, - 0xe89cd8553b94bbd8, 0x5cd377dcb4550713, 0xa7b0fb78cd4c5665, 0x7684403ef76c7128, - 0x5fa3f06f79c4f483, 0x8df57ac159dbade6, 0x2db01efa321b2625, 0x54846de4cfd58cb6, - 0xba674538aa20f5cd, 0x541d4963699f9777, 0xe9096784dadaa548, 0xdfe8992458bf85ff, - 0xece5a71e74a35593, 0x5ff98fd5ff1d14fd, 0x83e89419524c06e1, 0x5922040b6ef03286, - 0xf97d750eab002858, 0x5080d4c2dba7b3ec, 0xa7de115ba038b508, 0x6a9242acb5f37ec0, - 0xf7856ef865619ed0, 0x2265fc930dbd7a89, 0x17dfc8e5022c723b, 0x9001a64248f2d676, - 0x90004c13b0b8b50e, 0xb932b7cfc63485b0, 0xa0b1df81fd4c2bc5, 0x8ef1dd26b594c383, - 0x0541a4f9d20ba562, 0x9e611061be0a3c5b, 0xb3767e80e1e1624a, 0x0098d57820a88c6b, - 0x31d191cd71e01691, 0x410fefafbf90a57a, 0xbdf8f2433633aea8, 0x9e8cd55b9cc11c28, - 0xde122bec4acb869f, 0x4d001fd5b0b03314, 
0xca66370067416209, 0x2f2339d6399888c6, - 0x6d1a7918f7c98a13, 0xdf9a493995f688f3, 0xebc2151f4ded22ca, 0x03cc2ba8a2bab82f, - 0xd341d03844ad9a9b, 0x387cb5d273ab3f58, 0xbba2515f74a7a221, 0x7248fe7737f37d9c, - 0x4d61e56a7437f6b9, 0x262e963c9e54bef8, 0x59e89b097477d296, 0x055d5b52b9e47452, - 0x82b27eb36e430708, 0xd30094caf3080f94, 0xcf5cb38227c2a3be, 0xfeed4db701262c7c, - 0x41703f5391dd0154, 0x5eeea9412666f57b, 0x4cd1f1b196abdbc4, 0x4a20358594b3662b, - 0x1478d361e4b47c26, 0x6f02dc0801d2c79f, 0x296a202eeb03c4b6, 0x2afd6799aec20c38, - 0x7acfd96f3050383d, 0x6798ba0c380dfdd3, 0x34c6f57b3de02c88, 0x5736e1baf82eb8a0, - 0x20057d2a0e58b8de, 0x3dea5bd5eb6e1404, 0x16e50d89874a6a98, 0x29bff3eccbfba19a, - 0x475cd3207974793c, 0x18a42105cde34cfa, 0x023e7414b0618331, 0x151471081b52594b, - 0xe4a3dff23bdeb0f3, 0x01a8d1a588c232ef, 0x11b4c74ee221d621, 0xe587cc0dce129c8c, - 0x1ff7327025a65080, 0x594e29c44b8602b1, 0xf6f31db1f5a56fd3, 0xc02ac5e4c7258a5e, - 0xe70201e9c5dc598f, 0x6f90ff3b9b3560b2, 0x42747a7262faf016, 0xd1f507e496927d26, - 0x1c86d265fdd24cd9, 0x3996ce73f6b5266e, 0x8e7fba02d68a061e, 0xba0dec71548b7546, - 0x9e9cbd785b8d8f40, 0xdae86459f6b3828c, 0xdebe08541314f71d, 0xa49229d29501358f, - 0x7be5ba0010c4df7c, 0xa3c95eaf09ecc39c, 0x0230bca8f5d457cd, 0x4135c2bedc68cdf9, - 0x166fc0cc4d5b20cc, 0x3762b59aa3236e6e, 0xe8928a4ceed163d2, 0x2a440b51b71223d9, - 0x80cefd2bb5f48e46, 0xbb9879c738328b71, 0x6e7c8f1ab47cced0, 0x164bb2de257ffc0a, - 0xf3c12fe5b800ea30, 0x40b9e92309e8c7e1, 0x551f5b0fe3b8d017, 0x25032aa7d4fc7aba, - 0xaaed340795de0a0a, 0x8ffd96bc38c8ba0f, 0x70fc91eb8aa58833, 0x7f795e2a97566d73, - 0x4543d9df72c4831d, 0xf172d73e69f20739, 0xdfd1c4ff1eb3d868, 0xbc8dfb62d26376f7, + 0xb585f766f2144405, 0x7746a55f43921ad7, 0xb2fb0d31cee799b4, 0x0f6760a4803427d7, + 0xe10d666650f4e012, 0x8cae14cb07d09bf1, 0xd438539c95f63e9f, 0xef781c7ce35b4c3d, + 0xcdc4a239b0c44426, 0x277fa208bf337bff, 0xe17653a29da578a1, 0xc54302f225db2c76, + 0x86287821f722c881, 0x59cd1a8a41c18e55, 0xc3b919ad495dc574, 
0xa484c4c5ef6a0781, + 0x308bbd23dc5416cc, 0x6e4a40c18f30c09c, 0x9a2eedb70d8f8cfa, 0xe360c6e0ae486f38, + 0xd5c7718fbfc647fb, 0xc35eae071903ff0b, 0x849c2656969c4be7, 0xc0572c8c08cbbbad, + 0xe9fa634a21de0082, 0xf56f6d48959a600d, 0xf7d713e806391165, 0x8297132b32825daf, + 0xad6805e0e30b2c8a, 0xac51d9f5fcf8535e, 0x502ad7dc18c2ad87, 0x57a1550c110b3041, + 0x66bbd30e6ce0e583, 0x0da2abef589d644e, 0xf061274fdb150d61, 0x28b8ec3ae9c29633, + 0x92a756e67e2b9413, 0x70e741ebfee96586, 0x019d5ee2af82ec1c, 0x6f6f2ed772466352, + 0x7cf416cfe7e14ca1, 0x61df517b86a46439, 0x85dc499b11d77b75, 0x4b959b48b9c10733, + 0xe8be3e5da8043e57, 0xf5c0bc1de6da8699, 0x40b12cbf09ef74bf, 0xa637093ecb2ad631, + 0x3cc3f892184df408, 0x2e479dc157bf31bb, 0x6f49de07a6234346, 0x213ce7bede378d7b, + 0x5b0431345d4dea83, 0xa2de45780344d6a1, 0x7103aaf94a7bf308, 0x5326fc0d97279301, + 0xa9ceb74fec024747, 0x27f8ec88bb21b1a3, 0xfceb4fda1ded0893, 0xfac6ff1346a41675, + 0x7131aa45268d7d8c, 0x9351036095630f9f, 0xad535b24afc26bfb, 0x4627f5c6993e44be, + 0x645cf794b8f1cc58, 0x241c70ed0af61617, 0xacb8e076647905f1, 0x3737e9db4c4f474d, + 0xe7ea5e33e75fffb6, 0x90dee49fc9bfc23a, 0xd1b1edf76bc09c92, 0x0b65481ba645c602, + 0x99ad1aab0814283b, 0x438a7c91d416ca4d, 0xb60de3bcc5ea751c, 0xc99cab6aef6f58bc, + 0x69a5ed92a72ee4ff, 0x5e7b329c1ed4ad71, 0x5fc0ac0800144885, 0x32db829239774eca, + 0x0ade699c5830f310, 0x7cc5583b10415f21, 0x85df9ed2e166d64f, 0x6604df4fee32bcb1, + 0xeb84f608da56ef48, 0xda608834c40e603d, 0x8f97fe408061f183, 0xa93f485c96f37b89, + 0x6704e8ee8f18d563, 0xcee3e9ac1e072119, 0x510d0e65e2b470c1, 0xf6323f486b9038f0, + 0x0b508cdeffa5ceef, 0xf2417089e4fb3cbd, 0x60e75c2890d15730, 0xa6217d8bf660f29c, + 0x7159cd30c3ac118e, 0x839b4e8fafead540, 0x0d3f3e5e82920adc, 0x8f7d83bddee7bba8, + 0x780f2243ea071d06, 0xeb915845f3de1634, 0xd19e120d26b6f386, 0x016ee53a7e5fecc6, + 0xcb5fd54e7933e477, 0xacb8417879fd449f, 0x9c22190be7f74732, 0x5d693c1ba3ba3621, + 0xdcef0797c2b69ec7, 0x3d639263da827b13, 0xe273fd971bc8d0e7, 0x418f02702d227ed5, + 
0x8c25fda3b503038c, 0x2cbaed4daec8c07c, 0x5f58e6afcdd6ddc2, 0x284650ac5e1b0eba, + 0x635b337ee819dab5, 0x9f9a036ed4f2d49f, 0xb93e260cae5c170e, 0xb0a7eae879ddb76d, + 0xd0762cbc8ca6570c, 0x34c6efb812b04bf5, 0x40bf0ab5fa14c112, 0xb6b570fc7c5740d3, + 0x5a27b9002de33454, 0xb1a5b165b6d2b2d2, 0x8722e0ace9d1be22, 0x788ee3b37e5680fb, + 0x14a726661551e284, 0x98b7672f9ef3b419, 0xbb93ae776bb30e3a, 0x28fd3b046380f850, + 0x30a4680593258387, 0x337dc00c61bd9ce1, 0xd5eca244c7a4ff1d, 0x7762638264d279bd, + 0xc1e434bedeefd767, 0x0299351a53b8ec22, 0xb2d456e4ad251b80, 0x3e9ed1fda49cea0b, + 0x2972a92ba450bed8, 0x20216dd77be493de, 0xadffe8cf28449ec6, 0x1c4dbb1c4c27d243, + 0x15a16a8a8322d458, 0x388a128b7fd9a609, 0x2300e5d6baedf0fb, 0x2f63aa8647e15104, + 0xf1c36ce86ecec269, 0x27181125183970c9, 0xe584029370dca96d, 0x4d9bbc3e02f1cfb2, + 0xea35bc29692af6f8, 0x18e21b4beabb4137, 0x1e3b9fc625b554f4, 0x25d64362697828fd, + 0x5a3f1bb1c53a9645, 0xdb7f023869fb8d38, 0xb462065911d4e1fc, 0x49c24ae4437d8030, + 0xd793862c112b0566, 0xaadd1106730d8feb, 0xc43b6e0e97b0d568, 0xe29024c18ee6fca2, + 0x5e50c27535b88c66, 0x10383f20a4ff9a87, 0x38e8ee9d71a45af8, 0xdd5118375bf1a9b9, + 0x775005982d74d7f7, 0x86ab99b4dde6c8b0, 0xb1204f603f51c080, 0xef61ac8470250ecf, + 0x1bbcd90f132c603f, 0x0cd1dabd964db557, 0x11a3ae5beb9d1ec9, 0xf755bfeea585d11d, + 0xa3b83250268ea4d7, 0x516306f4927c93af, 0xddb4ac49c9efa1da, 0x64bb6dec369d4418, + 0xf9cc95c22b4c1fcc, 0x08d37f755f4ae9f6, 0xeec49b613478675b, 0xf143933aed25e0b0, + 0xe4c5dd8255dfc622, 0xe7ad7756f193198e, 0x92c2318b87fff9cb, 0x739c25f8fd73596d, + 0x5636cac9f16dfed0, 0xdd8f909a938e0172, 0xc6401fe115063f5b, 0x8ad97b33f1ac1455, + 0x0c49366bb25e8513, 0x0784d3d2f1698309, 0x530fb67ea1809a81, 0x410492299bb01f49, + 0x139542347424b9ac, 0x9cb0bd5ea1a1115e, 0x02e3f615c38f49a1, 0x985d4f4a9c5291ef, + 0x775b9feafdcd26e7, 0x304265a6384f0f2d, 0x593664c39773012c, 0x4f0a2e5fb028f2ce, + 0xdd611f1000c17442, 0xd8185f9adfea4fd0, 0xef87139ca9a3ab1e, 0x3ba71336c34ee133, + 0x7d3a455d56b70238, 
0x660d32e130182684, 0x297a863f48cd1f43, 0x90e0a736a751ebb7, + 0x549f80ce550c4fd3, 0x0f73b2922f38bd64, 0x16bf1f73fb7a9c3f, 0x6d1f5a59005bec17, + 0x02ff876fa5ef97c4, 0xc5cb72a2a51159b0, 0x8470f39d2d5c900e, 0x25abb3f1d39fcb76, + 0x23eb8cc9b372442f, 0xd687ba55c64f6364, 0xda8d9e90fd8ff158, 0xe3cbdc7d2fe45ea7, + 0xb9a8c9b3aee52297, 0xc0d28a5c10960bd3, 0x45d7ac9b68f71a34, 0xeeb76e397069e804, + 0x3d06c8bd1514e2d9, 0x9c9c98207cb10767, 0x65700b51aedfb5ef, 0x911f451539869408, + 0x7ae6849fbc3a0ec6, 0x3bb340eba06afe7e, 0xb46e9d8b682ea65e, 0x8dcf22f9a3b34356, + 0x77bdaeda586257a7, 0xf19e400a5104d20d, 0xc368a348e46d950f, 0x9ef1cd60e679f284, + 0xe89cd854d5d01d33, 0x5cd377dc8bb882a2, 0xa7b0fb7883eee860, 0x7684403ec392950d, + 0x5fa3f06f4fed3b52, 0x8df57ac11bc04831, 0x2db01efa1e1e1897, 0x54846de4aadb9ca2, + 0xba6745385893c784, 0x541d496344d2c75b, 0xe909678474e687fe, 0xdfe89923f6c9c2ff, + 0xece5a71e0cfedc75, 0x5ff98fd5d51fe610, 0x83e8941918964615, 0x5922040b47f150c1, + 0xf97d750e3dd94521, 0x5080d4c2b86f56d7, 0xa7de115b56c78d70, 0x6a9242ac87538194, + 0xf7856ef7f9173e44, 0x2265fc92feb0dc09, 0x17dfc8e4f7ba8a57, 0x9001a64209f21db8, + 0x90004c1371b893c5, 0xb932b7cf752e5545, 0xa0b1df81b6fe59fc, 0x8ef1dd26770af2c2, + 0x0541a4f9cfbeed35, 0x9e61106178bfc530, 0xb3767e80935d8af2, 0x0098d5782065af06, + 0x31d191cd5c1466c7, 0x410fefafa319ac9d, 0xbdf8f242e316c4ab, 0x9e8cd55b57637ed0, + 0xde122bebe9a39368, 0x4d001fd58f002526, 0xca6637000eb4a9f8, 0x2f2339d624f91f78, + 0x6d1a7918c80df518, 0xdf9a4939342308e9, 0xebc2151ee6c8398c, 0x03cc2ba8a1116515, + 0xd341d037e840cf83, 0x387cb5d25af4afcc, 0xbba2515f22909e87, 0x7248fe7705f38e47, + 0x4d61e56a525d225a, 0x262e963c8da05d3d, 0x59e89b094d220ec2, 0x055d5b52b78b9c5e, + 0x82b27eb33514ef99, 0xd30094ca96b7ce7b, 0xcf5cb381cd0a1535, 0xfeed4db6919e5a7c, + 0x41703f53753be59f, 0x5eeea940fcde8b6f, 0x4cd1f1b175100206, 0x4a20358574454ec0, + 0x1478d361dbbf9fac, 0x6f02dc07d141875c, 0x296a202ed8e556a2, 0x2afd67999bf32ee5, + 0x7acfd96efa95491d, 0x6798ba0c0abb2c6d, 
0x34c6f57b26c92122, 0x5736e1bad206b5de, + 0x20057d2a0056521b, 0x3dea5bd5d0578bd7, 0x16e50d897d4634ac, 0x29bff3ecb9b7a6e3, + 0x475cd3205a3bdcde, 0x18a42105c31b7e88, 0x023e7414af663068, 0x15147108121967d7, + 0xe4a3dff1d7d6fef9, 0x01a8d1a588085737, 0x11b4c74eda62beef, 0xe587cc0d69a73346, + 0x1ff7327017aa2a6e, 0x594e29c42473d06b, 0xf6f31db1899b12d5, 0xc02ac5e47312d3ca, + 0xe70201e960cb78b8, 0x6f90ff3b6a65f108, 0x42747a7245e7fa84, 0xd1f507e43ab749b2, + 0x1c86d265f15750cd, 0x3996ce73dd832c1c, 0x8e7fba02983224bd, 0xba0dec7103255dd4, + 0x9e9cbd781628fc5b, 0xdae8645996edd6a5, 0xdebe0853b1a1d378, 0xa49229d24d014343, + 0x7be5b9ffda905e1c, 0xa3c95eaec244aa30, 0x0230bca8f4df0544, 0x4135c2bebfe148c6, + 0x166fc0cc438a3c72, 0x3762b59a8ae83efa, 0xe8928a4c89114750, 0x2a440b51a4945ee5, + 0x80cefd2b7d99ff83, 0xbb9879c6e61fd62a, 0x6e7c8f1a84265034, 0x164bb2de1bbeddc8, + 0xf3c12fe54d5c653b, 0x40b9e922ed9771e2, 0x551f5b0fbe7b1840, 0x25032aa7c4cb1811, + 0xaaed34074b164346, 0x8ffd96bbf9c9c81d, 0x70fc91eb5937085c, 0x7f795e2a5f915440, + 0x4543d9df5476d3cb, 0xf172d73e004fc90d, 0xdfd1c4febcc81238, 0xbc8dfb627fe558fc, ]; const WIDTH: usize = SPONGE_WIDTH; @@ -153,9 +153,10 @@ pub trait Poseidon: PrimeField64 { // times number of rounds. const N_ROUND_CONSTANTS: usize = WIDTH * N_ROUNDS; - // Use the MDS matrix which is circulant with entries 2^x for each - // x in MDS_MATRIX_EXPS. - const MDS_MATRIX_EXPS: [u64; WIDTH]; + // The MDS matrix we use is C + D, where C is the circulant matrix whose first row is given by + // `MDS_MATRIX_CIRC`, and D is the diagonal matrix whose diagonal is given by `MDS_MATRIX_DIAG`. + const MDS_MATRIX_CIRC: [u64; WIDTH]; + const MDS_MATRIX_DIAG: [u64; WIDTH]; // Precomputed constants for the fast Poseidon calculation. See // the paper. 
@@ -169,9 +170,10 @@ pub trait Poseidon: PrimeField64 { #[unroll_for_loops] fn mds_row_shf(r: usize, v: &[u64; WIDTH]) -> u128 { debug_assert!(r < WIDTH); - // The values of MDS_MATRIX_EXPS are known to be small, so we can - // accumulate all the products for each row and reduce just once - // at the end (done by the caller). + // The values of `MDS_MATRIX_CIRC` and `MDS_MATRIX_DIAG` are + // known to be small, so we can accumulate all the products for + // each row and reduce just once at the end (done by the + // caller). // NB: Unrolling this, calculating each term independently, and // summing at the end, didn't improve performance for me. @@ -180,9 +182,10 @@ pub trait Poseidon: PrimeField64 { // This is a hacky way of fully unrolling the loop. for i in 0..12 { if i < WIDTH { - res += (v[(i + r) % WIDTH] as u128) << Self::MDS_MATRIX_EXPS[i]; + res += (v[(i + r) % WIDTH] as u128) * (Self::MDS_MATRIX_CIRC[i] as u128); } } + res += (v[r] as u128) * (Self::MDS_MATRIX_DIAG[r] as u128); res } @@ -196,8 +199,9 @@ pub trait Poseidon: PrimeField64 { let mut res = F::ZERO; for i in 0..WIDTH { - res += v[(i + r) % WIDTH] * F::from_canonical_u64(1 << Self::MDS_MATRIX_EXPS[i]); + res += v[(i + r) % WIDTH] * F::from_canonical_u64(Self::MDS_MATRIX_CIRC[i]); } + res += v[r] * F::from_canonical_u64(Self::MDS_MATRIX_DIAG[r]); res } @@ -215,9 +219,13 @@ pub trait Poseidon: PrimeField64 { let mut res = builder.zero_extension(); for i in 0..WIDTH { - let c = Self::from_canonical_u64(1 << ::MDS_MATRIX_EXPS[i]); + let c = Self::from_canonical_u64(::MDS_MATRIX_CIRC[i]); res = builder.mul_const_add_extension(c, v[(i + r) % WIDTH], res); } + { + let c = Self::from_canonical_u64(::MDS_MATRIX_DIAG[r]); + res = builder.mul_const_add_extension(c, v[r], res); + } res } @@ -395,7 +403,8 @@ pub trait Poseidon: PrimeField64 { } } let s0 = state[0].to_noncanonical_u64() as u128; - d_sum = add_u160_u128(d_sum, s0 << Self::MDS_MATRIX_EXPS[0]); + let mds0to0 = (Self::MDS_MATRIX_CIRC[0] + 
Self::MDS_MATRIX_DIAG[0]) as u128; + d_sum = add_u160_u128(d_sum, s0 * mds0to0); let d = reduce_u160::(d_sum); // result = [d] concat [state[0] * v + state[shift up by 1]] @@ -416,7 +425,8 @@ pub trait Poseidon: PrimeField64 { r: usize, ) -> [F; WIDTH] { let s0 = state[0]; - let mut d = s0 * F::from_canonical_u64(1 << Self::MDS_MATRIX_EXPS[0]); + let mds0to0 = Self::MDS_MATRIX_CIRC[0] + Self::MDS_MATRIX_DIAG[0]; + let mut d = s0 * F::from_canonical_u64(mds0to0); for i in 1..WIDTH { let t = F::from_canonical_u64(Self::FAST_PARTIAL_ROUND_W_HATS[r][i - 1]); d += state[i] * t; @@ -442,10 +452,8 @@ pub trait Poseidon: PrimeField64 { Self: RichField + Extendable, { let s0 = state[0]; - let mut d = builder.mul_const_extension( - Self::from_canonical_u64(1 << ::MDS_MATRIX_EXPS[0]), - s0, - ); + let mds0to0 = Self::MDS_MATRIX_CIRC[0] + Self::MDS_MATRIX_DIAG[0]; + let mut d = builder.mul_const_extension(Self::from_canonical_u64(mds0to0), s0); for i in 1..WIDTH { let t = ::FAST_PARTIAL_ROUND_W_HATS[r][i - 1]; let t = Self::Extension::from_canonical_u64(t); diff --git a/plonky2/src/hash/poseidon_goldilocks.rs b/plonky2/src/hash/poseidon_goldilocks.rs index 7b82bb01..971fda0f 100644 --- a/plonky2/src/hash/poseidon_goldilocks.rs +++ b/plonky2/src/hash/poseidon_goldilocks.rs @@ -10,8 +10,8 @@ use crate::hash::poseidon::{Poseidon, N_PARTIAL_ROUNDS}; #[rustfmt::skip] impl Poseidon for GoldilocksField { - // The MDS matrix we use is the circulant matrix with first row given by the vector - // [ 2^x for x in MDS_MATRIX_EXPS] = [1, 1, 2, 1, 8, 32, 2, 256, 4096, 8, 65536, 1024] + // The MDS matrix we use is C + D, where C is the circulant matrix whose first row is given by + // `MDS_MATRIX_CIRC`, and D is the diagonal matrix whose diagonal is given by `MDS_MATRIX_DIAG`. 
// // WARNING: If the MDS matrix is changed, then the following // constants need to be updated accordingly: @@ -19,253 +19,254 @@ impl Poseidon for GoldilocksField { // - FAST_PARTIAL_ROUND_VS // - FAST_PARTIAL_ROUND_W_HATS // - FAST_PARTIAL_ROUND_INITIAL_MATRIX - const MDS_MATRIX_EXPS: [u64; 12] = [0, 0, 1, 0, 3, 5, 1, 8, 12, 3, 16, 10]; + const MDS_MATRIX_CIRC: [u64; 12] = [17, 15, 41, 16, 2, 28, 13, 13, 39, 18, 34, 20]; + const MDS_MATRIX_DIAG: [u64; 12] = [8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; const FAST_PARTIAL_FIRST_ROUND_CONSTANT: [u64; 12] = [ - 0x3cc3f89232e3b0c8, 0x3a8304bc56985013, 0x2a9f75c2280d2a8e, 0x53b9e0fac07c9b2b, - 0x276ef5190ab36dd6, 0xdccc95c1f434ce8d, 0x28d717d689301db6, 0x2662f1723650b872, - 0xc6b0375cf47850da, 0xbdfcca7661d81f17, 0x911992a4f6d9591f, 0xb718e4720c9f542f, + 0x3cc3f892184df408, 0xe993fd841e7e97f1, 0xf2831d3575f0f3af, 0xd2500e0a350994ca, + 0xc5571f35d7288633, 0x91d89c5184109a02, 0xf37f925d04e5667b, 0x2d6e448371955a69, + 0x740ef19ce01398a1, 0x694d24c0752fdf45, 0x60936af96ee2f148, 0xc33448feadc78f0c, ]; const FAST_PARTIAL_ROUND_CONSTANTS: [u64; N_PARTIAL_ROUNDS] = [ - 0x1c92804be083d129, 0x81d932f4620fcfc6, 0x29f58a72045f76a0, 0x434472d6c6e34f30, - 0xc82c90fad781bb5c, 0xe6dfefae3135c450, 0xd0a0c9c9fff4798f, 0x97517f4034e7c8e6, - 0xae8b5030952e5949, 0xf77251b77cc297e2, 0x879c3a97606f1160, 0xed4e1e98780bdc19, - 0x5a9120e0c05b1660, 0xc4b244ea04b27221, 0x7fe9d55a335d7b82, 0xd69ff91c66ec999a, - 0x4c389b1b8180f1f5, 0x1b289f8c7fdeea1e, 0x3d464c75140b20e7, 0x74d158e1be40eb73, - 0xfc787193d2a84ea4, 0x0, + 0x74cb2e819ae421ab, 0xd2559d2370e7f663, 0x62bf78acf843d17c, 0xd5ab7b67e14d1fb4, + 0xb9fe2ae6e0969bdc, 0xe33fdf79f92a10e8, 0x0ea2bb4c2b25989b, 0xca9121fbf9d38f06, + 0xbdd9b0aa81f58fa4, 0x83079fa4ecf20d7e, 0x650b838edfcc4ad3, 0x77180c88583c76ac, + 0xaf8c20753143a180, 0xb8ccfe9989a39175, 0x954a1729f60cc9c5, 0xdeb5b550c4dca53b, + 0xf01bb0b00f77011e, 0xa1ebb404b676afd9, 0x860b6e1597a0173e, 0x308bb65a036acbce, + 0x1aca78f31c97c876, 0x0, ]; const 
FAST_PARTIAL_ROUND_VS: [[u64; 12 - 1]; N_PARTIAL_ROUNDS] = [ - [0x9a5dd25dc32e6569, 0xd4b82de00e7510fa, 0x165bdcd7b344404a, 0xa85b4c126b8edfd4, - 0xcd2735bf92ab4f96, 0xdc07742c7da8ac41, 0x953fc266fc5ae49f, 0x0a151c20bfc847bf, - 0x0c550caef5afedb5, 0x74d28901888c5fa8, 0xdc51b68c30cc1741, ], - [0x4f765e0a4246c828, 0xbbdc8cbadd477a84, 0x052a5abd7de2344c, 0xab88daa04d9c7fab, - 0xbc8fd7acbee798ef, 0xe55d796c0d8a7a09, 0x40824732ed2c556c, 0x298a94d56eabeaa4, - 0x719fcd5e11312b6c, 0x1ec9a560131d1ac7, 0xabc54a42497f7fd1, ], - [0xb51f81e6eeeeb0d6, 0xc6f3c34e7161d1ef, 0x1e93b9e2255eed5b, 0xa78338e63ec48cc2, - 0xea6e89d1c7220a56, 0xaa52f6a1c2814bc5, 0x5896b6395e09fba0, 0xf7fc97a18d5f1eee, - 0xf2712e64111823e8, 0x4f84821bf1f857f4, 0x02041415d72da206, ], - [0x39286a4a4a391e77, 0x4ac16c7bebc97214, 0x7427cbbcb895a01f, 0x2ef8491d0b14759b, - 0xbec7625ee20fa616, 0x7c64393faf749b6f, 0x0f61c751c9826dc5, 0x700e6f3ee8ccb8a7, - 0x5bdea3b447ef8667, 0xa0f569a5a6e97588, 0xcc9e78115d7cae2d, ], - [0x0933079ab678e5ee, 0xed6861bf33c54a28, 0x62503e6e1749a497, 0x745a9c65dea83ac6, - 0x20ce351f6e700cf0, 0x2ec0b18d30fafb8a, 0x0312f54c22b5f299, 0x5222977218fd6cd5, - 0x82662e8445868eec, 0xc4cab6335040265d, 0x12e5790e9efb9217, ], - [0x0d829aec63871f55, 0x384d8a425086dd8c, 0x13e78b54657bfd3e, 0x2a45a17a03093566, - 0x7b6872656233b9be, 0xddc0281bb12bbb4c, 0xa224ebff0652d7c8, 0xc5ca97207780ea5c, - 0x484236194d3586ba, 0x432a56d44a44f3f7, 0xc41f926f862fc532, ], - [0x9366cd7ed9ef5e06, 0xd7f941098175f223, 0x9af7dda3e1c9f2b1, 0x9a0ec6d0a03525f5, - 0x3ab244f4fb0fb387, 0xd8c4e357eb1d5778, 0xe62157e2e25edbbb, 0xafcd6630f841f1f8, - 0xc3969199738708fb, 0xa8224d311e6a551f, 0xc2c0a01fc655fd9f, ], - [0xd78498f2013cd9b6, 0x675d21a200b2908c, 0x70bfd23b9e88c707, 0x85472dcbcfd078e3, - 0x5658c961cfffd574, 0x89e05a2cda3ca315, 0x1b51ae1ff8186a9f, 0xca648f8c6c7822cb, - 0x7233c92647957f4d, 0x520bf21c62d37ffa, 0x897496c7407a2ca7, ], - [0x8e80cf5bca4eee19, 0x754779126bc1afcf, 0x07e887764b379cb0, 0x7dc7c14e12f91d5e, - 
0xc8f5dab5fb6b0264, 0x1c842cf8021f9176, 0x69b56a7e2e2db2c0, 0xf30253f77fef3445, - 0x14bb3a62919efb99, 0xff9976d424a5d89c, 0x59dde7be0331a202, ], - [0xdbe04b62126330a2, 0x0409b2138da1eaec, 0x7bd4558eb2262691, 0xafa86cfa8d52b05b, - 0xb83f570197d8c584, 0xb3ded6cc13990ac1, 0xfd33937cb072c9e1, 0xe3b3989341d92952, - 0xd26e76d6ca949ad9, 0x35c89a8548f88e86, 0x8af785bd940c3b43, ], - [0xcbf3b86701c790da, 0x63634f67e29f4005, 0x008f903982363b81, 0xc2b07f99d6eb0229, - 0xa8344b83d15e2558, 0x880f4e5fd103b7b0, 0xd40eddb0a5929072, 0x476e27ccee571f49, - 0xe71439b4b989f9eb, 0x97e55074f852b2fe, 0xdd258c2137e1a2c5, ], - [0x982b90366d23259b, 0xb2667eacaa76b306, 0xecf233e82020ede1, 0x3cee7ac07d4a88c7, - 0x31428be2fe5a5854, 0xf1beea1d55c4c4db, 0x584fd6b580f1ffd2, 0x6e2381c3c8ba0d0b, - 0x21ab749cbafc0611, 0x8ed389f39aba3001, 0xa24ba694f2b42f13, ], - [0xdb30cd9db02606f9, 0x1b0d6736682ba257, 0x0d3bcdecf5808443, 0x31c330001dbd3dbd, - 0x9684d22370447946, 0xde0e24e6426c6935, 0xf487270dd081ef69, 0xd943f4ef48f2b252, - 0x4c52a7fdd1c52d24, 0xc293082029ea139d, 0xc2ba73ab3da0468a, ], - [0xd093bd0dcc74e0d1, 0xe91428f9ce6a98e5, 0x673dee716909dc21, 0xf22e3223548219d7, - 0x3297978d881a1300, 0x51157b1e8218d77c, 0x0e3b0a5c07843889, 0x273b48dfa36752b6, - 0x5dbf2c6323576866, 0x1c032b70763df9a7, 0x1a8d7ed4159ecbf4, ], - [0x8e40b29fa6c4f3ad, 0x43bc06dba91daa9b, 0x445df1620dd6d846, 0xae1e72ed68c45c46, - 0x496ee4e593ade46d, 0x1d3642eddce9118f, 0x71a88114bd8fd755, 0x4a10d6b22514943d, - 0x56dca305d4d72fee, 0xe2e4d9ce95fa62bf, 0xfb6bfffd47b50b0a, ], - [0x4c6c14946cc557ee, 0x9b1bcbaac7ba3226, 0xdd7410361fa0dd20, 0x9c8a098cbaf95b26, - 0x3da4f26593503adf, 0xffb07b45cd3bf859, 0xaf034373af54a559, 0xd6b9bace407146bb, - 0x7b92c04c972f4ec6, 0xfe71df71165b9845, 0xad0134b9dc9ebe51, ], - [0xfdaa64ceec88aa7c, 0x565342e2d815525c, 0xe382458f259429a8, 0x0f6ba5afd5d1d1ca, - 0xcba85de412439a41, 0x212d3c62049ccb1a, 0x930c0bf5950267e3, 0x60f87fe43fc560d8, - 0x8f1fbdbcd878a33b, 0xd28b789abf9af16f, 0xd921f0434fa0eb07, ], - 
[0xd69c2c80635e7c18, 0x5a3d78c8772f293f, 0x844fe5e72ad1ceb5, 0x81b217e5910dc916, - 0x2951409fb7c8ba85, 0x5c135dd95693e367, 0xc2e8a723f9f7ebd2, 0x10bb79bf5d63f38d, - 0x34625b1550385a89, 0xdc6235328d791163, 0x1eb12b7aed4d5133, ], - [0x01426faca89577d0, 0x003ca90136ac4fd0, 0x00289223dc45a17f, 0x0009921704320612, - 0x0007efae3669e451, 0x006499f206b3349d, 0x1001120d9b5dcfe1, 0x000e3aa47db4da94, - 0x0320dc8339d35692, 0x4030a0a16247ecbd, 0x04368a659c160a6b, ], - [0x0000001237b408f0, 0x00000004c8f1b79c, 0x0000000446de5309, 0x00000032a3e2d4ac, - 0x00000c007600eeb7, 0x000100040ee771b0, 0x00000198394d0817, 0x0000301810a981ba, - 0x0000030f37d86f5a, 0x0000030ab1cc04d4, 0x000000c0e7c0b7e9, ], - [0x00000000000234a0, 0x0000000000114630, 0x000000000800260c, 0x0000000100005288, - 0x0000000000900194, 0x00000000200800a3, 0x0000000002011034, 0x000000000105100e, - 0x0000000000604025, 0x0000000000114a03, 0x0000000000061481, ], - [0x0000000000000400, 0x0000000000010000, 0x0000000000000008, 0x0000000000001000, - 0x0000000000000100, 0x0000000000000002, 0x0000000000000020, 0x0000000000000008, - 0x0000000000000001, 0x0000000000000002, 0x0000000000000001, ], + [0x94877900674181c3, 0xc6c67cc37a2a2bbd, 0xd667c2055387940f, 0x0ba63a63e94b5ff0, + 0x99460cc41b8f079f, 0x7ff02375ed524bb3, 0xea0870b47a8caf0e, 0xabcad82633b7bc9d, + 0x3b8d135261052241, 0xfb4515f5e5b0d539, 0x3ee8011c2b37f77c, ], + [0x0adef3740e71c726, 0xa37bf67c6f986559, 0xc6b16f7ed4fa1b00, 0x6a065da88d8bfc3c, + 0x4cabc0916844b46f, 0x407faac0f02e78d1, 0x07a786d9cf0852cf, 0x42433fb6949a629a, + 0x891682a147ce43b0, 0x26cfd58e7b003b55, 0x2bbf0ed7b657acb3, ], + [0x481ac7746b159c67, 0xe367de32f108e278, 0x73f260087ad28bec, 0x5cfc82216bc1bdca, + 0xcaccc870a2663a0e, 0xdb69cd7b4298c45d, 0x7bc9e0c57243e62d, 0x3cc51c5d368693ae, + 0x366b4e8cc068895b, 0x2bd18715cdabbca4, 0xa752061c4f33b8cf, ], + [0xb22d2432b72d5098, 0x9e18a487f44d2fe4, 0x4b39e14ce22abd3c, 0x9e77fde2eb315e0d, + 0xca5e0385fe67014d, 0x0c2cb99bf1b6bddb, 0x99ec1cd2a4460bfe, 
0x8577a815a2ff843f, + 0x7d80a6b4fd6518a5, 0xeb6c67123eab62cb, 0x8f7851650eca21a5, ], + [0x11ba9a1b81718c2a, 0x9f7d798a3323410c, 0xa821855c8c1cf5e5, 0x535e8d6fac0031b2, + 0x404e7c751b634320, 0xa729353f6e55d354, 0x4db97d92e58bb831, 0xb53926c27897bf7d, + 0x965040d52fe115c5, 0x9565fa41ebd31fd7, 0xaae4438c877ea8f4, ], + [0x37f4e36af6073c6e, 0x4edc0918210800e9, 0xc44998e99eae4188, 0x9f4310d05d068338, + 0x9ec7fe4350680f29, 0xc5b2c1fdc0b50874, 0xa01920c5ef8b2ebe, 0x59fa6f8bd91d58ba, + 0x8bfc9eb89b515a82, 0xbe86a7a2555ae775, 0xcbb8bbaa3810babf, ], + [0x577f9a9e7ee3f9c2, 0x88c522b949ace7b1, 0x82f07007c8b72106, 0x8283d37c6675b50e, + 0x98b074d9bbac1123, 0x75c56fb7758317c1, 0xfed24e206052bc72, 0x26d7c3d1bc07dae5, + 0xf88c5e441e28dbb4, 0x4fe27f9f96615270, 0x514d4ba49c2b14fe, ], + [0xf02a3ac068ee110b, 0x0a3630dafb8ae2d7, 0xce0dc874eaf9b55c, 0x9a95f6cff5b55c7e, + 0x626d76abfed00c7b, 0xa0c1cf1251c204ad, 0xdaebd3006321052c, 0x3d4bd48b625a8065, + 0x7f1e584e071f6ed2, 0x720574f0501caed3, 0xe3260ba93d23540a, ], + [0xab1cbd41d8c1e335, 0x9322ed4c0bc2df01, 0x51c3c0983d4284e5, 0x94178e291145c231, + 0xfd0f1a973d6b2085, 0xd427ad96e2b39719, 0x8a52437fecaac06b, 0xdc20ee4b8c4c9a80, + 0xa2c98e9549da2100, 0x1603fe12613db5b6, 0x0e174929433c5505, ], + [0x3d4eab2b8ef5f796, 0xcfff421583896e22, 0x4143cb32d39ac3d9, 0x22365051b78a5b65, + 0x6f7fd010d027c9b6, 0xd9dd36fba77522ab, 0xa44cf1cb33e37165, 0x3fc83d3038c86417, + 0xc4588d418e88d270, 0xce1320f10ab80fe2, 0xdb5eadbbec18de5d, ], + [0x1183dfce7c454afd, 0x21cea4aa3d3ed949, 0x0fce6f70303f2304, 0x19557d34b55551be, + 0x4c56f689afc5bbc9, 0xa1e920844334f944, 0xbad66d423d2ec861, 0xf318c785dc9e0479, + 0x99e2032e765ddd81, 0x400ccc9906d66f45, 0xe1197454db2e0dd9, ], + [0x84d1ecc4d53d2ff1, 0xd8af8b9ceb4e11b6, 0x335856bb527b52f4, 0xc756f17fb59be595, + 0xc0654e4ea5553a78, 0x9e9a46b61f2ea942, 0x14fc8b5b3b809127, 0xd7009f0f103be413, + 0x3e0ee7b7a9fb4601, 0xa74e888922085ed7, 0xe80a7cde3d4ac526, ], + [0x238aa6daa612186d, 0x9137a5c630bad4b4, 0xc7db3817870c5eda, 
0x217e4f04e5718dc9, + 0xcae814e2817bd99d, 0xe3292e7ab770a8ba, 0x7bb36ef70b6b9482, 0x3c7835fb85bca2d3, + 0xfe2cdf8ee3c25e86, 0x61b3915ad7274b20, 0xeab75ca7c918e4ef, ], + [0xd6e15ffc055e154e, 0xec67881f381a32bf, 0xfbb1196092bf409c, 0xdc9d2e07830ba226, + 0x0698ef3245ff7988, 0x194fae2974f8b576, 0x7a5d9bea6ca4910e, 0x7aebfea95ccdd1c9, + 0xf9bd38a67d5f0e86, 0xfa65539de65492d8, 0xf0dfcbe7653ff787, ], + [0x0bd87ad390420258, 0x0ad8617bca9e33c8, 0x0c00ad377a1e2666, 0x0ac6fc58b3f0518f, + 0x0c0cc8a892cc4173, 0x0c210accb117bc21, 0x0b73630dbb46ca18, 0x0c8be4920cbd4a54, + 0x0bfe877a21be1690, 0x0ae790559b0ded81, 0x0bf50db2f8d6ce31, ], + [0x000cf29427ff7c58, 0x000bd9b3cf49eec8, 0x000d1dc8aa81fb26, 0x000bc792d5c394ef, + 0x000d2ae0b2266453, 0x000d413f12c496c1, 0x000c84128cfed618, 0x000db5ebd48fc0d4, + 0x000d1b77326dcb90, 0x000beb0ccc145421, 0x000d10e5b22b11d1, ], + [0x00000e24c99adad8, 0x00000cf389ed4bc8, 0x00000e580cbf6966, 0x00000cde5fd7e04f, + 0x00000e63628041b3, 0x00000e7e81a87361, 0x00000dabe78f6d98, 0x00000efb14cac554, + 0x00000e5574743b10, 0x00000d05709f42c1, 0x00000e4690c96af1, ], + [0x0000000f7157bc98, 0x0000000e3006d948, 0x0000000fa65811e6, 0x0000000e0d127e2f, + 0x0000000fc18bfe53, 0x0000000fd002d901, 0x0000000eed6461d8, 0x0000001068562754, + 0x0000000fa0236f50, 0x0000000e3af13ee1, 0x0000000fa460f6d1, ], + [0x0000000011131738, 0x000000000f56d588, 0x0000000011050f86, 0x000000000f848f4f, + 0x00000000111527d3, 0x00000000114369a1, 0x00000000106f2f38, 0x0000000011e2ca94, + 0x00000000110a29f0, 0x000000000fa9f5c1, 0x0000000010f625d1, ], + [0x000000000011f718, 0x000000000010b6c8, 0x0000000000134a96, 0x000000000010cf7f, + 0x0000000000124d03, 0x000000000013f8a1, 0x0000000000117c58, 0x0000000000132c94, + 0x0000000000134fc0, 0x000000000010a091, 0x0000000000128961, ], + [0x0000000000001300, 0x0000000000001750, 0x000000000000114e, 0x000000000000131f, + 0x000000000000167b, 0x0000000000001371, 0x0000000000001230, 0x000000000000182c, + 0x0000000000001368, 0x0000000000000f31, 
0x00000000000015c9, ], + [0x0000000000000014, 0x0000000000000022, 0x0000000000000012, 0x0000000000000027, + 0x000000000000000d, 0x000000000000000d, 0x000000000000001c, 0x0000000000000002, + 0x0000000000000010, 0x0000000000000029, 0x000000000000000f, ], ]; const FAST_PARTIAL_ROUND_W_HATS: [[u64; 12 - 1]; N_PARTIAL_ROUNDS] = [ - [0x54accab273d3aeca, 0x12fecae33b1f1da9, 0x573bb85449ea9a27, 0x6b5ddc139f172aad, - 0xd2b6d0ca34465d4c, 0x51cf0aafbddfc269, 0x6075e64679e7a403, 0x678316c041900ac9, - 0x10019c84b343fc57, 0xde5b81280922f644, 0x42490a86b2f2f305, ], - [0x337c5930f7bacc46, 0x334792a4f1afb921, 0xc97ea5f1426e540e, 0x5fc74568337bd780, - 0xfd5718cc391d80ef, 0xef90b77a337d923c, 0xb28561998f153fea, 0xed5f65b8894345aa, - 0x7e2aacb5985893a7, 0xcbde536cb644fcf0, 0x07338300a07fc43b, ], - [0xd4c9ad02fcc8b4c1, 0x2890dac7a1caa815, 0x7d62bc45c45f5db2, 0x0a902300db5deac2, - 0x663f3726307f62a4, 0x050bda7dc7d8eb3b, 0xd9db68f3f051c5b6, 0xc5110194a38210aa, - 0x403862136533be0e, 0x20039e053d9b227d, 0xe2c90d16262c5f3c, ], - [0x6578da963396c755, 0xea6b546e6bc1e86f, 0x4e562ef0c66c2be3, 0x35b839dae0f9d22e, - 0x4aab3d88857b058c, 0x4f7443e07ac462d3, 0x93c2c5bbc385e50f, 0xc0c0c5c8ea023ce2, - 0x8409c53d4b62965d, 0x0489f2258135dcd1, 0x32958358c736aec9, ], - [0xe13b50ca15b0a455, 0x9878071e2b5d4547, 0xb8e50d27b4172b30, 0xbf312f828d3ea142, - 0x5b8510573020e6e8, 0x7c3091c29d8d6afa, 0x7e2d900a50f194fa, 0xb236d5080d0b0409, - 0x08f148b6c3b99320, 0x679c6b9cadbe604c, 0x6b0313be2ad9b9f2, ], - [0x12038ac320459b0e, 0x7abd36c6b25cd8e0, 0x37cc3583930e5a13, 0xafe725c4446a691d, - 0x99d89ccadeb38d80, 0x96c820be5528ec36, 0x9b63969fdc84ede6, 0x8f8f21cf5ad78c48, - 0x1a4d3573bc3c2d8b, 0x9f5a7bd9e771866e, 0x5bcef938b72497fc, ], - [0x5f969817be6add7a, 0x572b04c1ae5a4c6d, 0x8d219b8fac9a287b, 0x4566b3c56372f434, - 0xdd3f46f108bf4441, 0xd7e1469baa3912c4, 0xac36377b68e071fc, 0xf348c609201d771a, - 0x0bb926a5e2ebdd96, 0x30efa780aee4705a, 0xb24ff2673691146a, ], - [0x5d0324b3a1dab6e2, 0xbd1491a0cc9e564b, 0xb8699e13b528ef99, 
0x7743d9a8753ee023, - 0xce577363cdb5bcbc, 0xc056688d4f006774, 0x61f9363c10d7fdf2, 0x5f730e5530f6e06d, - 0x25efb9ef3adf0072, 0xcf971d58e21a8aa7, 0xd830d7e8d0d70680, ], - [0x36e69157ac42f39d, 0x3e7aca69ddf62d3e, 0xbbbef86cac42bb30, 0xa2e793ae56c27043, - 0x2a315dc4bc40c8a0, 0x84022758f3b3af55, 0x668809e74e7a470d, 0xf2d91eaafdee1820, - 0x50f19afd16d03294, 0x30c087d3223bcd4b, 0xf5739d95458cc633, ], - [0x15266b5a75028317, 0x8059f198c9f88799, 0x437a070386c65244, 0xc70e0bb73942929d, - 0xa8b32cb37ae137ea, 0xc2e556278323a459, 0xbc486da754091692, 0x7815a23467d6b541, - 0x3e6dba4e930e8be6, 0x6b4277b0915d56ba, 0x20212bfac7922ea0, ], - [0xeeba270c067b0c8b, 0xa4d576458941f29a, 0xecdf04a28c8c83be, 0xc808f0af215d7dda, - 0x424f4bfbecced0fb, 0xe4cbf6c0c10e58b3, 0x66a87bebfa09c031, 0x614ffc9443d5f0a4, - 0x96c96636f7b7975a, 0x58d4222a6f860cc5, 0x2d4f51c75bf50169, ], - [0xab43452aec55310f, 0x0a719e77ec2b398c, 0x8f946888a3f5f74f, 0x7b447e0d9f7ad4fb, - 0x7a2887ceb40ef226, 0x8840b904c1c49e50, 0xd91ea2510b0eaddc, 0x6617fa40a1a220fb, - 0xb1c41a72a845cb45, 0x02c2715281868092, 0xaf5b1b6c46ca37bd, ], - [0xe27649b9dbcbe631, 0x4afdf11d1d5e73b2, 0x05285a0e99160910, 0x23bfd6197ed8d3ba, - 0xb1e6292028792aab, 0xc997f6cc14e05cae, 0x34793ec255a555bd, 0xeb4f2da35a76dd03, - 0x767a5552c9910f3a, 0x4c4cc6987c30a447, 0x64da2b6920578f8d, ], - [0xe97ce2fecc0720ac, 0x99fc5741fcdeae8a, 0x0ac47be58b345692, 0x75a446121f2cccda, - 0xf38e40a102691c8e, 0xdbe5d707594714ef, 0x6ab183bdab92e450, 0x0aed83850dc10451, - 0x66e16941a4373c93, 0x22af15bb3e1034a1, 0xab2136f22ed23ccc, ], - [0xb0d3214d3c4c46c1, 0x3983bffd4053346c, 0xab1239b72a6a9e64, 0x669bcbda2406c089, - 0xf3118af8e563feda, 0x58323dbdd43a9c95, 0x5438aa910b51fd8c, 0xcbf071f9573f7e4f, - 0x476c8fde40075e51, 0xa10f54d3c77d8bed, 0xfecafe7ec7346beb, ], - [0x79e00c6916f68fa8, 0x80e39c20c11400d6, 0x242e2b46a7c116b7, 0xea660990074fcff6, - 0x18e3369da4c9272b, 0xfa6471be8be33b80, 0xede2ed2a83a4574a, 0x9e595d610deaaed6, - 0xc7d2cf35fcacdc58, 0xc65cf113a9af2302, 
0x35a74c3d0cac5fde, ], - [0x35d6cf1a9aeabd4b, 0x4dc004b0b64954c3, 0xcb67ab54210b4c8f, 0xa2359b770621d28e, - 0x027a0a0a5e315bf6, 0xed6aad0492a86ef6, 0x127074e28969232c, 0x3e3d68e6354d396f, - 0x3cf204ab96edf7c6, 0x513a9050b70c18bf, 0x73b3b7399a3f5281, ], - [0x0af9319d5b7cd620, 0x0514fbcecd8a897d, 0x542dd32e46738f8d, 0x49248ae425e9bd45, - 0x8bb9ef7ac36e53ea, 0x97981020c414a723, 0xe587f186c024e0c8, 0x14f01dd28e990ad2, - 0x4d3fca72e19ea756, 0x01a3824f1ee8e7f1, 0xb048d25b575f250e, ], - [0xe78a4cfe6c6aa236, 0x4840deffdefd3b04, 0x6e0952d028e63e47, 0x249d49fb1d93304d, - 0xd41ce9ed49f7fbb3, 0xba255e808ea77466, 0x5ce52e6dc2005436, 0x8b5bf13acd881a04, - 0xf80f439f3ac011d1, 0x1d3618fb2cc3f916, 0xf41489c837e14938, ], - [0x41e065665af15054, 0x71752ac86d1bba64, 0x9bfddd30f8ceadeb, 0x4f59dd5e6c985767, - 0x8aa3e0718ecaa657, 0x355f734ed4199ca2, 0x110f361baec4d693, 0x283a46e9e134b5b1, - 0x4fda33376f5c6514, 0xcca192f9565e7d13, 0x2251835db1c24c39, ], - [0xc583f62f5970a849, 0xb6cc325741cd89dd, 0xf83288467f07ac1f, 0xfd82624964b845e7, - 0x11967e4e00a49fdd, 0x2fb200fae9f72577, 0xd6fb31913c7d5da7, 0xfad9ae578dd090cc, - 0xcd13b2be741ea5d8, 0xc1c54f9cf54b0c27, 0x29520a761b657cce, ], - [0x0ac0e496a2b39f4a, 0x20571abb59e27953, 0xe9971143579a1d30, 0x980359c3dba518cb, - 0x05ecee5a85b427c4, 0x4620dd90ad0b5366, 0x95c98f9c5b859365, 0x0fbb1806fbc56995, - 0xfe4526fd802afae2, 0x70e3786431084092, 0xa8d78a0494939111, ], + [0x3d999c961b7c63b0, 0x814e82efcd172529, 0x2421e5d236704588, 0x887af7d4dd482328, + 0xa5e9c291f6119b27, 0xbdc52b2676a4b4aa, 0x64832009d29bcf57, 0x09c4155174a552cc, + 0x463f9ee03d290810, 0xc810936e64982542, 0x043b1c289f7bc3ac, ], + [0x673655aae8be5a8b, 0xd510fe714f39fa10, 0x2c68a099b51c9e73, 0xa667bfa9aa96999d, + 0x4d67e72f063e2108, 0xf84dde3e6acda179, 0x40f9cc8c08f80981, 0x5ead032050097142, + 0x6591b02092d671bb, 0x00e18c71963dd1b7, 0x8a21bcd24a14218a, ], + [0x202800f4addbdc87, 0xe4b5bdb1cc3504ff, 0xbe32b32a825596e7, 0x8e0f68c5dc223b9a, + 0x58022d9e1c256ce3, 0x584d29227aa073ac, 
0x8b9352ad04bef9e7, 0xaead42a3f445ecbf, + 0x3c667a1d833a3cca, 0xda6f61838efa1ffe, 0xe8f749470bd7c446, ], + [0xc5b85bab9e5b3869, 0x45245258aec51cf7, 0x16e6b8e68b931830, 0xe2ae0f051418112c, + 0x0470e26a0093a65b, 0x6bef71973a8146ed, 0x119265be51812daf, 0xb0be7356254bea2e, + 0x8584defff7589bd7, 0x3c5fe4aeb1fb52ba, 0x9e7cd88acf543a5e, ], + [0x179be4bba87f0a8c, 0xacf63d95d8887355, 0x6696670196b0074f, 0xd99ddf1fe75085f9, + 0xc2597881fef0283b, 0xcf48395ee6c54f14, 0x15226a8e4cd8d3b6, 0xc053297389af5d3b, + 0x2c08893f0d1580e2, 0x0ed3cbcff6fcc5ba, 0xc82f510ecf81f6d0, ], + [0x94b06183acb715cc, 0x500392ed0d431137, 0x861cc95ad5c86323, 0x05830a443f86c4ac, + 0x3b68225874a20a7c, 0x10b3309838e236fb, 0x9b77fc8bcd559e2c, 0xbdecf5e0cb9cb213, + 0x30276f1221ace5fa, 0x7935dd342764a144, 0xeac6db520bb03708, ], + [0x7186a80551025f8f, 0x622247557e9b5371, 0xc4cbe326d1ad9742, 0x55f1523ac6a23ea2, + 0xa13dfe77a3d52f53, 0xe30750b6301c0452, 0x08bd488070a3a32b, 0xcd800caef5b72ae3, + 0x83329c90f04233ce, 0xb5b99e6664a0a3ee, 0x6b0731849e200a7f, ], + [0xec3fabc192b01799, 0x382b38cee8ee5375, 0x3bfb6c3f0e616572, 0x514abd0cf6c7bc86, + 0x47521b1361dcc546, 0x178093843f863d14, 0xad1003c5d28918e7, 0x738450e42495bc81, + 0xaf947c59af5e4047, 0x4653fb0685084ef2, 0x057fde2062ae35bf, ], + [0xe376678d843ce55e, 0x66f3860d7514e7fc, 0x7817f3dfff8b4ffa, 0x3929624a9def725b, + 0x0126ca37f215a80a, 0xfce2f5d02762a303, 0x1bc927375febbad7, 0x85b481e5243f60bf, + 0x2d3c5f42a39c91a0, 0x0811719919351ae8, 0xf669de0add993131, ], + [0x7de38bae084da92d, 0x5b848442237e8a9b, 0xf6c705da84d57310, 0x31e6a4bdb6a49017, + 0x889489706e5c5c0f, 0x0e4a205459692a1b, 0xbac3fa75ee26f299, 0x5f5894f4057d755e, + 0xb0dc3ecd724bb076, 0x5e34d8554a6452ba, 0x04f78fd8c1fdcc5f, ], + [0x4dd19c38779512ea, 0xdb79ba02704620e9, 0x92a29a3675a5d2be, 0xd5177029fe495166, + 0xd32b3298a13330c1, 0x251c4a3eb2c5f8fd, 0xe1c48b26e0d98825, 0x3301d3362a4ffccb, + 0x09bb6c88de8cd178, 0xdc05b676564f538a, 0x60192d883e473fee, ], + [0x16b9774801ac44a0, 0x3cb8411e786d3c8e, 
0xa86e9cf505072491, 0x0178928152e109ae, + 0x5317b905a6e1ab7b, 0xda20b3be7f53d59f, 0xcb97dedecebee9ad, 0x4bd545218c59f58d, + 0x77dc8d856c05a44a, 0x87948589e4f243fd, 0x7e5217af969952c2, ], + [0xbc58987d06a84e4d, 0x0b5d420244c9cae3, 0xa3c4711b938c02c0, 0x3aace640a3e03990, + 0x865a0f3249aacd8a, 0x8d00b2a7dbed06c7, 0x6eacb905beb7e2f8, 0x045322b216ec3ec7, + 0xeb9de00d594828e6, 0x088c5f20df9e5c26, 0xf555f4112b19781f, ], + [0xa8cedbff1813d3a7, 0x50dcaee0fd27d164, 0xf1cb02417e23bd82, 0xfaf322786e2abe8b, + 0x937a4315beb5d9b6, 0x1b18992921a11d85, 0x7d66c4368b3c497b, 0x0e7946317a6b4e99, + 0xbe4430134182978b, 0x3771e82493ab262d, 0xa671690d8095ce82, ], + [0xb035585f6e929d9d, 0xba1579c7e219b954, 0xcb201cf846db4ba3, 0x287bf9177372cf45, + 0xa350e4f61147d0a6, 0xd5d0ecfb50bcff99, 0x2e166aa6c776ed21, 0xe1e66c991990e282, + 0x662b329b01e7bb38, 0x8aa674b36144d9a9, 0xcbabf78f97f95e65, ], + [0xeec24b15a06b53fe, 0xc8a7aa07c5633533, 0xefe9c6fa4311ad51, 0xb9173f13977109a1, + 0x69ce43c9cc94aedc, 0xecf623c9cd118815, 0x28625def198c33c7, 0xccfc5f7de5c3636a, + 0xf5e6c40f1621c299, 0xcec0e58c34cb64b1, 0xa868ea113387939f, ], + [0xd8dddbdc5ce4ef45, 0xacfc51de8131458c, 0x146bb3c0fe499ac0, 0x9e65309f15943903, + 0x80d0ad980773aa70, 0xf97817d4ddbf0607, 0xe4626620a75ba276, 0x0dfdc7fd6fc74f66, + 0xf464864ad6f2bb93, 0x02d55e52a5d44414, 0xdd8de62487c40925, ], + [0xc15acf44759545a3, 0xcbfdcf39869719d4, 0x33f62042e2f80225, 0x2599c5ead81d8fa3, + 0x0b306cb6c1d7c8d0, 0x658c80d3df3729b1, 0xe8d1b2b21b41429c, 0xa1b67f09d4b3ccb8, + 0x0e1adf8b84437180, 0x0d593a5e584af47b, 0xa023d94c56e151c7, ], + [0x49026cc3a4afc5a6, 0xe06dff00ab25b91b, 0x0ab38c561e8850ff, 0x92c3c8275e105eeb, + 0xb65256e546889bd0, 0x3c0468236ea142f6, 0xee61766b889e18f2, 0xa206f41b12c30415, + 0x02fe9d756c9f12d1, 0xe9633210630cbf12, 0x1ffea9fe85a0b0b1, ], + [0x81d1ae8cc50240f3, 0xf4c77a079a4607d7, 0xed446b2315e3efc1, 0x0b0a6b70915178c3, + 0xb11ff3e089f15d9a, 0x1d4dba0b7ae9cc18, 0x65d74e2f43b48d05, 0xa2df8c6b8ae0804a, + 0xa4e6f0a8c33348a6, 
0xc0a26efc7be5669b, 0xa6b6582c547d0d60, ], + [0x84afc741f1c13213, 0x2f8f43734fc906f3, 0xde682d72da0a02d9, 0x0bb005236adb9ef2, + 0x5bdf35c10a8b5624, 0x0739a8a343950010, 0x52f515f44785cfbc, 0xcbaf4e5d82856c60, + 0xac9ea09074e3e150, 0x8f0fa011a2035fb0, 0x1a37905d8450904a, ], + [0x3abeb80def61cc85, 0x9d19c9dd4eac4133, 0x075a652d9641a985, 0x9daf69ae1b67e667, + 0x364f71da77920a18, 0x50bd769f745c95b1, 0xf223d1180dbbf3fc, 0x2f885e584e04aa99, + 0xb69a0fa70aea684a, 0x09584acaa6e062a0, 0x0bc051640145b19b, ], ]; // NB: This is in ROW-major order to support cache-friendly pre-multiplication. const FAST_PARTIAL_ROUND_INITIAL_MATRIX: [[u64; 12 - 1]; 12 - 1] = [ - [0xb8dee12bf8e622dc, 0x2a0bcfdad25a7a77, 0x35f873e941f6055d, 0x99b7b85b6028982e, - 0x86d6993880e836f7, 0x1ef8de305b9c354d, 0x8b0a80ef933c37dc, 0x715c7164aacaf4a8, - 0x43845bd4f75ac7f5, 0x3e71bb7b0ec57a1a, 0xffc5b2f8946575c3, ], - [0x863ca0992eae09b0, 0x68901dfa3ecc7696, 0x6ba9546fc13ba8be, 0x555b7567255c9650, - 0x4570c6ac5e80551b, 0x8e440c6cc2d0ed18, 0xbad8ae4dbfba0799, 0x8b71ed9e65a6ed7a, - 0xaade0f9eb69ee576, 0xdebe1855920c6e64, 0x3e71bb7b0ec57a1a, ], - [0x2c3887c29246a985, 0x5aeb127ffeece78f, 0xa86e940514be2461, 0x2cb276ddf6094068, - 0x81e59e8f82a28b3c, 0x27bc037b1569fb52, 0x706ee8b692c2ebc7, 0xeba6949241aedb71, - 0xc416ad39f1f908f8, 0xaade0f9eb69ee576, 0x43845bd4f75ac7f5, ], - [0x03df3a62e1ea48d2, 0xbb484c2d408e9b12, 0x0fbf2169623ec24c, 0x50955930c2f9eb19, - 0x3dfc3cc6123745cc, 0xa2a8d3774d197b2c, 0xd16417e43d20feab, 0xd998a362dba538ba, - 0xeba6949241aedb71, 0x8b71ed9e65a6ed7a, 0x715c7164aacaf4a8, ], - [0xbbf73d77fc6c411c, 0xad7f124615d240ee, 0x4e413fcebe9020ee, 0x540bd8044c672f2b, - 0x6db739f6d2e9f37d, 0x9aa1b0a8f56ad33d, 0x53c179d92714378f, 0xd16417e43d20feab, - 0x706ee8b692c2ebc7, 0xbad8ae4dbfba0799, 0x8b0a80ef933c37dc, ], - [0xab92e860ecde7bdc, 0xa58fc91c605c26d5, 0xfbe68b79a8d5e0b9, 0x3e7edc1407cbd848, - 0xf69c76d11eaf57bf, 0x941ef2c6beace374, 0x9aa1b0a8f56ad33d, 0xa2a8d3774d197b2c, - 0x27bc037b1569fb52, 
0x8e440c6cc2d0ed18, 0x1ef8de305b9c354d, ], - [0xb522132046b25eaf, 0x2b7b18e882c3e2c6, 0xe3322ad433ba15c8, 0x87355794faf87b1b, - 0x14f6e5ac86065fce, 0xf69c76d11eaf57bf, 0x6db739f6d2e9f37d, 0x3dfc3cc6123745cc, - 0x81e59e8f82a28b3c, 0x4570c6ac5e80551b, 0x86d6993880e836f7, ], - [0x0084dd11f5c0d55c, 0x9d664d307df18036, 0x1d80d847dca52945, 0xee3eecb9b2df1658, - 0x87355794faf87b1b, 0x3e7edc1407cbd848, 0x540bd8044c672f2b, 0x50955930c2f9eb19, - 0x2cb276ddf6094068, 0x555b7567255c9650, 0x99b7b85b6028982e, ], - [0xeb7c39655546eba5, 0xf07245b62d94cf71, 0x17db9b690f0031a3, 0x1d80d847dca52945, - 0xe3322ad433ba15c8, 0xfbe68b79a8d5e0b9, 0x4e413fcebe9020ee, 0x0fbf2169623ec24c, - 0xa86e940514be2461, 0x6ba9546fc13ba8be, 0x35f873e941f6055d, ], - [0xcb7fc57923717f84, 0x795a850bf5f9e397, 0xf07245b62d94cf71, 0x9d664d307df18036, - 0x2b7b18e882c3e2c6, 0xa58fc91c605c26d5, 0xad7f124615d240ee, 0xbb484c2d408e9b12, - 0x5aeb127ffeece78f, 0x68901dfa3ecc7696, 0x2a0bcfdad25a7a77, ], - [0x3107f5edca2f02b8, 0xcb7fc57923717f84, 0xeb7c39655546eba5, 0x0084dd11f5c0d55c, - 0xb522132046b25eaf, 0xab92e860ecde7bdc, 0xbbf73d77fc6c411c, 0x03df3a62e1ea48d2, - 0x2c3887c29246a985, 0x863ca0992eae09b0, 0xb8dee12bf8e622dc, ], + [0x80772dc2645b280b, 0xdc927721da922cf8, 0xc1978156516879ad, 0x90e80c591f48b603, + 0x3a2432625475e3ae, 0x00a2d4321cca94fe, 0x77736f524010c932, 0x904d3f2804a36c54, + 0xbf9b39e28a16f354, 0x3a1ded54a6cd058b, 0x42392870da5737cf, ], + [0xe796d293a47a64cb, 0xb124c33152a2421a, 0x0ee5dc0ce131268a, 0xa9032a52f930fae6, + 0x7e33ca8c814280de, 0xad11180f69a8c29e, 0xc75ac6d5b5a10ff3, 0xf0674a8dc5a387ec, + 0xb36d43120eaa5e2b, 0x6f232aab4b533a25, 0x3a1ded54a6cd058b, ], + [0xdcedab70f40718ba, 0x14a4a64da0b2668f, 0x4715b8e5ab34653b, 0x1e8916a99c93a88e, + 0xbba4b5d86b9a3b2c, 0xe76649f9bd5d5c2e, 0xaf8e2518a1ece54d, 0xdcda1344cdca873f, + 0xcd080204256088e5, 0xb36d43120eaa5e2b, 0xbf9b39e28a16f354, ], + [0xf4a437f2888ae909, 0xc537d44dc2875403, 0x7f68007619fd8ba9, 0xa4911db6a32612da, + 0x2f7e9aade3fdaec1, 
0xe7ffd578da4ea43d, 0x43a608e7afa6b5c2, 0xca46546aa99e1575, + 0xdcda1344cdca873f, 0xf0674a8dc5a387ec, 0x904d3f2804a36c54, ], + [0xf97abba0dffb6c50, 0x5e40f0c9bb82aab5, 0x5996a80497e24a6b, 0x07084430a7307c9a, + 0xad2f570a5b8545aa, 0xab7f81fef4274770, 0xcb81f535cf98c9e9, 0x43a608e7afa6b5c2, + 0xaf8e2518a1ece54d, 0xc75ac6d5b5a10ff3, 0x77736f524010c932, ], + [0x7f8e41e0b0a6cdff, 0x4b1ba8d40afca97d, 0x623708f28fca70e8, 0xbf150dc4914d380f, + 0xc26a083554767106, 0x753b8b1126665c22, 0xab7f81fef4274770, 0xe7ffd578da4ea43d, + 0xe76649f9bd5d5c2e, 0xad11180f69a8c29e, 0x00a2d4321cca94fe, ], + [0x726af914971c1374, 0x1d7f8a2cce1a9d00, 0x18737784700c75cd, 0x7fb45d605dd82838, + 0x862361aeab0f9b6e, 0xc26a083554767106, 0xad2f570a5b8545aa, 0x2f7e9aade3fdaec1, + 0xbba4b5d86b9a3b2c, 0x7e33ca8c814280de, 0x3a2432625475e3ae, ], + [0x64dd936da878404d, 0x4db9a2ead2bd7262, 0xbe2e19f6d07f1a83, 0x02290fe23c20351a, + 0x7fb45d605dd82838, 0xbf150dc4914d380f, 0x07084430a7307c9a, 0xa4911db6a32612da, + 0x1e8916a99c93a88e, 0xa9032a52f930fae6, 0x90e80c591f48b603, ], + [0x85418a9fef8a9890, 0xd8a2eb7ef5e707ad, 0xbfe85ababed2d882, 0xbe2e19f6d07f1a83, + 0x18737784700c75cd, 0x623708f28fca70e8, 0x5996a80497e24a6b, 0x7f68007619fd8ba9, + 0x4715b8e5ab34653b, 0x0ee5dc0ce131268a, 0xc1978156516879ad, ], + [0x156048ee7a738154, 0x91f7562377e81df5, 0xd8a2eb7ef5e707ad, 0x4db9a2ead2bd7262, + 0x1d7f8a2cce1a9d00, 0x4b1ba8d40afca97d, 0x5e40f0c9bb82aab5, 0xc537d44dc2875403, + 0x14a4a64da0b2668f, 0xb124c33152a2421a, 0xdc927721da922cf8, ], + [0xd841e8ef9dde8ba0, 0x156048ee7a738154, 0x85418a9fef8a9890, 0x64dd936da878404d, + 0x726af914971c1374, 0x7f8e41e0b0a6cdff, 0xf97abba0dffb6c50, 0xf4a437f2888ae909, + 0xdcedab70f40718ba, 0xe796d293a47a64cb, 0x80772dc2645b280b, ], ]; - #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] - #[inline] - fn poseidon(input: [Self; 12]) -> [Self; 12] { - unsafe { - crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::poseidon(&input) - } - } + // 
#[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] + // #[inline] + // fn poseidon(input: [Self; 12]) -> [Self; 12] { + // unsafe { + // crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::poseidon(&input) + // } + // } - #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] - #[inline(always)] - fn constant_layer(state: &mut [Self; 12], round_ctr: usize) { - unsafe { - crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::constant_layer(state, round_ctr); - } - } + // #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] + // #[inline(always)] + // fn constant_layer(state: &mut [Self; 12], round_ctr: usize) { + // unsafe { + // crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::constant_layer(state, round_ctr); + // } + // } - #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] - #[inline(always)] - fn sbox_layer(state: &mut [Self; 12]) { - unsafe { - crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::sbox_layer(state); - } - } + // #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] + // #[inline(always)] + // fn sbox_layer(state: &mut [Self; 12]) { + // unsafe { + // crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::sbox_layer(state); + // } + // } - #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] - #[inline(always)] - fn mds_layer(state: &[Self; 12]) -> [Self; 12] { - unsafe { - crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::mds_layer(state) - } - } + // #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] + // #[inline(always)] + // fn mds_layer(state: &[Self; 12]) -> [Self; 12] { + // unsafe { + // crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::mds_layer(state) + // } + // } - #[cfg(all(target_arch="aarch64", target_feature="neon"))] - #[inline] - fn poseidon(input: [Self; 12]) -> [Self; 12] { - unsafe { - 
crate::hash::arch::aarch64::poseidon_goldilocks_neon::poseidon(input) - } - } + // #[cfg(all(target_arch="aarch64", target_feature="neon"))] + // #[inline] + // fn poseidon(input: [Self; 12]) -> [Self; 12] { + // unsafe { + // crate::hash::arch::aarch64::poseidon_goldilocks_neon::poseidon(input) + // } + // } - #[cfg(all(target_arch="aarch64", target_feature="neon"))] - #[inline(always)] - fn sbox_layer(state: &mut [Self; 12]) { - unsafe { - crate::hash::arch::aarch64::poseidon_goldilocks_neon::sbox_layer(state); - } - } + // #[cfg(all(target_arch="aarch64", target_feature="neon"))] + // #[inline(always)] + // fn sbox_layer(state: &mut [Self; 12]) { + // unsafe { + // crate::hash::arch::aarch64::poseidon_goldilocks_neon::sbox_layer(state); + // } + // } - #[cfg(all(target_arch="aarch64", target_feature="neon"))] - #[inline(always)] - fn mds_layer(state: &[Self; 12]) -> [Self; 12] { - unsafe { - crate::hash::arch::aarch64::poseidon_goldilocks_neon::mds_layer(state) - } - } + // #[cfg(all(target_arch="aarch64", target_feature="neon"))] + // #[inline(always)] + // fn mds_layer(state: &[Self; 12]) -> [Self; 12] { + // unsafe { + // crate::hash::arch::aarch64::poseidon_goldilocks_neon::mds_layer(state) + // } + // } } #[cfg(test)] @@ -287,46 +288,28 @@ mod tests { let neg_one: u64 = F::NEG_ONE.to_canonical_u64(); - #[rustfmt::skip] - let _test_vectors8: Vec<([u64; 8], [u64; 8])> = vec![ - ([0, 0, 0, 0, 0, 0, 0, 0, ], - [0x649eec3229475d06, 0x72afe85b8b600222, 0x816d0a50ddd39228, 0x5083133a721a187c, - 0xbb69bd7d90c490a6, 0xea1d33a65d0a3287, 0xb4d27542d2fba3bc, 0xf9756d565d90c20a, ]), - ([0, 1, 2, 3, 4, 5, 6, 7, ], - [0xdfda4e2a7ec338f4, 0x3ac8d668054b1873, 0xeaaef2f72528e7ff, 0xee7bcc836ae165bc, - 0x95561d9377c3e696, 0x2e7d39c369dfccaa, 0x992178c050936f8f, 0x34e38ec33f572850, ]), - ([neg_one, neg_one, neg_one, neg_one, - neg_one, neg_one, neg_one, neg_one, ], - [0x9d8553546c658f67, 0xd5f6422aea26962b, 0xffb40b4db302da75, 0x34f43bbd7882c16c, - 0xccb375313fa146b0, 
0x87574c332e89201a, 0x60e9e6c0c0be3a16, 0xf0e2a741e90756ba, ]), - ([0x016f2dde9ccdaf6f, 0x77e29cda821fece4, 0x2f6686f781255f78, 0xd2c4c9a53070b44f, - 0x4d7035c9fd01fc40, 0xc8d460945c91d509, 0x14855cd8a36a097f, 0x49f640d6a30f9cf0, ], - [0x4c3c58a3fac4ba05, 0x3f26fc2bcb33a3d4, 0xe13fcddcd7a136bb, 0x27b05be73a91e2f2, - 0x37804ed8ca07fcd5, 0xe78ec2f213e28456, 0xecf67d2aacb4dbe3, 0xad14575187c496ca, ]), - ]; - #[rustfmt::skip] let test_vectors12: Vec<([u64; 12], [u64; 12])> = vec![ ([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ], - [0x3901858a44be6b3a, 0xb3470607c5f0ba0e, 0xb3b3ac3d89b37e8e, 0xd389513a7f6fe6e9, - 0x1eceb92f5da1c96b, 0x55d0bdfc6a842adf, 0x0112c568afb8819c, 0x6ac21107619569ee, - 0x3de33babbb421a85, 0x83688eb15ffe4ca3, 0x47e285b477551fa9, 0x1dd3dda781901271, ]), + [0x3c18a9786cb0b359, 0xc4055e3364a246c3, 0x7953db0ab48808f4, 0xc71603f33a1144ca, + 0xd7709673896996dc, 0x46a84e87642f44ed, 0xd032648251ee0b3c, 0x1c687363b207df62, + 0xdf8565563e8045fe, 0x40f5b37ff4254dae, 0xd070f637b431067c, 0x1792b1c4342109d7, ]), ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ], - [0x641772a94a77c7e5, 0x38d2cec9c47e7314, 0x3577218e825058c9, 0x1cdb3b4d22c54bcc, - 0x803234d4b16eb152, 0xbbb6c8438627c0f0, 0x1b219561c95a41fa, 0x9bdc97531bacc401, - 0x4251f4fac8271d9d, 0x0279ffa7ba5ce9aa, 0x63baf77c533b5874, 0xb7ada3e1f98b25e7, ]), + [0xd64e1e3efc5b8e9e, 0x53666633020aaa47, 0xd40285597c6a8825, 0x613a4f81e81231d2, + 0x414754bfebd051f0, 0xcb1f8980294a023f, 0x6eb2a9e4d54a9d0f, 0x1902bc3af467e056, + 0xf045d5eafdc6021f, 0xe4150f77caaa3be5, 0xc9bfd01d39b50cce, 0x5c0a27fcb0e1459b, ]), ([neg_one, neg_one, neg_one, neg_one, neg_one, neg_one, neg_one, neg_one, neg_one, neg_one, neg_one, neg_one, ], - [0xd2e4605ed1eb9613, 0x62510e8cbaf8a3b5, 0x64dc1e941dbaf46c, 0x1d6c5a5fd43cc4c5, - 0xac4b4f6bf503a6b4, 0x19e17983f5e52404, 0x927b08e033b29b6f, 0xa41bc2cb5ddb9bc0, - 0x270d528b1accc148, 0x022169acf46c71ae, 0xbbd4566e7b49ad7d, 0x0ed1ea54401533ef, ]), - ([0xa48728856b047229, 0xc43ab5e4aa986608, 0x715f470f075c057f, 
0x36e955a095478013, - 0x7c036db7200ba52d, 0x20377cd3410dc7dc, 0x058c0956659b05b2, 0xa66c880ee57e8399, - 0xb06521c88afbd610, 0xdfa4d72ba95c8895, 0x25b403dac3622acc, 0xda607d79268a8fce, ], - [0xe85b56b0764df429, 0x7c0796201b43fe68, 0x231673b8300a6a16, 0x25db4745a952a677, - 0x01431a6817415a4d, 0xfdfbbe63602076eb, 0x82c643dabf1154c1, 0x896e7e87b3f3417d, - 0x27eca78818ef9c27, 0xf08c93583c24dc47, 0x1c9e1552c07a9f73, 0x7659179192cfdc88, ]), + [0xbe0085cfc57a8357, 0xd95af71847d05c09, 0xcf55a13d33c1c953, 0x95803a74f4530e82, + 0xfcd99eb30a135df1, 0xe095905e913a3029, 0xde0392461b42919b, 0x7d3260e24e81d031, + 0x10d3d0465d9deaa0, 0xa87571083dfc2a47, 0xe18263681e9958f8, 0xe28e96f1ae5e60d3, ]), + ([0x8ccbbbea4fe5d2b7, 0xc2af59ee9ec49970, 0x90f7e1a9e658446a, 0xdcc0630a3ab8b1b8, + 0x7ff8256bca20588c, 0x5d99a7ca0c44ecfb, 0x48452b17a70fbee3, 0xeb09d654690b6c88, + 0x4a55d3a39c676a88, 0xc0407a38d2285139, 0xa234bac9356386d1, 0xe1633f2bad98a52f, ], + [0xa89280105650c4ec, 0xab542d53860d12ed, 0x5704148e9ccab94f, 0xd3a826d4b62da9f5, + 0x8a7a6ca87892574f, 0xc7017e1cad1a674e, 0x1f06668922318e34, 0xa3b203bc8102676f, + 0xfcc781b0ce382bf2, 0x934c69ff3ed14ba5, 0x504688a5996e8f13, 0x401f3f2ed524a2ba, ]), ]; check_test_vectors::(test_vectors12); From bedd2aa711c2628c050a0ec86d9f4f0cbcf71a52 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Sat, 19 Feb 2022 18:32:11 -0700 Subject: [PATCH 07/32] Rename arithmetic unit to ALU (#496) --- system_zero/src/{arithmetic => alu}/addition.rs | 2 +- system_zero/src/{arithmetic => alu}/division.rs | 2 +- system_zero/src/{arithmetic => alu}/mod.rs | 16 ++++++++-------- .../src/{arithmetic => alu}/multiplication.rs | 2 +- .../src/{arithmetic => alu}/subtraction.rs | 2 +- system_zero/src/lib.rs | 2 +- .../src/registers/{arithmetic.rs => alu.rs} | 10 +++++----- system_zero/src/registers/mod.rs | 6 +++--- system_zero/src/system_zero.rs | 12 +++++------- 9 files changed, 26 insertions(+), 28 deletions(-) rename system_zero/src/{arithmetic => alu}/addition.rs (98%) 
rename system_zero/src/{arithmetic => alu}/division.rs (96%) rename system_zero/src/{arithmetic => alu}/mod.rs (80%) rename system_zero/src/{arithmetic => alu}/multiplication.rs (96%) rename system_zero/src/{arithmetic => alu}/subtraction.rs (96%) rename system_zero/src/registers/{arithmetic.rs => alu.rs} (84%) diff --git a/system_zero/src/arithmetic/addition.rs b/system_zero/src/alu/addition.rs similarity index 98% rename from system_zero/src/arithmetic/addition.rs rename to system_zero/src/alu/addition.rs index 7aa0d81a..068092e8 100644 --- a/system_zero/src/arithmetic/addition.rs +++ b/system_zero/src/alu/addition.rs @@ -7,7 +7,7 @@ use plonky2::plonk::circuit_builder::CircuitBuilder; use plonky2::plonk::plonk_common::reduce_with_powers_ext_recursive; use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; -use crate::registers::arithmetic::*; +use crate::registers::alu::*; use crate::registers::NUM_COLUMNS; pub(crate) fn generate_addition(values: &mut [F; NUM_COLUMNS]) { diff --git a/system_zero/src/arithmetic/division.rs b/system_zero/src/alu/division.rs similarity index 96% rename from system_zero/src/arithmetic/division.rs rename to system_zero/src/alu/division.rs index e91288b9..f0d645f1 100644 --- a/system_zero/src/arithmetic/division.rs +++ b/system_zero/src/alu/division.rs @@ -6,7 +6,7 @@ use plonky2::iop::ext_target::ExtensionTarget; use plonky2::plonk::circuit_builder::CircuitBuilder; use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; -use crate::registers::arithmetic::*; +use crate::registers::alu::*; use crate::registers::NUM_COLUMNS; pub(crate) fn generate_division(values: &mut [F; NUM_COLUMNS]) { diff --git a/system_zero/src/arithmetic/mod.rs b/system_zero/src/alu/mod.rs similarity index 80% rename from system_zero/src/arithmetic/mod.rs rename to system_zero/src/alu/mod.rs index a2b3a4f8..17a12df1 100644 --- a/system_zero/src/arithmetic/mod.rs +++ b/system_zero/src/alu/mod.rs @@ -7,16 
+7,16 @@ use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsume use starky::vars::StarkEvaluationTargets; use starky::vars::StarkEvaluationVars; -use crate::arithmetic::addition::{eval_addition, eval_addition_recursively, generate_addition}; -use crate::arithmetic::division::{eval_division, eval_division_recursively, generate_division}; -use crate::arithmetic::multiplication::{ +use crate::alu::addition::{eval_addition, eval_addition_recursively, generate_addition}; +use crate::alu::division::{eval_division, eval_division_recursively, generate_division}; +use crate::alu::multiplication::{ eval_multiplication, eval_multiplication_recursively, generate_multiplication, }; -use crate::arithmetic::subtraction::{ +use crate::alu::subtraction::{ eval_subtraction, eval_subtraction_recursively, generate_subtraction, }; use crate::public_input_layout::NUM_PUBLIC_INPUTS; -use crate::registers::arithmetic::*; +use crate::registers::alu::*; use crate::registers::NUM_COLUMNS; mod addition; @@ -24,7 +24,7 @@ mod division; mod multiplication; mod subtraction; -pub(crate) fn generate_arithmetic_unit(values: &mut [F; NUM_COLUMNS]) { +pub(crate) fn generate_alu(values: &mut [F; NUM_COLUMNS]) { if values[IS_ADD].is_one() { generate_addition(values); } else if values[IS_SUB].is_one() { @@ -36,7 +36,7 @@ pub(crate) fn generate_arithmetic_unit(values: &mut [F; NUM_COL } } -pub(crate) fn eval_arithmetic_unit>( +pub(crate) fn eval_alu>( vars: StarkEvaluationVars, yield_constr: &mut ConstraintConsumer

, ) { @@ -54,7 +54,7 @@ pub(crate) fn eval_arithmetic_unit>( eval_division(local_values, yield_constr); } -pub(crate) fn eval_arithmetic_unit_recursively, const D: usize>( +pub(crate) fn eval_alu_recursively, const D: usize>( builder: &mut CircuitBuilder, vars: StarkEvaluationTargets, yield_constr: &mut RecursiveConstraintConsumer, diff --git a/system_zero/src/arithmetic/multiplication.rs b/system_zero/src/alu/multiplication.rs similarity index 96% rename from system_zero/src/arithmetic/multiplication.rs rename to system_zero/src/alu/multiplication.rs index 70c181d8..a88b42f6 100644 --- a/system_zero/src/arithmetic/multiplication.rs +++ b/system_zero/src/alu/multiplication.rs @@ -6,7 +6,7 @@ use plonky2::iop::ext_target::ExtensionTarget; use plonky2::plonk::circuit_builder::CircuitBuilder; use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; -use crate::registers::arithmetic::*; +use crate::registers::alu::*; use crate::registers::NUM_COLUMNS; pub(crate) fn generate_multiplication(values: &mut [F; NUM_COLUMNS]) { diff --git a/system_zero/src/arithmetic/subtraction.rs b/system_zero/src/alu/subtraction.rs similarity index 96% rename from system_zero/src/arithmetic/subtraction.rs rename to system_zero/src/alu/subtraction.rs index 267bac72..8f8bb810 100644 --- a/system_zero/src/arithmetic/subtraction.rs +++ b/system_zero/src/alu/subtraction.rs @@ -6,7 +6,7 @@ use plonky2::iop::ext_target::ExtensionTarget; use plonky2::plonk::circuit_builder::CircuitBuilder; use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; -use crate::registers::arithmetic::*; +use crate::registers::alu::*; use crate::registers::NUM_COLUMNS; pub(crate) fn generate_subtraction(values: &mut [F; NUM_COLUMNS]) { diff --git a/system_zero/src/lib.rs b/system_zero/src/lib.rs index 1c097573..35576cd3 100644 --- a/system_zero/src/lib.rs +++ b/system_zero/src/lib.rs @@ -2,7 +2,7 @@ #![allow(dead_code)] #![allow(unused_variables)] -mod arithmetic; 
+mod alu; mod core_registers; mod memory; mod permutation_unit; diff --git a/system_zero/src/registers/arithmetic.rs b/system_zero/src/registers/alu.rs similarity index 84% rename from system_zero/src/registers/arithmetic.rs rename to system_zero/src/registers/alu.rs index 92c0d2c3..b4f82dff 100644 --- a/system_zero/src/registers/arithmetic.rs +++ b/system_zero/src/registers/alu.rs @@ -1,13 +1,13 @@ -//! Arithmetic unit. +//! Arithmetic and logic unit. -pub(crate) const IS_ADD: usize = super::START_ARITHMETIC; +pub(crate) const IS_ADD: usize = super::START_ALU; pub(crate) const IS_SUB: usize = IS_ADD + 1; pub(crate) const IS_MUL: usize = IS_SUB + 1; pub(crate) const IS_DIV: usize = IS_MUL + 1; const START_SHARED_COLS: usize = IS_DIV + 1; -/// Within the arithmetic unit, there are shared columns which can be used by any arithmetic +/// Within the ALU, there are shared columns which can be used by any arithmetic/logic /// circuit, depending on which one is active this cycle. // Can be increased as needed as other operations are implemented. const NUM_SHARED_COLS: usize = 3; @@ -26,7 +26,7 @@ pub(crate) const COL_ADD_INPUT_3: usize = shared_col(2); // Note: Addition outputs three 16-bit chunks, and since these values need to be range-checked // anyway, we might as well use the range check unit's columns as our addition outputs. So the -// three proceeding columns are basically aliases, not columns owned by the arithmetic unit. +// three proceeding columns are basically aliases, not columns owned by the ALU. /// The first 16-bit chunk of the output, based on little-endian ordering. pub(crate) const COL_ADD_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(0); /// The second 16-bit chunk of the output, based on little-endian ordering. @@ -34,4 +34,4 @@ pub(crate) const COL_ADD_OUTPUT_2: usize = super::range_check_16::col_rc_16_inpu /// The third 16-bit chunk of the output, based on little-endian ordering. 
pub(crate) const COL_ADD_OUTPUT_3: usize = super::range_check_16::col_rc_16_input(2); -pub(super) const END: usize = super::START_ARITHMETIC + NUM_SHARED_COLS; +pub(super) const END: usize = super::START_ALU + NUM_SHARED_COLS; diff --git a/system_zero/src/registers/mod.rs b/system_zero/src/registers/mod.rs index 134a28bf..12688b1c 100644 --- a/system_zero/src/registers/mod.rs +++ b/system_zero/src/registers/mod.rs @@ -1,4 +1,4 @@ -pub(crate) mod arithmetic; +pub(crate) mod alu; pub(crate) mod boolean; pub(crate) mod core; pub(crate) mod logic; @@ -8,8 +8,8 @@ pub(crate) mod permutation; pub(crate) mod range_check_16; pub(crate) mod range_check_degree; -const START_ARITHMETIC: usize = 0; -const START_BOOLEAN: usize = arithmetic::END; +const START_ALU: usize = 0; +const START_BOOLEAN: usize = alu::END; const START_CORE: usize = boolean::END; const START_LOGIC: usize = core::END; const START_LOOKUP: usize = logic::END; diff --git a/system_zero/src/system_zero.rs b/system_zero/src/system_zero.rs index cd7796d7..c42a04a8 100644 --- a/system_zero/src/system_zero.rs +++ b/system_zero/src/system_zero.rs @@ -10,9 +10,7 @@ use starky::stark::Stark; use starky::vars::StarkEvaluationTargets; use starky::vars::StarkEvaluationVars; -use crate::arithmetic::{ - eval_arithmetic_unit, eval_arithmetic_unit_recursively, generate_arithmetic_unit, -}; +use crate::alu::{eval_alu, eval_alu_recursively, generate_alu}; use crate::core_registers::{ eval_core_registers, eval_core_registers_recursively, generate_first_row_core_registers, generate_next_row_core_registers, @@ -38,7 +36,7 @@ impl, const D: usize> SystemZero { let mut row = [F::ZERO; NUM_COLUMNS]; generate_first_row_core_registers(&mut row); - generate_arithmetic_unit(&mut row); + generate_alu(&mut row); generate_permutation_unit(&mut row); let mut trace = Vec::with_capacity(MIN_TRACE_ROWS); @@ -46,7 +44,7 @@ impl, const D: usize> SystemZero { loop { let mut next_row = [F::ZERO; NUM_COLUMNS]; generate_next_row_core_registers(&row, 
&mut next_row); - generate_arithmetic_unit(&mut next_row); + generate_alu(&mut next_row); generate_permutation_unit(&mut next_row); trace.push(row); @@ -84,7 +82,7 @@ impl, const D: usize> Stark for SystemZero, { eval_core_registers(vars, yield_constr); - eval_arithmetic_unit(vars, yield_constr); + eval_alu(vars, yield_constr); eval_permutation_unit::(vars, yield_constr); // TODO: Other units } @@ -96,7 +94,7 @@ impl, const D: usize> Stark for SystemZero, ) { eval_core_registers_recursively(builder, vars, yield_constr); - eval_arithmetic_unit_recursively(builder, vars, yield_constr); + eval_alu_recursively(builder, vars, yield_constr); eval_permutation_unit_recursively(builder, vars, yield_constr); // TODO: Other units } From bc3685587cc371f96eabd7d169498251e1db55b1 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Sun, 20 Feb 2022 17:48:31 -0700 Subject: [PATCH 08/32] Rename constraint methods (#497) Most of our constraints apply to all rows, and it seems safest to make that the "default". --- starky/src/constraint_consumer.rs | 20 ++++++++++---------- starky/src/fibonacci_stark.rs | 10 ++++++---- system_zero/src/alu/addition.rs | 4 ++-- system_zero/src/alu/mod.rs | 4 ++-- system_zero/src/core_registers.rs | 8 ++++---- system_zero/src/permutation_unit.rs | 28 ++++++++++++---------------- 6 files changed, 36 insertions(+), 38 deletions(-) diff --git a/starky/src/constraint_consumer.rs b/starky/src/constraint_consumer.rs index 88f66118..ada28730 100644 --- a/starky/src/constraint_consumer.rs +++ b/starky/src/constraint_consumer.rs @@ -53,12 +53,12 @@ impl ConstraintConsumer

{ } /// Add one constraint valid on all rows except the last. - pub fn constraint(&mut self, constraint: P) { - self.constraint_wrapping(constraint * self.z_last); + pub fn constraint_transition(&mut self, constraint: P) { + self.constraint(constraint * self.z_last); } /// Add one constraint on all rows. - pub fn constraint_wrapping(&mut self, constraint: P) { + pub fn constraint(&mut self, constraint: P) { for (&alpha, acc) in self.alphas.iter().zip(&mut self.constraint_accs) { *acc *= alpha; *acc += constraint; @@ -68,13 +68,13 @@ impl ConstraintConsumer

{ /// Add one constraint, but first multiply it by a filter such that it will only apply to the /// first row of the trace. pub fn constraint_first_row(&mut self, constraint: P) { - self.constraint_wrapping(constraint * self.lagrange_basis_first); + self.constraint(constraint * self.lagrange_basis_first); } /// Add one constraint, but first multiply it by a filter such that it will only apply to the /// last row of the trace. pub fn constraint_last_row(&mut self, constraint: P) { - self.constraint_wrapping(constraint * self.lagrange_basis_last); + self.constraint(constraint * self.lagrange_basis_last); } } @@ -122,17 +122,17 @@ impl, const D: usize> RecursiveConstraintConsumer, constraint: ExtensionTarget, ) { let filtered_constraint = builder.mul_extension(constraint, self.z_last); - self.constraint_wrapping(builder, filtered_constraint); + self.constraint(builder, filtered_constraint); } /// Add one constraint valid on all rows. - pub fn constraint_wrapping( + pub fn constraint( &mut self, builder: &mut CircuitBuilder, constraint: ExtensionTarget, @@ -150,7 +150,7 @@ impl, const D: usize> RecursiveConstraintConsumer, ) { let filtered_constraint = builder.mul_extension(constraint, self.lagrange_basis_first); - self.constraint_wrapping(builder, filtered_constraint); + self.constraint(builder, filtered_constraint); } /// Add one constraint, but first multiply it by a filter such that it will only apply to the @@ -161,6 +161,6 @@ impl, const D: usize> RecursiveConstraintConsumer, ) { let filtered_constraint = builder.mul_extension(constraint, self.lagrange_basis_last); - self.constraint_wrapping(builder, filtered_constraint); + self.constraint(builder, filtered_constraint); } } diff --git a/starky/src/fibonacci_stark.rs b/starky/src/fibonacci_stark.rs index bd1775e1..a0204359 100644 --- a/starky/src/fibonacci_stark.rs +++ b/starky/src/fibonacci_stark.rs @@ -68,9 +68,11 @@ impl, const D: usize> Stark for FibonacciStar .constraint_last_row(vars.local_values[1] - 
vars.public_inputs[Self::PI_INDEX_RES]); // x0' <- x1 - yield_constr.constraint(vars.next_values[0] - vars.local_values[1]); + yield_constr.constraint_transition(vars.next_values[0] - vars.local_values[1]); // x1' <- x0 + x1 - yield_constr.constraint(vars.next_values[1] - vars.local_values[0] - vars.local_values[1]); + yield_constr.constraint_transition( + vars.next_values[1] - vars.local_values[0] - vars.local_values[1], + ); } fn eval_ext_recursively( @@ -91,13 +93,13 @@ impl, const D: usize> Stark for FibonacciStar // x0' <- x1 let first_col_constraint = builder.sub_extension(vars.next_values[0], vars.local_values[1]); - yield_constr.constraint(builder, first_col_constraint); + yield_constr.constraint_transition(builder, first_col_constraint); // x1' <- x0 + x1 let second_col_constraint = { let tmp = builder.sub_extension(vars.next_values[1], vars.local_values[0]); builder.sub_extension(tmp, vars.local_values[1]) }; - yield_constr.constraint(builder, second_col_constraint); + yield_constr.constraint_transition(builder, second_col_constraint); } fn constraint_degree(&self) -> usize { diff --git a/system_zero/src/alu/addition.rs b/system_zero/src/alu/addition.rs index 068092e8..dc83ecb8 100644 --- a/system_zero/src/alu/addition.rs +++ b/system_zero/src/alu/addition.rs @@ -41,7 +41,7 @@ pub(crate) fn eval_addition>( let computed_out = in_1 + in_2 + in_3; - yield_constr.constraint_wrapping(is_add * (out - computed_out)); + yield_constr.constraint(is_add * (out - computed_out)); } pub(crate) fn eval_addition_recursively, const D: usize>( @@ -66,5 +66,5 @@ pub(crate) fn eval_addition_recursively, const D: u let diff = builder.sub_extension(out, computed_out); let filtered_diff = builder.mul_extension(is_add, diff); - yield_constr.constraint_wrapping(builder, filtered_diff); + yield_constr.constraint(builder, filtered_diff); } diff --git a/system_zero/src/alu/mod.rs b/system_zero/src/alu/mod.rs index 17a12df1..4e7e09fa 100644 --- a/system_zero/src/alu/mod.rs +++ 
b/system_zero/src/alu/mod.rs @@ -45,7 +45,7 @@ pub(crate) fn eval_alu>( // Check that the operation flag values are binary. for col in [IS_ADD, IS_SUB, IS_MUL, IS_DIV] { let val = local_values[col]; - yield_constr.constraint_wrapping(val * val - val); + yield_constr.constraint(val * val - val); } eval_addition(local_values, yield_constr); @@ -65,7 +65,7 @@ pub(crate) fn eval_alu_recursively, const D: usize> for col in [IS_ADD, IS_SUB, IS_MUL, IS_DIV] { let val = local_values[col]; let constraint = builder.mul_sub_extension(val, val, val); - yield_constr.constraint_wrapping(builder, constraint); + yield_constr.constraint(builder, constraint); } eval_addition_recursively(builder, local_values, yield_constr); diff --git a/system_zero/src/core_registers.rs b/system_zero/src/core_registers.rs index c8c6533b..1f33611a 100644 --- a/system_zero/src/core_registers.rs +++ b/system_zero/src/core_registers.rs @@ -49,7 +49,7 @@ pub(crate) fn eval_core_registers>( let next_clock = vars.next_values[COL_CLOCK]; let delta_clock = next_clock - local_clock; yield_constr.constraint_first_row(local_clock); - yield_constr.constraint(delta_clock - F::ONE); + yield_constr.constraint_transition(delta_clock - F::ONE); // The 16-bit table must start with 0, end with 2^16 - 1, and increment by 0 or 1. let local_range_16 = vars.local_values[COL_RANGE_16]; @@ -57,7 +57,7 @@ pub(crate) fn eval_core_registers>( let delta_range_16 = next_range_16 - local_range_16; yield_constr.constraint_first_row(local_range_16); yield_constr.constraint_last_row(local_range_16 - F::from_canonical_u64((1 << 16) - 1)); - yield_constr.constraint(delta_range_16 * delta_range_16 - delta_range_16); + yield_constr.constraint_transition(delta_range_16 * delta_range_16 - delta_range_16); // TODO constraints for stack etc. 
} @@ -77,7 +77,7 @@ pub(crate) fn eval_core_registers_recursively, cons let delta_clock = builder.sub_extension(next_clock, local_clock); yield_constr.constraint_first_row(builder, local_clock); let constraint = builder.sub_extension(delta_clock, one_ext); - yield_constr.constraint(builder, constraint); + yield_constr.constraint_transition(builder, constraint); // The 16-bit table must start with 0, end with 2^16 - 1, and increment by 0 or 1. let local_range_16 = vars.local_values[COL_RANGE_16]; @@ -87,7 +87,7 @@ pub(crate) fn eval_core_registers_recursively, cons let constraint = builder.sub_extension(local_range_16, max_u16_ext); yield_constr.constraint_last_row(builder, constraint); let constraint = builder.mul_add_extension(delta_range_16, delta_range_16, delta_range_16); - yield_constr.constraint(builder, constraint); + yield_constr.constraint_transition(builder, constraint); // TODO constraints for stack etc. } diff --git a/system_zero/src/permutation_unit.rs b/system_zero/src/permutation_unit.rs index 366cff65..079ab14a 100644 --- a/system_zero/src/permutation_unit.rs +++ b/system_zero/src/permutation_unit.rs @@ -127,8 +127,7 @@ pub(crate) fn eval_permutation_unit( for i in 0..SPONGE_WIDTH { let state_cubed = state[i] * state[i].square(); - yield_constr - .constraint_wrapping(state_cubed - local_values[col_full_first_mid_sbox(r, i)]); + yield_constr.constraint(state_cubed - local_values[col_full_first_mid_sbox(r, i)]); let state_cubed = local_values[col_full_first_mid_sbox(r, i)]; state[i] *= state_cubed.square(); // Form state ** 7. 
} @@ -136,8 +135,7 @@ pub(crate) fn eval_permutation_unit( state = mds_layer(state); for i in 0..SPONGE_WIDTH { - yield_constr - .constraint_wrapping(state[i] - local_values[col_full_first_after_mds(r, i)]); + yield_constr.constraint(state[i] - local_values[col_full_first_after_mds(r, i)]); state[i] = local_values[col_full_first_after_mds(r, i)]; } } @@ -146,10 +144,10 @@ pub(crate) fn eval_permutation_unit( state = constant_layer(state, HALF_N_FULL_ROUNDS + r); let state0_cubed = state[0] * state[0].square(); - yield_constr.constraint_wrapping(state0_cubed - local_values[col_partial_mid_sbox(r)]); + yield_constr.constraint(state0_cubed - local_values[col_partial_mid_sbox(r)]); let state0_cubed = local_values[col_partial_mid_sbox(r)]; state[0] *= state0_cubed.square(); // Form state ** 7. - yield_constr.constraint_wrapping(state[0] - local_values[col_partial_after_sbox(r)]); + yield_constr.constraint(state[0] - local_values[col_partial_after_sbox(r)]); state[0] = local_values[col_partial_after_sbox(r)]; state = mds_layer(state); @@ -160,8 +158,7 @@ pub(crate) fn eval_permutation_unit( for i in 0..SPONGE_WIDTH { let state_cubed = state[i] * state[i].square(); - yield_constr - .constraint_wrapping(state_cubed - local_values[col_full_second_mid_sbox(r, i)]); + yield_constr.constraint(state_cubed - local_values[col_full_second_mid_sbox(r, i)]); let state_cubed = local_values[col_full_second_mid_sbox(r, i)]; state[i] *= state_cubed.square(); // Form state ** 7. 
} @@ -169,8 +166,7 @@ pub(crate) fn eval_permutation_unit( state = mds_layer(state); for i in 0..SPONGE_WIDTH { - yield_constr - .constraint_wrapping(state[i] - local_values[col_full_second_after_mds(r, i)]); + yield_constr.constraint(state[i] - local_values[col_full_second_after_mds(r, i)]); state[i] = local_values[col_full_second_after_mds(r, i)]; } } @@ -197,7 +193,7 @@ pub(crate) fn eval_permutation_unit_recursively, co let state_cubed = builder.cube_extension(state[i]); let diff = builder.sub_extension(state_cubed, local_values[col_full_first_mid_sbox(r, i)]); - yield_constr.constraint_wrapping(builder, diff); + yield_constr.constraint(builder, diff); let state_cubed = local_values[col_full_first_mid_sbox(r, i)]; state[i] = builder.mul_many_extension(&[state[i], state_cubed, state_cubed]); // Form state ** 7. @@ -208,7 +204,7 @@ pub(crate) fn eval_permutation_unit_recursively, co for i in 0..SPONGE_WIDTH { let diff = builder.sub_extension(state[i], local_values[col_full_first_after_mds(r, i)]); - yield_constr.constraint_wrapping(builder, diff); + yield_constr.constraint(builder, diff); state[i] = local_values[col_full_first_after_mds(r, i)]; } } @@ -218,11 +214,11 @@ pub(crate) fn eval_permutation_unit_recursively, co let state0_cubed = builder.cube_extension(state[0]); let diff = builder.sub_extension(state0_cubed, local_values[col_partial_mid_sbox(r)]); - yield_constr.constraint_wrapping(builder, diff); + yield_constr.constraint(builder, diff); let state0_cubed = local_values[col_partial_mid_sbox(r)]; state[0] = builder.mul_many_extension(&[state[0], state0_cubed, state0_cubed]); // Form state ** 7. 
let diff = builder.sub_extension(state[0], local_values[col_partial_after_sbox(r)]); - yield_constr.constraint_wrapping(builder, diff); + yield_constr.constraint(builder, diff); state[0] = local_values[col_partial_after_sbox(r)]; state = F::mds_layer_recursive(builder, &state); @@ -239,7 +235,7 @@ pub(crate) fn eval_permutation_unit_recursively, co let state_cubed = builder.cube_extension(state[i]); let diff = builder.sub_extension(state_cubed, local_values[col_full_second_mid_sbox(r, i)]); - yield_constr.constraint_wrapping(builder, diff); + yield_constr.constraint(builder, diff); let state_cubed = local_values[col_full_second_mid_sbox(r, i)]; state[i] = builder.mul_many_extension(&[state[i], state_cubed, state_cubed]); // Form state ** 7. @@ -250,7 +246,7 @@ pub(crate) fn eval_permutation_unit_recursively, co for i in 0..SPONGE_WIDTH { let diff = builder.sub_extension(state[i], local_values[col_full_second_after_mds(r, i)]); - yield_constr.constraint_wrapping(builder, diff); + yield_constr.constraint(builder, diff); state[i] = local_values[col_full_second_after_mds(r, i)]; } } From 6072fab0770eb2f9797bdc09997e72b85282e77f Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Mon, 21 Feb 2022 00:39:04 -0800 Subject: [PATCH 09/32] Implement a mul-add circuit in the ALU (#495) * Implement a mul-add circuit in the ALU The inputs are assumed to be `u32`s, while the output is encoded as four `u16 limbs`. Each output limb is range-checked. So, our basic mul-add constraint looks like out_0 + 2^16 out_1 + 2^32 out_2 + 2^48 out_3 = in_1 * in_2 + in_3 The right hand side will never overflow, since `u32::MAX * u32::MAX + u32::MAX < |F|`. However, the left hand side could overflow, even though we know each limb is less than `2^16`. For example, an operation like `0 * 0 + 0` could have two possible outputs, 0 and `|F|`, both of which would satisfy the constraint above. To prevent these non-canonical outputs, we need a comparison to enforce that `out < |F|`. 
Thankfully, `F::MAX` has all zeros in its low 32 bits, so `x <= F::MAX` is equivalent to `x_lo == 0 || x_hi != u32::MAX`. `x_hi != u32::MAX` can be checked by showing that `u32::MAX - x_hi` has an inverse. If `x_hi != u32::MAX`, the prover provides this (purported) inverse in an advice column. See @bobbinth's [post](https://hackmd.io/NC-yRmmtRQSvToTHb96e8Q#Checking-element-validity) for details. That post calls the purported inverse column `m`; I named it `canonical_inv` in this code. * fix * PR feedback * naming --- system_zero/Cargo.toml | 1 + system_zero/src/alu/addition.rs | 36 ++++----- system_zero/src/alu/canonical.rs | 109 ++++++++++++++++++++++++++ system_zero/src/alu/mod.rs | 13 ++- system_zero/src/alu/mul_add.rs | 88 +++++++++++++++++++++ system_zero/src/alu/multiplication.rs | 31 -------- system_zero/src/registers/alu.rs | 34 ++++++-- 7 files changed, 249 insertions(+), 63 deletions(-) create mode 100644 system_zero/src/alu/canonical.rs create mode 100644 system_zero/src/alu/mul_add.rs delete mode 100644 system_zero/src/alu/multiplication.rs diff --git a/system_zero/Cargo.toml b/system_zero/Cargo.toml index e5b617c9..032bfb53 100644 --- a/system_zero/Cargo.toml +++ b/system_zero/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [dependencies] plonky2 = { path = "../plonky2" } +plonky2_util = { path = "../util" } starky = { path = "../starky" } anyhow = "1.0.40" env_logger = "0.9.0" diff --git a/system_zero/src/alu/addition.rs b/system_zero/src/alu/addition.rs index dc83ecb8..c2293b4a 100644 --- a/system_zero/src/alu/addition.rs +++ b/system_zero/src/alu/addition.rs @@ -11,14 +11,14 @@ use crate::registers::alu::*; use crate::registers::NUM_COLUMNS; pub(crate) fn generate_addition(values: &mut [F; NUM_COLUMNS]) { - let in_1 = values[COL_ADD_INPUT_1].to_canonical_u64(); - let in_2 = values[COL_ADD_INPUT_2].to_canonical_u64(); - let in_3 = values[COL_ADD_INPUT_3].to_canonical_u64(); + let in_1 = values[COL_ADD_INPUT_0].to_canonical_u64(); + let in_2 = 
values[COL_ADD_INPUT_1].to_canonical_u64(); + let in_3 = values[COL_ADD_INPUT_2].to_canonical_u64(); let output = in_1 + in_2 + in_3; - values[COL_ADD_OUTPUT_1] = F::from_canonical_u16(output as u16); - values[COL_ADD_OUTPUT_2] = F::from_canonical_u16((output >> 16) as u16); - values[COL_ADD_OUTPUT_3] = F::from_canonical_u16((output >> 32) as u16); + values[COL_ADD_OUTPUT_0] = F::from_canonical_u16(output as u16); + values[COL_ADD_OUTPUT_1] = F::from_canonical_u16((output >> 16) as u16); + values[COL_ADD_OUTPUT_2] = F::from_canonical_u16((output >> 32) as u16); } pub(crate) fn eval_addition>( @@ -26,12 +26,12 @@ pub(crate) fn eval_addition>( yield_constr: &mut ConstraintConsumer

, ) { let is_add = local_values[IS_ADD]; - let in_1 = local_values[COL_ADD_INPUT_1]; - let in_2 = local_values[COL_ADD_INPUT_2]; - let in_3 = local_values[COL_ADD_INPUT_3]; - let out_1 = local_values[COL_ADD_OUTPUT_1]; - let out_2 = local_values[COL_ADD_OUTPUT_2]; - let out_3 = local_values[COL_ADD_OUTPUT_3]; + let in_1 = local_values[COL_ADD_INPUT_0]; + let in_2 = local_values[COL_ADD_INPUT_1]; + let in_3 = local_values[COL_ADD_INPUT_2]; + let out_1 = local_values[COL_ADD_OUTPUT_0]; + let out_2 = local_values[COL_ADD_OUTPUT_1]; + let out_3 = local_values[COL_ADD_OUTPUT_2]; let weight_2 = F::from_canonical_u64(1 << 16); let weight_3 = F::from_canonical_u64(1 << 32); @@ -50,12 +50,12 @@ pub(crate) fn eval_addition_recursively, const D: u yield_constr: &mut RecursiveConstraintConsumer, ) { let is_add = local_values[IS_ADD]; - let in_1 = local_values[COL_ADD_INPUT_1]; - let in_2 = local_values[COL_ADD_INPUT_2]; - let in_3 = local_values[COL_ADD_INPUT_3]; - let out_1 = local_values[COL_ADD_OUTPUT_1]; - let out_2 = local_values[COL_ADD_OUTPUT_2]; - let out_3 = local_values[COL_ADD_OUTPUT_3]; + let in_1 = local_values[COL_ADD_INPUT_0]; + let in_2 = local_values[COL_ADD_INPUT_1]; + let in_3 = local_values[COL_ADD_INPUT_2]; + let out_1 = local_values[COL_ADD_OUTPUT_0]; + let out_2 = local_values[COL_ADD_OUTPUT_1]; + let out_3 = local_values[COL_ADD_OUTPUT_2]; let limb_base = builder.constant(F::from_canonical_u64(1 << 16)); // Note that this can't overflow. Since each output limb has been range checked as 16-bits, diff --git a/system_zero/src/alu/canonical.rs b/system_zero/src/alu/canonical.rs new file mode 100644 index 00000000..fb90eb0d --- /dev/null +++ b/system_zero/src/alu/canonical.rs @@ -0,0 +1,109 @@ +//! Helper methods for checking that a value is canonical, i.e. is less than `|F|`. +//! +//! 
See https://hackmd.io/NC-yRmmtRQSvToTHb96e8Q#Checking-element-validity + +use plonky2::field::extension_field::Extendable; +use plonky2::field::field_types::Field; +use plonky2::field::packed_field::PackedField; +use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; +use plonky2::plonk::circuit_builder::CircuitBuilder; +use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; + +/// Computes the helper value used in the is-canonical check. +pub(crate) fn compute_canonical_inv(value_to_check: u64) -> F { + let value_hi_32 = (value_to_check >> 32) as u32; + + if value_hi_32 == u32::MAX { + debug_assert_eq!(value_to_check as u32, 0, "Value was not canonical."); + // In this case it doesn't matter what we put for the purported inverse value. The + // constraint containing this value will get multiplied by the low u32 limb, which will be + // zero, satisfying the constraint regardless of what we put here. + F::ZERO + } else { + F::from_canonical_u32(u32::MAX - value_hi_32).inverse() + } +} + +/// Adds constraints to require that a list of four `u16`s, in little-endian order, represent a +/// canonical field element, i.e. that their combined value is less than `|F|`. Returns their +/// combined value. +pub(crate) fn combine_u16s_check_canonical>( + limb_0_u16: P, + limb_1_u16: P, + limb_2_u16: P, + limb_3_u16: P, + inverse: P, + yield_constr: &mut ConstraintConsumer

, +) -> P { + let base = F::from_canonical_u32(1 << 16); + let limb_0_u32 = limb_0_u16 + limb_1_u16 * base; + let limb_1_u32 = limb_2_u16 + limb_3_u16 * base; + combine_u32s_check_canonical(limb_0_u32, limb_1_u32, inverse, yield_constr) +} + +/// Adds constraints to require that a list of four `u16`s, in little-endian order, represent a +/// canonical field element, i.e. that their combined value is less than `|F|`. Returns their +/// combined value. +pub(crate) fn combine_u16s_check_canonical_circuit, const D: usize>( + builder: &mut CircuitBuilder, + limb_0_u16: ExtensionTarget, + limb_1_u16: ExtensionTarget, + limb_2_u16: ExtensionTarget, + limb_3_u16: ExtensionTarget, + inverse: ExtensionTarget, + yield_constr: &mut RecursiveConstraintConsumer, +) -> ExtensionTarget { + let base = F::from_canonical_u32(1 << 16); + let limb_0_u32 = builder.mul_const_add_extension(base, limb_1_u16, limb_0_u16); + let limb_1_u32 = builder.mul_const_add_extension(base, limb_3_u16, limb_2_u16); + combine_u32s_check_canonical_circuit(builder, limb_0_u32, limb_1_u32, inverse, yield_constr) +} + +/// Adds constraints to require that a pair of `u32`s, in little-endian order, represent a canonical +/// field element, i.e. that their combined value is less than `|F|`. Returns their combined value. +pub(crate) fn combine_u32s_check_canonical>( + limb_0_u32: P, + limb_1_u32: P, + inverse: P, + yield_constr: &mut ConstraintConsumer

, +) -> P { + let u32_max = P::from(F::from_canonical_u32(u32::MAX)); + + // This is zero if and only if the high limb is `u32::MAX`. + let diff = u32_max - limb_1_u32; + // If this is zero, the diff is invertible, so the high limb is not `u32::MAX`. + let hi_not_max = inverse * diff - F::ONE; + // If this is zero, either the high limb is not `u32::MAX`, or the low limb is zero. + let hi_not_max_or_lo_zero = hi_not_max * limb_0_u32; + + yield_constr.constraint(hi_not_max_or_lo_zero); + + // Return the combined value. + limb_0_u32 + limb_1_u32 * F::from_canonical_u64(1 << 32) +} + +/// Adds constraints to require that a pair of `u32`s, in little-endian order, represent a canonical +/// field element, i.e. that their combined value is less than `|F|`. Returns their combined value. +pub(crate) fn combine_u32s_check_canonical_circuit, const D: usize>( + builder: &mut CircuitBuilder, + limb_0_u32: ExtensionTarget, + limb_1_u32: ExtensionTarget, + inverse: ExtensionTarget, + yield_constr: &mut RecursiveConstraintConsumer, +) -> ExtensionTarget { + let one = builder.one_extension(); + let u32_max = builder.constant_extension(F::Extension::from_canonical_u32(u32::MAX)); + + // This is zero if and only if the high limb is `u32::MAX`. + let diff = builder.sub_extension(u32_max, limb_1_u32); + // If this is zero, the diff is invertible, so the high limb is not `u32::MAX`. + let hi_not_max = builder.mul_sub_extension(inverse, diff, one); + // If this is zero, either the high limb is not `u32::MAX`, or the low limb is zero. + let hi_not_max_or_lo_zero = builder.mul_extension(hi_not_max, limb_0_u32); + + yield_constr.constraint(builder, hi_not_max_or_lo_zero); + + // Return the combined value. 
+ builder.mul_const_add_extension(F::from_canonical_u64(1 << 32), limb_1_u32, limb_0_u32) +} diff --git a/system_zero/src/alu/mod.rs b/system_zero/src/alu/mod.rs index 4e7e09fa..730ca302 100644 --- a/system_zero/src/alu/mod.rs +++ b/system_zero/src/alu/mod.rs @@ -9,9 +9,7 @@ use starky::vars::StarkEvaluationVars; use crate::alu::addition::{eval_addition, eval_addition_recursively, generate_addition}; use crate::alu::division::{eval_division, eval_division_recursively, generate_division}; -use crate::alu::multiplication::{ - eval_multiplication, eval_multiplication_recursively, generate_multiplication, -}; +use crate::alu::mul_add::{eval_mul_add, eval_mul_add_recursively, generate_mul_add}; use crate::alu::subtraction::{ eval_subtraction, eval_subtraction_recursively, generate_subtraction, }; @@ -20,8 +18,9 @@ use crate::registers::alu::*; use crate::registers::NUM_COLUMNS; mod addition; +mod canonical; mod division; -mod multiplication; +mod mul_add; mod subtraction; pub(crate) fn generate_alu(values: &mut [F; NUM_COLUMNS]) { @@ -30,7 +29,7 @@ pub(crate) fn generate_alu(values: &mut [F; NUM_COLUMNS]) { } else if values[IS_SUB].is_one() { generate_subtraction(values); } else if values[IS_MUL].is_one() { - generate_multiplication(values); + generate_mul_add(values); } else if values[IS_DIV].is_one() { generate_division(values); } @@ -50,7 +49,7 @@ pub(crate) fn eval_alu>( eval_addition(local_values, yield_constr); eval_subtraction(local_values, yield_constr); - eval_multiplication(local_values, yield_constr); + eval_mul_add(local_values, yield_constr); eval_division(local_values, yield_constr); } @@ -70,6 +69,6 @@ pub(crate) fn eval_alu_recursively, const D: usize> eval_addition_recursively(builder, local_values, yield_constr); eval_subtraction_recursively(builder, local_values, yield_constr); - eval_multiplication_recursively(builder, local_values, yield_constr); + eval_mul_add_recursively(builder, local_values, yield_constr); eval_division_recursively(builder, 
local_values, yield_constr); } diff --git a/system_zero/src/alu/mul_add.rs b/system_zero/src/alu/mul_add.rs new file mode 100644 index 00000000..53ba34a2 --- /dev/null +++ b/system_zero/src/alu/mul_add.rs @@ -0,0 +1,88 @@ +use plonky2::field::extension_field::Extendable; +use plonky2::field::field_types::{Field, PrimeField64}; +use plonky2::field::packed_field::PackedField; +use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; +use plonky2::plonk::circuit_builder::CircuitBuilder; +use plonky2_util::assume; +use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; + +use crate::alu::canonical::*; +use crate::registers::alu::*; +use crate::registers::NUM_COLUMNS; + +pub(crate) fn generate_mul_add(values: &mut [F; NUM_COLUMNS]) { + let factor_0 = values[COL_MUL_ADD_FACTOR_0].to_canonical_u64(); + let factor_1 = values[COL_MUL_ADD_FACTOR_1].to_canonical_u64(); + let addend = values[COL_MUL_ADD_ADDEND].to_canonical_u64(); + + // Let the compiler know that each input must fit in 32 bits. + assume(factor_0 <= u32::MAX as u64); + assume(factor_1 <= u32::MAX as u64); + assume(addend <= u32::MAX as u64); + + let output = factor_0 * factor_1 + addend; + + // An advice value used to help verify that the limbs represent a canonical field element. + values[COL_MUL_ADD_RESULT_CANONICAL_INV] = compute_canonical_inv(output); + + values[COL_MUL_ADD_OUTPUT_0] = F::from_canonical_u16(output as u16); + values[COL_MUL_ADD_OUTPUT_1] = F::from_canonical_u16((output >> 16) as u16); + values[COL_MUL_ADD_OUTPUT_2] = F::from_canonical_u16((output >> 32) as u16); + values[COL_MUL_ADD_OUTPUT_3] = F::from_canonical_u16((output >> 48) as u16); +} + +pub(crate) fn eval_mul_add>( + local_values: &[P; NUM_COLUMNS], + yield_constr: &mut ConstraintConsumer

, +) { + let is_mul = local_values[IS_MUL]; + let factor_0 = local_values[COL_MUL_ADD_FACTOR_0]; + let factor_1 = local_values[COL_MUL_ADD_FACTOR_1]; + let addend = local_values[COL_MUL_ADD_ADDEND]; + let output_1 = local_values[COL_MUL_ADD_OUTPUT_0]; + let output_2 = local_values[COL_MUL_ADD_OUTPUT_1]; + let output_3 = local_values[COL_MUL_ADD_OUTPUT_2]; + let output_4 = local_values[COL_MUL_ADD_OUTPUT_3]; + let result_canonical_inv = local_values[COL_MUL_ADD_RESULT_CANONICAL_INV]; + + let computed_output = factor_0 * factor_1 + addend; + let output = combine_u16s_check_canonical( + output_1, + output_2, + output_3, + output_4, + result_canonical_inv, + yield_constr, + ); + yield_constr.constraint(computed_output - output); +} + +pub(crate) fn eval_mul_add_recursively, const D: usize>( + builder: &mut CircuitBuilder, + local_values: &[ExtensionTarget; NUM_COLUMNS], + yield_constr: &mut RecursiveConstraintConsumer, +) { + let is_mul = local_values[IS_MUL]; + let factor_0 = local_values[COL_MUL_ADD_FACTOR_0]; + let factor_1 = local_values[COL_MUL_ADD_FACTOR_1]; + let addend = local_values[COL_MUL_ADD_ADDEND]; + let output_1 = local_values[COL_MUL_ADD_OUTPUT_0]; + let output_2 = local_values[COL_MUL_ADD_OUTPUT_1]; + let output_3 = local_values[COL_MUL_ADD_OUTPUT_2]; + let output_4 = local_values[COL_MUL_ADD_OUTPUT_3]; + let result_canonical_inv = local_values[COL_MUL_ADD_RESULT_CANONICAL_INV]; + + let computed_output = builder.mul_add_extension(factor_0, factor_1, addend); + let output = combine_u16s_check_canonical_circuit( + builder, + output_1, + output_2, + output_3, + output_4, + result_canonical_inv, + yield_constr, + ); + let diff = builder.sub_extension(computed_output, output); + yield_constr.constraint(builder, diff); +} diff --git a/system_zero/src/alu/multiplication.rs b/system_zero/src/alu/multiplication.rs deleted file mode 100644 index a88b42f6..00000000 --- a/system_zero/src/alu/multiplication.rs +++ /dev/null @@ -1,31 +0,0 @@ -use 
plonky2::field::extension_field::Extendable; -use plonky2::field::field_types::{Field, PrimeField64}; -use plonky2::field::packed_field::PackedField; -use plonky2::hash::hash_types::RichField; -use plonky2::iop::ext_target::ExtensionTarget; -use plonky2::plonk::circuit_builder::CircuitBuilder; -use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; - -use crate::registers::alu::*; -use crate::registers::NUM_COLUMNS; - -pub(crate) fn generate_multiplication(values: &mut [F; NUM_COLUMNS]) { - // TODO -} - -pub(crate) fn eval_multiplication>( - local_values: &[P; NUM_COLUMNS], - yield_constr: &mut ConstraintConsumer

, -) { - let is_mul = local_values[IS_MUL]; - // TODO -} - -pub(crate) fn eval_multiplication_recursively, const D: usize>( - builder: &mut CircuitBuilder, - local_values: &[ExtensionTarget; NUM_COLUMNS], - yield_constr: &mut RecursiveConstraintConsumer, -) { - let is_mul = local_values[IS_MUL]; - // TODO -} diff --git a/system_zero/src/registers/alu.rs b/system_zero/src/registers/alu.rs index b4f82dff..e678d8e4 100644 --- a/system_zero/src/registers/alu.rs +++ b/system_zero/src/registers/alu.rs @@ -10,7 +10,7 @@ const START_SHARED_COLS: usize = IS_DIV + 1; /// Within the ALU, there are shared columns which can be used by any arithmetic/logic /// circuit, depending on which one is active this cycle. // Can be increased as needed as other operations are implemented. -const NUM_SHARED_COLS: usize = 3; +const NUM_SHARED_COLS: usize = 4; const fn shared_col(i: usize) -> usize { debug_assert!(i < NUM_SHARED_COLS); @@ -18,20 +18,40 @@ const fn shared_col(i: usize) -> usize { } /// The first value to be added; treated as an unsigned u32. -pub(crate) const COL_ADD_INPUT_1: usize = shared_col(0); +pub(crate) const COL_ADD_INPUT_0: usize = shared_col(0); /// The second value to be added; treated as an unsigned u32. -pub(crate) const COL_ADD_INPUT_2: usize = shared_col(1); +pub(crate) const COL_ADD_INPUT_1: usize = shared_col(1); /// The third value to be added; treated as an unsigned u32. -pub(crate) const COL_ADD_INPUT_3: usize = shared_col(2); +pub(crate) const COL_ADD_INPUT_2: usize = shared_col(2); // Note: Addition outputs three 16-bit chunks, and since these values need to be range-checked // anyway, we might as well use the range check unit's columns as our addition outputs. So the // three proceeding columns are basically aliases, not columns owned by the ALU. /// The first 16-bit chunk of the output, based on little-endian ordering. 
-pub(crate) const COL_ADD_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(0); +pub(crate) const COL_ADD_OUTPUT_0: usize = super::range_check_16::col_rc_16_input(0); /// The second 16-bit chunk of the output, based on little-endian ordering. -pub(crate) const COL_ADD_OUTPUT_2: usize = super::range_check_16::col_rc_16_input(1); +pub(crate) const COL_ADD_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(1); /// The third 16-bit chunk of the output, based on little-endian ordering. -pub(crate) const COL_ADD_OUTPUT_3: usize = super::range_check_16::col_rc_16_input(2); +pub(crate) const COL_ADD_OUTPUT_2: usize = super::range_check_16::col_rc_16_input(2); + +/// The first value to be multiplied; treated as an unsigned u32. +pub(crate) const COL_MUL_ADD_FACTOR_0: usize = shared_col(0); +/// The second value to be multiplied; treated as an unsigned u32. +pub(crate) const COL_MUL_ADD_FACTOR_1: usize = shared_col(1); +/// The value to be added to the product; treated as an unsigned u32. +pub(crate) const COL_MUL_ADD_ADDEND: usize = shared_col(2); + +/// The inverse of `u32::MAX - result_hi`, where `output_hi` is the high 32-bits of the result. +/// See https://hackmd.io/NC-yRmmtRQSvToTHb96e8Q#Checking-element-validity +pub(crate) const COL_MUL_ADD_RESULT_CANONICAL_INV: usize = shared_col(3); + +/// The first 16-bit chunk of the output, based on little-endian ordering. +pub(crate) const COL_MUL_ADD_OUTPUT_0: usize = super::range_check_16::col_rc_16_input(0); +/// The second 16-bit chunk of the output, based on little-endian ordering. +pub(crate) const COL_MUL_ADD_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(1); +/// The third 16-bit chunk of the output, based on little-endian ordering. +pub(crate) const COL_MUL_ADD_OUTPUT_2: usize = super::range_check_16::col_rc_16_input(2); +/// The fourth 16-bit chunk of the output, based on little-endian ordering. 
+pub(crate) const COL_MUL_ADD_OUTPUT_3: usize = super::range_check_16::col_rc_16_input(3); pub(super) const END: usize = super::START_ALU + NUM_SHARED_COLS; From d52fabaf26975be5d00ea520282b9dd23bd80045 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Mon, 21 Feb 2022 10:18:05 +0100 Subject: [PATCH 10/32] First pass --- starky/src/lib.rs | 1 + starky/src/permutation.rs | 54 ++++++++++++++++++++------------ starky/src/vanishing_poly.rs | 60 ++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 20 deletions(-) create mode 100644 starky/src/vanishing_poly.rs diff --git a/starky/src/lib.rs b/starky/src/lib.rs index 1df9629e..51a73479 100644 --- a/starky/src/lib.rs +++ b/starky/src/lib.rs @@ -14,6 +14,7 @@ pub mod prover; pub mod recursive_verifier; pub mod stark; pub mod stark_testing; +pub mod vanishing_poly; pub mod vars; pub mod verifier; diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index 01cfa8bf..9306d0b2 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -62,26 +62,12 @@ where stark.permutation_batch_size(), ); - // Get a list of instances of our batch-permutation argument. These are permutation arguments - // where the same `Z(x)` polynomial is used to check more than one permutation. - // Before batching, each permutation pair leads to `num_challenges` permutation arguments, so we - // start with the cartesian product of `permutation_pairs` and `0..num_challenges`. Then we - // chunk these arguments based on our batch size. 
- let permutation_batches = permutation_pairs - .iter() - .cartesian_product(0..config.num_challenges) - .chunks(stark.permutation_batch_size()) - .into_iter() - .map(|batch| { - batch - .enumerate() - .map(|(i, (pair, chal))| { - let challenge = permutation_challenge_sets[i].challenges[chal]; - PermutationInstance { pair, challenge } - }) - .collect_vec() - }) - .collect_vec(); + let permutation_batches = get_permutation_batches( + &permutation_pairs, + &permutation_challenge_sets, + config.num_challenges, + stark.permutation_batch_size(), + ); permutation_batches .into_par_iter() @@ -178,3 +164,31 @@ pub(crate) fn get_n_permutation_challenge_sets>( .map(|_| get_permutation_challenge_set(challenger, num_challenges)) .collect() } + +/// Get a list of instances of our batch-permutation argument. These are permutation arguments +/// where the same `Z(x)` polynomial is used to check more than one permutation. +/// Before batching, each permutation pair leads to `num_challenges` permutation arguments, so we +/// start with the cartesian product of `permutation_pairs` and `0..num_challenges`. Then we +/// chunk these arguments based on our batch size. 
+pub(crate) fn get_permutation_batches<'a, F: Field>( + permutation_pairs: &'a [PermutationPair], + permutation_challenge_sets: &[PermutationChallengeSet], + num_challenges: usize, + batch_size: usize, +) -> Vec>> { + permutation_pairs + .iter() + .cartesian_product(0..num_challenges) + .chunks(batch_size) + .into_iter() + .map(|batch| { + batch + .enumerate() + .map(|(i, (pair, chal))| { + let challenge = permutation_challenge_sets[i].challenges[chal]; + PermutationInstance { pair, challenge } + }) + .collect_vec() + }) + .collect() +} diff --git a/starky/src/vanishing_poly.rs b/starky/src/vanishing_poly.rs new file mode 100644 index 00000000..6f7225b5 --- /dev/null +++ b/starky/src/vanishing_poly.rs @@ -0,0 +1,60 @@ +use plonky2::field::extension_field::Extendable; +use plonky2::field::packed_field::PackedField; +use plonky2::hash::hash_types::RichField; +use plonky2::plonk::config::GenericConfig; +use rayon::prelude::*; + +use crate::config::StarkConfig; +use crate::constraint_consumer::ConstraintConsumer; +use crate::permutation::{get_permutation_batches, PermutationChallenge}; +use crate::stark::Stark; +use crate::vars::StarkEvaluationVars; + +pub(crate) fn eval_vanishing_poly( + stark: S, + config: &StarkConfig, + vars: StarkEvaluationVars, + local_zs: &[F::Extension], + next_zs: &[F::Extension], + mut consumer: ConstraintConsumer, + permutation_challenge_sets: &[PermutationChallenge], +) where + F: RichField + Extendable, + C: GenericConfig, + S: Stark, + [(); S::COLUMNS]:, + [(); S::PUBLIC_INPUTS]:, +{ + stark.eval_packed_base(vars, &mut consumer); +} + +fn eval_permutation_checks( + stark: S, + config: &StarkConfig, + vars: StarkEvaluationVars, + local_zs: &[F::Extension], + next_zs: &[F::Extension], + mut consumer: ConstraintConsumer, + permutation_challenge_sets: &[PermutationChallenge], +) where + F: RichField + Extendable, + C: GenericConfig, + S: Stark, + [(); S::COLUMNS]:, + [(); S::PUBLIC_INPUTS]:, +{ + let permutation_pairs = 
stark.permutation_pairs(); + + let permutation_batches = get_permutation_batches( + &permutation_pairs, + &permutation_challenge_sets, + config.num_challenges, + stark.permutation_batch_size(), + ); + + // Each zs value corresponds to a permutation batch. + permutation_batches + .into_par_iter() + .map(|instances| compute_permutation_z_poly(&instances, trace_poly_values)) + .collect() +} From 79ba85eb088a41cda96b76ccf1ca5b50646a2597 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Mon, 21 Feb 2022 10:52:04 +0100 Subject: [PATCH 11/32] Compiles --- starky/src/vanishing_poly.rs | 57 ++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/starky/src/vanishing_poly.rs b/starky/src/vanishing_poly.rs index 6f7225b5..a3323796 100644 --- a/starky/src/vanishing_poly.rs +++ b/starky/src/vanishing_poly.rs @@ -1,23 +1,24 @@ -use plonky2::field::extension_field::Extendable; -use plonky2::field::packed_field::PackedField; +use plonky2::field::extension_field::{Extendable, FieldExtension}; use plonky2::hash::hash_types::RichField; use plonky2::plonk::config::GenericConfig; -use rayon::prelude::*; use crate::config::StarkConfig; use crate::constraint_consumer::ConstraintConsumer; -use crate::permutation::{get_permutation_batches, PermutationChallenge}; +use crate::permutation::{ + get_permutation_batches, PermutationChallenge, PermutationChallengeSet, PermutationInstance, + PermutationPair, +}; use crate::stark::Stark; use crate::vars::StarkEvaluationVars; pub(crate) fn eval_vanishing_poly( stark: S, config: &StarkConfig, - vars: StarkEvaluationVars, + vars: StarkEvaluationVars, local_zs: &[F::Extension], next_zs: &[F::Extension], - mut consumer: ConstraintConsumer, - permutation_challenge_sets: &[PermutationChallenge], + mut consumer: ConstraintConsumer, + permutation_challenge_sets: &[PermutationChallengeSet], ) where F: RichField + Extendable, C: GenericConfig, @@ -25,17 +26,17 @@ pub(crate) fn eval_vanishing_poly( [(); S::COLUMNS]:, 
[(); S::PUBLIC_INPUTS]:, { - stark.eval_packed_base(vars, &mut consumer); + stark.eval_packed_generic(vars, &mut consumer); } fn eval_permutation_checks( stark: S, config: &StarkConfig, - vars: StarkEvaluationVars, + vars: StarkEvaluationVars, local_zs: &[F::Extension], next_zs: &[F::Extension], - mut consumer: ConstraintConsumer, - permutation_challenge_sets: &[PermutationChallenge], + consumer: &mut ConstraintConsumer, + permutation_challenge_sets: &[PermutationChallengeSet], ) where F: RichField + Extendable, C: GenericConfig, @@ -43,18 +44,42 @@ fn eval_permutation_checks( [(); S::COLUMNS]:, [(); S::PUBLIC_INPUTS]:, { + // TODO: Z_1 check. let permutation_pairs = stark.permutation_pairs(); let permutation_batches = get_permutation_batches( &permutation_pairs, - &permutation_challenge_sets, + permutation_challenge_sets, config.num_challenges, stark.permutation_batch_size(), ); // Each zs value corresponds to a permutation batch. - permutation_batches - .into_par_iter() - .map(|instances| compute_permutation_z_poly(&instances, trace_poly_values)) - .collect() + for (i, instances) in permutation_batches.iter().enumerate() { + // Z(gx) * down = Z x * up + let (reduced_lhs, reduced_rhs): (Vec, Vec) = instances + .iter() + .map(|instance| { + let PermutationInstance { + pair: PermutationPair { column_pairs }, + challenge: PermutationChallenge { beta, gamma }, + } = instance; + column_pairs.iter().rev().fold( + ( + F::Extension::from_basefield(*gamma), + F::Extension::from_basefield(*gamma), + ), + |(lhs, rhs), &(i, j)| { + ( + lhs.scalar_mul(*beta) + vars.local_values[i], + rhs.scalar_mul(*beta) + vars.local_values[j], + ) + }, + ) + }) + .unzip(); + let constraint = next_zs[i] * reduced_rhs.into_iter().product() + - local_zs[i] * reduced_lhs.into_iter().product(); + consumer.constraint(constraint); + } } From 5c1173379e4c6f111f84fcb03fa8fa354cb3f19e Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Mon, 21 Feb 2022 16:05:24 +0100 Subject: [PATCH 12/32] Compiles --- 
starky/src/permutation.rs | 72 ++++++++++++++++++++++++++--- starky/src/prover.rs | 66 +++++++++++++++++++++------ starky/src/vanishing_poly.rs | 87 +++++++++--------------------------- 3 files changed, 139 insertions(+), 86 deletions(-) diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index 9306d0b2..8a33eb41 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -2,7 +2,7 @@ use itertools::Itertools; use plonky2::field::batch_util::batch_multiply_inplace; -use plonky2::field::extension_field::Extendable; +use plonky2::field::extension_field::{Extendable, FieldExtension}; use plonky2::field::field_types::Field; use plonky2::field::polynomial::PolynomialValues; use plonky2::hash::hash_types::RichField; @@ -11,7 +11,9 @@ use plonky2::plonk::config::{GenericConfig, Hasher}; use rayon::prelude::*; use crate::config::StarkConfig; +use crate::constraint_consumer::ConstraintConsumer; use crate::stark::Stark; +use crate::vars::StarkEvaluationVars; /// A pair of lists of columns, `lhs` and `rhs`, that should be permutations of one another. /// In particular, there should exist some permutation `pi` such that for any `i`, @@ -39,6 +41,7 @@ pub(crate) struct PermutationChallenge { } /// Like `PermutationChallenge`, but with `num_challenges` copies to boost soundness. 
+#[derive(Clone)] pub(crate) struct PermutationChallengeSet { pub(crate) challenges: Vec>, } @@ -49,6 +52,7 @@ pub(crate) fn compute_permutation_z_polys( config: &StarkConfig, challenger: &mut Challenger, trace_poly_values: &[PolynomialValues], + permutation_challenge_sets: &[PermutationChallengeSet], ) -> Vec> where F: RichField + Extendable, @@ -56,12 +60,6 @@ where S: Stark, { let permutation_pairs = stark.permutation_pairs(); - let permutation_challenge_sets = get_n_permutation_challenge_sets( - challenger, - config.num_challenges, - stark.permutation_batch_size(), - ); - let permutation_batches = get_permutation_batches( &permutation_pairs, &permutation_challenge_sets, @@ -192,3 +190,63 @@ pub(crate) fn get_permutation_batches<'a, F: Field>( }) .collect() } + +// TODO: Use slices. +pub struct PermutationCheckData, const D2: usize> { + pub(crate) local_zs: Vec, + pub(crate) next_zs: Vec, + pub(crate) permutation_challenge_sets: Vec>, +} + +pub(crate) fn eval_permutation_checks( + stark: &S, + config: &StarkConfig, + vars: StarkEvaluationVars, + local_zs: &[FE], + next_zs: &[FE], + consumer: &mut ConstraintConsumer, + permutation_challenge_sets: &[PermutationChallengeSet], +) where + F: RichField + Extendable, + FE: FieldExtension, + C: GenericConfig, + S: Stark, + [(); S::COLUMNS]:, + [(); S::PUBLIC_INPUTS]:, +{ + // TODO: Z_1 check. + let permutation_pairs = stark.permutation_pairs(); + + let permutation_batches = get_permutation_batches( + &permutation_pairs, + permutation_challenge_sets, + config.num_challenges, + stark.permutation_batch_size(), + ); + + // Each zs value corresponds to a permutation batch. 
+ for (i, instances) in permutation_batches.iter().enumerate() { + // Z(gx) * down = Z x * up + let (reduced_lhs, reduced_rhs): (Vec, Vec) = instances + .iter() + .map(|instance| { + let PermutationInstance { + pair: PermutationPair { column_pairs }, + challenge: PermutationChallenge { beta, gamma }, + } = instance; + column_pairs.iter().rev().fold( + (FE::from_basefield(*gamma), FE::from_basefield(*gamma)), + |(lhs, rhs), &(i, j)| { + ( + lhs.scalar_mul(*beta) + vars.local_values[i], + rhs.scalar_mul(*beta) + vars.local_values[j], + ) + }, + ) + }) + .unzip(); + let constraint = next_zs[i] * reduced_rhs.into_iter().product() + - local_zs[i] * reduced_lhs.into_iter().product(); + consumer.constraint(constraint); + } +} diff --git a/starky/src/prover.rs b/starky/src/prover.rs index be1f198b..0206cb95 100644 --- a/starky/src/prover.rs +++ b/starky/src/prover.rs @@ -18,9 +18,13 @@ use rayon::prelude::*; use crate::config::StarkConfig; use crate::constraint_consumer::ConstraintConsumer; -use crate::permutation::compute_permutation_z_polys; +use crate::permutation::PermutationCheckData; +use crate::permutation::{ + compute_permutation_z_polys, get_n_permutation_challenge_sets, PermutationChallengeSet, +}; use crate::proof::{StarkOpeningSet, StarkProof, StarkProofWithPublicInputs}; use crate::stark::Stark; +use crate::vanishing_poly::eval_vanishing_poly; use crate::vars::StarkEvaluationVars; pub fn prove( @@ -80,28 +84,41 @@ where challenger.observe_cap(&trace_cap); // Permutation arguments. 
- let permutation_zs_commitment = if stark.uses_permutation_args() { + let permutation_zs_commitment_challenges = if stark.uses_permutation_args() { + let permutation_challenge_sets = get_n_permutation_challenge_sets( + &mut challenger, + config.num_challenges, + stark.permutation_batch_size(), + ); let permutation_z_polys = compute_permutation_z_polys::( &stark, config, &mut challenger, &trace_poly_values, + &permutation_challenge_sets, ); + timed!( timing, "compute permutation Z commitments", - Some(PolynomialBatch::from_values( - permutation_z_polys, - rate_bits, - false, - config.fri_config.cap_height, - timing, - None, + Some(( + PolynomialBatch::from_values( + permutation_z_polys, + rate_bits, + false, + config.fri_config.cap_height, + timing, + None, + ), + permutation_challenge_sets )) ) } else { None }; + let permutation_zs_commitment = permutation_zs_commitment_challenges + .as_ref() + .map(|(comm, _)| comm); let permutation_zs_cap = permutation_zs_commitment .as_ref() .map(|commit| commit.merkle_tree.cap.clone()); @@ -113,10 +130,11 @@ where let quotient_polys = compute_quotient_polys::( &stark, &trace_commitment, + &permutation_zs_commitment_challenges, public_inputs, alphas, degree_bits, - rate_bits, + config, ); let all_quotient_chunks = quotient_polys .into_par_iter() @@ -156,13 +174,13 @@ where zeta, g, &trace_commitment, - permutation_zs_commitment.as_ref(), + permutation_zs_commitment, "ient_commitment, ); challenger.observe_openings(&openings.to_fri_openings()); let initial_merkle_trees = once(&trace_commitment) - .chain(permutation_zs_commitment.as_ref()) + .chain(permutation_zs_commitment) .chain(once("ient_commitment)) .collect_vec(); @@ -196,10 +214,14 @@ where fn compute_quotient_polys( stark: &S, trace_commitment: &PolynomialBatch, + permutation_zs_commitment_challenges: &Option<( + PolynomialBatch, + Vec>, + )>, public_inputs: [F; S::PUBLIC_INPUTS], alphas: Vec, degree_bits: usize, - rate_bits: usize, + config: &StarkConfig, ) -> Vec> 
where F: RichField + Extendable, @@ -209,6 +231,7 @@ where [(); S::PUBLIC_INPUTS]:, { let degree = 1 << degree_bits; + let rate_bits = config.fri_config.rate_bits; let quotient_degree_bits = log2_ceil(stark.quotient_degree_factor()); assert!( @@ -255,7 +278,22 @@ where next_values: &get_at_index(trace_commitment, (i + next_step) % size), public_inputs: &public_inputs, }; - stark.eval_packed_base(vars, &mut consumer); + let permutation_check_data = permutation_zs_commitment_challenges.as_ref().map( + |(permutation_zs_commitment, permutation_challenge_sets)| PermutationCheckData { + local_zs: get_at_index(&permutation_zs_commitment, i).to_vec(), + next_zs: get_at_index(&permutation_zs_commitment, (i + next_step) % size) + .to_vec(), + permutation_challenge_sets: permutation_challenge_sets.to_vec(), + }, + ); + eval_vanishing_poly::( + stark, + config, + vars, + permutation_check_data, + &mut consumer, + ); + // stark.eval_packed_base(vars, &mut consumer); // TODO: Add in constraints for permutation arguments. // TODO: Fix this once we use a genuine `PackedField`. 
let mut constraints_evals = consumer.accumulators(); diff --git a/starky/src/vanishing_poly.rs b/starky/src/vanishing_poly.rs index a3323796..dc598167 100644 --- a/starky/src/vanishing_poly.rs +++ b/starky/src/vanishing_poly.rs @@ -4,82 +4,39 @@ use plonky2::plonk::config::GenericConfig; use crate::config::StarkConfig; use crate::constraint_consumer::ConstraintConsumer; -use crate::permutation::{ - get_permutation_batches, PermutationChallenge, PermutationChallengeSet, PermutationInstance, - PermutationPair, -}; +use crate::permutation::{eval_permutation_checks, PermutationCheckData}; use crate::stark::Stark; use crate::vars::StarkEvaluationVars; -pub(crate) fn eval_vanishing_poly( - stark: S, +pub(crate) fn eval_vanishing_poly( + stark: &S, config: &StarkConfig, - vars: StarkEvaluationVars, - local_zs: &[F::Extension], - next_zs: &[F::Extension], - mut consumer: ConstraintConsumer, - permutation_challenge_sets: &[PermutationChallengeSet], + vars: StarkEvaluationVars, + permutation_data: Option>, + consumer: &mut ConstraintConsumer, ) where F: RichField + Extendable, + FE: FieldExtension, C: GenericConfig, S: Stark, [(); S::COLUMNS]:, [(); S::PUBLIC_INPUTS]:, { - stark.eval_packed_generic(vars, &mut consumer); -} - -fn eval_permutation_checks( - stark: S, - config: &StarkConfig, - vars: StarkEvaluationVars, - local_zs: &[F::Extension], - next_zs: &[F::Extension], - consumer: &mut ConstraintConsumer, - permutation_challenge_sets: &[PermutationChallengeSet], -) where - F: RichField + Extendable, - C: GenericConfig, - S: Stark, - [(); S::COLUMNS]:, - [(); S::PUBLIC_INPUTS]:, -{ - // TODO: Z_1 check. 
- let permutation_pairs = stark.permutation_pairs(); - - let permutation_batches = get_permutation_batches( - &permutation_pairs, + stark.eval_packed_generic(vars, consumer); + if let Some(PermutationCheckData { + local_zs, + next_zs, permutation_challenge_sets, - config.num_challenges, - stark.permutation_batch_size(), - ); - - // Each zs value corresponds to a permutation batch. - for (i, instances) in permutation_batches.iter().enumerate() { - // Z(gx) * down = Z x * up - let (reduced_lhs, reduced_rhs): (Vec, Vec) = instances - .iter() - .map(|instance| { - let PermutationInstance { - pair: PermutationPair { column_pairs }, - challenge: PermutationChallenge { beta, gamma }, - } = instance; - column_pairs.iter().rev().fold( - ( - F::Extension::from_basefield(*gamma), - F::Extension::from_basefield(*gamma), - ), - |(lhs, rhs), &(i, j)| { - ( - lhs.scalar_mul(*beta) + vars.local_values[i], - rhs.scalar_mul(*beta) + vars.local_values[j], - ) - }, - ) - }) - .unzip(); - let constraint = next_zs[i] * reduced_rhs.into_iter().product() - - local_zs[i] * reduced_lhs.into_iter().product(); - consumer.constraint(constraint); + }) = permutation_data + { + eval_permutation_checks::( + stark, + config, + vars, + &local_zs, + &next_zs, + consumer, + &permutation_challenge_sets, + ); } } From 85c1e1d5e07bfc4c4cb34a85373f09392a18e5c1 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Mon, 21 Feb 2022 18:00:03 +0100 Subject: [PATCH 13/32] Should work (does not) --- starky/src/fibonacci_stark.rs | 29 ++++++++++++++++++++++------- starky/src/prover.rs | 17 +++++++++-------- starky/src/verifier.rs | 16 +++++++++++++++- 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/starky/src/fibonacci_stark.rs b/starky/src/fibonacci_stark.rs index a0204359..2bbd333f 100644 --- a/starky/src/fibonacci_stark.rs +++ b/starky/src/fibonacci_stark.rs @@ -2,16 +2,21 @@ use std::marker::PhantomData; use plonky2::field::extension_field::{Extendable, FieldExtension}; use 
plonky2::field::packed_field::PackedField; +use plonky2::fri::structure::{FriInstanceInfo, FriInstanceInfoTarget}; use plonky2::hash::hash_types::RichField; +use plonky2::iop::ext_target::ExtensionTarget; use plonky2::plonk::circuit_builder::CircuitBuilder; +use crate::config::StarkConfig; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use crate::permutation::PermutationPair; use crate::stark::Stark; use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; /// Toy STARK system used for testing. -/// Computes a Fibonacci sequence with state `[x0, x1]` using the state transition -/// `x0 <- x1, x1 <- x0 + x1`. +/// Computes a Fibonacci sequence with state `[x0, x1, i, j]` using the state transition +/// `x0' <- x1, x1' <- x0 + x1, i' <- i+1, j' <- j+1`. +/// Note: The `i, j` columns are used to test the permutation argument. #[derive(Copy, Clone)] struct FibonacciStark, const D: usize> { num_rows: usize, @@ -34,21 +39,25 @@ impl, const D: usize> FibonacciStark { } } - /// Generate the trace using `x0, x1` as inital state values. + /// Generate the trace using `x0, x1, 0, 1` as initial state values. 
fn generate_trace(&self, x0: F, x1: F) -> Vec<[F; Self::COLUMNS]> { - (0..self.num_rows) - .scan([x0, x1], |acc, _| { + let mut trace = (0..self.num_rows) + .scan([x0, x1, F::ZERO, F::ONE], |acc, _| { let tmp = *acc; acc[0] = tmp[1]; acc[1] = tmp[0] + tmp[1]; + acc[2] = tmp[2] + F::ONE; + acc[3] = tmp[3] + F::ONE; Some(tmp) }) - .collect() + .collect::>(); + trace[self.num_rows - 1][3] = F::ZERO; + trace } } impl, const D: usize> Stark for FibonacciStark { - const COLUMNS: usize = 2; + const COLUMNS: usize = 4; const PUBLIC_INPUTS: usize = 3; fn eval_packed_generic( @@ -105,6 +114,12 @@ impl, const D: usize> Stark for FibonacciStar fn constraint_degree(&self) -> usize { 2 } + + fn permutation_pairs(&self) -> Vec { + vec![PermutationPair { + column_pairs: vec![(2, 3)], + }] + } } #[cfg(test)] diff --git a/starky/src/prover.rs b/starky/src/prover.rs index 0206cb95..e0c14dde 100644 --- a/starky/src/prover.rs +++ b/starky/src/prover.rs @@ -211,10 +211,10 @@ where /// Computes the quotient polynomials `(sum alpha^i C_i(x)) / Z_H(x)` for `alpha` in `alphas`, /// where the `C_i`s are the Stark constraints. -fn compute_quotient_polys( +fn compute_quotient_polys<'a, F, C, S, const D: usize>( stark: &S, - trace_commitment: &PolynomialBatch, - permutation_zs_commitment_challenges: &Option<( + trace_commitment: &'a PolynomialBatch, + permutation_zs_commitment_challenges: &'a Option<( PolynomialBatch, Vec>, )>, @@ -251,9 +251,8 @@ where let z_h_on_coset = ZeroPolyOnCoset::::new(degree_bits, quotient_degree_bits); // Retrieve the LDE values at index `i`. - let get_at_index = |comm: &PolynomialBatch, i: usize| -> [F; S::COLUMNS] { - comm.get_lde_values(i * step).try_into().unwrap() - }; + let get_at_index = + |comm: &'a PolynomialBatch, i: usize| -> &'a [F] { comm.get_lde_values(i * step) }; // Last element of the subgroup. 
let last = F::primitive_root_of_unity(degree_bits).inverse(); let size = degree << quotient_degree_bits; @@ -274,8 +273,10 @@ where lagrange_last.values[i], ); let vars = StarkEvaluationVars:: { - local_values: &get_at_index(trace_commitment, i), - next_values: &get_at_index(trace_commitment, (i + next_step) % size), + local_values: &get_at_index(trace_commitment, i).try_into().unwrap(), + next_values: &get_at_index(trace_commitment, (i + next_step) % size) + .try_into() + .unwrap(), public_inputs: &public_inputs, }; let permutation_check_data = permutation_zs_commitment_challenges.as_ref().map( diff --git a/starky/src/verifier.rs b/starky/src/verifier.rs index 686ecd98..1603b208 100644 --- a/starky/src/verifier.rs +++ b/starky/src/verifier.rs @@ -11,8 +11,10 @@ use plonky2::plonk::plonk_common::reduce_with_powers; use crate::config::StarkConfig; use crate::constraint_consumer::ConstraintConsumer; +use crate::permutation::PermutationCheckData; use crate::proof::{StarkOpeningSet, StarkProofChallenges, StarkProofWithPublicInputs}; use crate::stark::Stark; +use crate::vanishing_poly::eval_vanishing_poly; use crate::vars::StarkEvaluationVars; pub fn verify_stark_proof< @@ -88,7 +90,19 @@ where l_1, l_last, ); - stark.eval_ext(vars, &mut consumer); + // stark.eval_ext(vars, &mut consumer); + let permutation_data = stark.uses_permutation_args().then(|| PermutationCheckData { + local_zs: permutation_zs.as_ref().unwrap().clone(), + next_zs: permutation_zs_right.as_ref().unwrap().clone(), + permutation_challenge_sets: challenges.permutation_challenge_sets, + }); + eval_vanishing_poly::( + &stark, + config, + vars, + permutation_data, + &mut consumer, + ); // TODO: Add in constraints for permutation arguments. 
let vanishing_polys_zeta = consumer.accumulators(); From c7af63957995d87c7527ceb0341bbfcc431dc413 Mon Sep 17 00:00:00 2001 From: Jakub Nabaglo Date: Mon, 21 Feb 2022 17:45:01 -0800 Subject: [PATCH 14/32] Restore vectorization to full Poseidon rounds on Aarch64 (#498) * Restore vectorization to full Poseidon layers on Aarch64 * Typos --- plonky2/src/hash/arch/aarch64/mod.rs | 4 +- .../arch/aarch64/poseidon_goldilocks_neon.rs | 588 +++++++----------- plonky2/src/hash/poseidon_goldilocks.rs | 28 +- 3 files changed, 250 insertions(+), 370 deletions(-) diff --git a/plonky2/src/hash/arch/aarch64/mod.rs b/plonky2/src/hash/arch/aarch64/mod.rs index ba86797d..b8ae14af 100644 --- a/plonky2/src/hash/arch/aarch64/mod.rs +++ b/plonky2/src/hash/arch/aarch64/mod.rs @@ -1,2 +1,2 @@ -// #[cfg(target_feature = "neon")] -// pub(crate) mod poseidon_goldilocks_neon; +#[cfg(target_feature = "neon")] +pub(crate) mod poseidon_goldilocks_neon; diff --git a/plonky2/src/hash/arch/aarch64/poseidon_goldilocks_neon.rs b/plonky2/src/hash/arch/aarch64/poseidon_goldilocks_neon.rs index f2276506..352456e7 100644 --- a/plonky2/src/hash/arch/aarch64/poseidon_goldilocks_neon.rs +++ b/plonky2/src/hash/arch/aarch64/poseidon_goldilocks_neon.rs @@ -2,37 +2,24 @@ use std::arch::aarch64::*; use std::arch::asm; +use std::mem::transmute; -use plonky2_field::field_types::Field64; use plonky2_field::goldilocks_field::GoldilocksField; use plonky2_util::branch_hint; use static_assertions::const_assert; use unroll::unroll_for_loops; -use crate::hash::poseidon::{ - Poseidon, ALL_ROUND_CONSTANTS, HALF_N_FULL_ROUNDS, N_PARTIAL_ROUNDS, N_ROUNDS, -}; +use crate::hash::poseidon::Poseidon; // ========================================== CONSTANTS =========================================== const WIDTH: usize = 12; -// The order below is arbitrary. Repeated coefficients have been removed so these constants fit in -// two registers. 
-// TODO: ensure this is aligned to 16 bytes (for vector loads), ideally on the same cacheline -const MDS_CONSTS: [u32; 8] = [ - 0xffffffff, - 1 << 1, - 1 << 3, - 1 << 5, - 1 << 8, - 1 << 10, - 1 << 12, - 1 << 16, -]; +const EPSILON: u64 = 0xffffffff; -// The round constants to be applied by the second set of full rounds. These are just the usual round constants, -// shifted by one round, with zeros shifted in. +// The round constants to be applied by the second set of full rounds. These are just the usual +// round constants, shifted by one round, with zeros shifted in. +/* const fn make_final_round_constants() -> [u64; WIDTH * HALF_N_FULL_ROUNDS] { let mut res = [0; WIDTH * HALF_N_FULL_ROUNDS]; let mut i: usize = 0; @@ -43,6 +30,7 @@ const fn make_final_round_constants() -> [u64; WIDTH * HALF_N_FULL_ROUNDS] { res } const FINAL_ROUND_CONSTANTS: [u64; WIDTH * HALF_N_FULL_ROUNDS] = make_final_round_constants(); +*/ // ===================================== COMPILE-TIME CHECKS ====================================== @@ -52,9 +40,12 @@ const FINAL_ROUND_CONSTANTS: [u64; WIDTH * HALF_N_FULL_ROUNDS] = make_final_roun const fn check_mds_matrix() -> bool { // Can't == two arrays in a const_assert! (: let mut i = 0; - let wanted_matrix_exps = [0, 0, 1, 0, 3, 5, 1, 8, 12, 3, 16, 10]; + let wanted_matrix_circ = [17, 15, 41, 16, 2, 28, 13, 13, 39, 18, 34, 20]; + let wanted_matrix_diag = [8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; while i < WIDTH { - if ::MDS_MATRIX_EXPS[i] != wanted_matrix_exps[i] { + if ::MDS_MATRIX_CIRC[i] != wanted_matrix_circ[i] + || ::MDS_MATRIX_DIAG[i] != wanted_matrix_diag[i] + { return false; } i += 1; @@ -63,37 +54,10 @@ const fn check_mds_matrix() -> bool { } const_assert!(check_mds_matrix()); -/// The maximum amount by which the MDS matrix will multiply the input. -/// i.e. max(MDS(state)) <= mds_matrix_inf_norm() * max(state). 
-const fn mds_matrix_inf_norm() -> u64 { - let mut cumul = 0; - let mut i = 0; - while i < WIDTH { - cumul += 1 << ::MDS_MATRIX_EXPS[i]; - i += 1; - } - cumul -} - -/// Ensure that adding round constants to the low result of the MDS multiplication can never -/// overflow. -#[allow(dead_code)] -const fn check_round_const_bounds_mds() -> bool { - let max_mds_res = mds_matrix_inf_norm() * (u32::MAX as u64); - let mut i = WIDTH; // First const layer is handled specially. - while i < WIDTH * N_ROUNDS { - if ALL_ROUND_CONSTANTS[i].overflowing_add(max_mds_res).1 { - return false; - } - i += 1; - } - true -} -const_assert!(check_round_const_bounds_mds()); - /// Ensure that the first WIDTH round constants are in canonical* form. This is required because /// the first constant layer does not handle double overflow. /// *: round_const == GoldilocksField::ORDER is safe. +/* #[allow(dead_code)] const fn check_round_const_bounds_init() -> bool { let mut i = 0; @@ -106,11 +70,9 @@ const fn check_round_const_bounds_init() -> bool { true } const_assert!(check_round_const_bounds_init()); - +*/ // ====================================== SCALAR ARITHMETIC ======================================= -const EPSILON: u64 = 0xffffffff; - /// Addition modulo ORDER accounting for wraparound. Correct only when a + b < 2**64 + ORDER. #[inline(always)] unsafe fn add_with_wraparound(a: u64, b: u64) -> u64 { @@ -133,7 +95,16 @@ unsafe fn add_with_wraparound(a: u64, b: u64) -> u64 { /// Subtraction of a and (b >> 32) modulo ORDER accounting for wraparound. #[inline(always)] unsafe fn sub_with_wraparound_lsr32(a: u64, b: u64) -> u64 { - let b_hi = b >> 32; + let mut b_hi = b >> 32; + // Make sure that LLVM emits two separate instructions for the shift and the subtraction. This + // reduces pressure on the execution units with access to the flags, as they are no longer + // responsible for the shift. 
The hack is to insert a fake computation between the two + // instructions with an `asm` block to make LLVM think that they can't be merged. + asm!( + "/* {0} */", // Make Rust think we're using the register. + inlateout(reg) b_hi, + options(nomem, nostack, preserves_flags, pure), + ); // This could be done with a.overflowing_add(b_hi), but `checked_sub` signals to the compiler // that overflow is unlikely (note: this is a standard library implementation detail, not part // of the spec). @@ -153,7 +124,8 @@ unsafe fn sub_with_wraparound_lsr32(a: u64, b: u64) -> u64 { unsafe fn mul_epsilon(x: u64) -> u64 { let res; asm!( - // Use UMULL to save one instruction. The compiler emits two: extract the low word and then multiply. + // Use UMULL to save one instruction. The compiler emits two: extract the low word and then + // multiply. "umull {res}, {x:w}, {epsilon:w}", x = in(reg) x, epsilon = in(reg) EPSILON, @@ -179,8 +151,9 @@ unsafe fn multiply(x: u64, y: u64) -> u64 { // ==================================== STANDALONE CONST LAYER ===================================== -/// Standalone const layer. Run only once, at the start of round 1. Remaining const layers are fused with the preceeding -/// MDS matrix multiplication. +/// Standalone const layer. Run only once, at the start of round 1. Remaining const layers are fused +/// with the preceeding MDS matrix multiplication. +/* #[inline(always)] #[unroll_for_loops] unsafe fn const_layer_full( @@ -195,15 +168,15 @@ unsafe fn const_layer_full( } state } - +*/ // ========================================== FULL ROUNDS ========================================== /// Full S-box. #[inline(always)] #[unroll_for_loops] unsafe fn sbox_layer_full(state: [u64; WIDTH]) -> [u64; WIDTH] { - // This is done in scalar. S-boxes in vector are only slightly slower throughput-wise but have an insane latency - // (~100 cycles) on the M1. + // This is done in scalar. 
S-boxes in vector are only slightly slower throughput-wise but have + // an insane latency (~100 cycles) on the M1. let mut state2 = [0u64; WIDTH]; assert!(WIDTH == 12); @@ -228,297 +201,227 @@ unsafe fn sbox_layer_full(state: [u64; WIDTH]) -> [u64; WIDTH] { state7 } -// Aliases for readability. E.g. MDS[5] can be found in mdsv5[MDSI5]. -const MDSI2: i32 = 1; // MDS[2] == 1 -const MDSI4: i32 = 2; // MDS[4] == 3 -const MDSI5: i32 = 3; // MDS[5] == 5 -const MDSI6: i32 = 1; // MDS[6] == 1 -const MDSI7: i32 = 0; // MDS[7] == 8 -const MDSI8: i32 = 2; // MDS[8] == 12 -const MDSI9: i32 = 2; // MDS[9] == 3 -const MDSI10: i32 = 3; // MDS[10] == 16 -const MDSI11: i32 = 1; // MDS[11] == 10 - #[inline(always)] unsafe fn mds_reduce( - [[cumul0_a, cumul0_b], [cumul1_a, cumul1_b]]: [[uint64x2_t; 2]; 2], + // `cumul_a` and `cumul_b` represent two separate field elements. We take advantage of + // vectorization by reducing them simultaneously. + [cumul_a, cumul_b]: [uint32x4_t; 2], ) -> uint64x2_t { - // mds_consts0 == [0xffffffff, 1 << 1, 1 << 3, 1 << 5] - let mds_consts0: uint32x4_t = vld1q_u32((&MDS_CONSTS[0..4]).as_ptr().cast::()); - - // Merge accumulators - let cumul0 = vaddq_u64(cumul0_a, cumul0_b); - let cumul1 = vaddq_u64(cumul1_a, cumul1_b); - - // Swizzle - let res_lo = vzip1q_u64(cumul0, cumul1); - let res_hi = vzip2q_u64(cumul0, cumul1); - - // Reduce from u96 - let res_hi = vsraq_n_u64::<32>(res_hi, res_lo); - let res_lo = vsliq_n_u64::<32>(res_lo, res_hi); - - // Extract high 32-bits. - let res_hi_hi = vget_low_u32(vuzp2q_u32( - vreinterpretq_u32_u64(res_hi), - vreinterpretq_u32_u64(res_hi), - )); - - // Multiply by EPSILON and accumulate. 
- let res_unadj = vmlal_laneq_u32::<0>(res_lo, res_hi_hi, mds_consts0); - let res_adj = vcgtq_u64(res_lo, res_unadj); - vsraq_n_u64::<32>(res_unadj, res_adj) + // Form: + // `lo = [cumul_a[0] + cumul_a[2] * 2**32, cumul_b[0] + cumul_b[2] * 2**32]` + // `hi = [cumul_a[1] + cumul_a[3] * 2**32, cumul_b[1] + cumul_b[3] * 2**32]` + // Observe that the result `== lo + hi * 2**16 (mod Goldilocks)`. + let mut lo = vreinterpretq_u64_u32(vuzp1q_u32(cumul_a, cumul_b)); + let mut hi = vreinterpretq_u64_u32(vuzp2q_u32(cumul_a, cumul_b)); + // Add the high 48 bits of `lo` to `hi`. This cannot overflow. + hi = vsraq_n_u64::<16>(hi, lo); + // Now, result `== lo.bits[0..16] + hi * 2**16 (mod Goldilocks)`. + // Set the high 48 bits of `lo` to the low 48 bits of `hi`. + lo = vsliq_n_u64::<16>(lo, hi); + // At this point, result `== lo + hi.bits[48..64] * 2**64 (mod Goldilocks)`. + // It remains to fold `hi.bits[48..64]` into `lo`. + let top = { + // Extract the top 16 bits of `hi` as a `u32`. + // Interpret `hi` as a vector of bytes, so we can use a table lookup instruction. + let hi_u8 = vreinterpretq_u8_u64(hi); + // Indices defining the permutation. `0xff` is out of bounds, producing `0`. + let top_idx = + transmute::<[u8; 8], uint8x8_t>([0x06, 0x07, 0xff, 0xff, 0x0e, 0x0f, 0xff, 0xff]); + let top_u8 = vqtbl1_u8(hi_u8, top_idx); + vreinterpret_u32_u8(top_u8) + }; + // result `== lo + top * 2**64 (mod Goldilocks)`. + let adj_lo = vmlal_n_u32(lo, top, EPSILON as u32); + let wraparound_mask = vcgtq_u64(lo, adj_lo); + vsraq_n_u64::<32>(adj_lo, wraparound_mask) // Add epsilon on overflow. 
} #[inline(always)] -unsafe fn mds_const_layers_full( - state: [u64; WIDTH], - round_constants: &[u64; WIDTH], -) -> [u64; WIDTH] { - // mds_consts0 == [0xffffffff, 1 << 1, 1 << 3, 1 << 5] - // mds_consts1 == [1 << 8, 1 << 10, 1 << 12, 1 << 16] - let mds_consts0: uint32x4_t = vld1q_u32((&MDS_CONSTS[0..4]).as_ptr().cast::()); - let mds_consts1: uint32x4_t = vld1q_u32((&MDS_CONSTS[4..8]).as_ptr().cast::()); +unsafe fn mds_layer_full(state: [u64; WIDTH]) -> [u64; WIDTH] { + // This function performs an MDS multiplication in complex FFT space. + // However, instead of performing a width-12 FFT, we perform three width-4 FFTs, which is + // cheaper. The 12x12 matrix-vector multiplication (a convolution) becomes two 3x3 real + // matrix-vector multiplications and one 3x3 complex matrix-vector multiplication. - // Aliases for readability. E.g. MDS[5] can be found in mdsv5[mdsi5]. MDS[0], MDS[1], and - // MDS[3] are 0, so they are not needed. - let mdsv2 = mds_consts0; // MDS[2] == 1 - let mdsv4 = mds_consts0; // MDS[4] == 3 - let mdsv5 = mds_consts0; // MDS[5] == 5 - let mdsv6 = mds_consts0; // MDS[6] == 1 - let mdsv7 = mds_consts1; // MDS[7] == 8 - let mdsv8 = mds_consts1; // MDS[8] == 12 - let mdsv9 = mds_consts0; // MDS[9] == 3 - let mdsv10 = mds_consts1; // MDS[10] == 16 - let mdsv11 = mds_consts1; // MDS[11] == 10 + // We split each 64-bit into four chunks of 16 bits. To prevent overflow, each chunk is 32 bits + // long. Each NEON vector below represents one field element and consists of four 32-bit chunks: + // `elem == vector[0] + vector[1] * 2**16 + vector[2] * 2**32 + vector[3] * 2**48`. - // For i even, we combine state[i] and state[i + 1] into one vector to save on registers. - // Thus, state1 actually contains state0 and state1 but is only used in the intrinsics that - // access the high high doubleword. 
- let state1: uint32x4_t = - vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[0]), vcreate_u64(state[1]))); - let state3: uint32x4_t = - vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[2]), vcreate_u64(state[3]))); - let state5: uint32x4_t = - vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[4]), vcreate_u64(state[5]))); - let state7: uint32x4_t = - vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[6]), vcreate_u64(state[7]))); - let state9: uint32x4_t = - vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[8]), vcreate_u64(state[9]))); - let state11: uint32x4_t = - vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[10]), vcreate_u64(state[11]))); - // state0 is an alias to the low doubleword of state1. The compiler should use one register for both. - let state0: uint32x2_t = vget_low_u32(state1); - let state2: uint32x2_t = vget_low_u32(state3); - let state4: uint32x2_t = vget_low_u32(state5); - let state6: uint32x2_t = vget_low_u32(state7); - let state8: uint32x2_t = vget_low_u32(state9); - let state10: uint32x2_t = vget_low_u32(state11); + // Constants that we multiply by. + let mut consts: uint32x4_t = transmute::<[u32; 4], _>([2, 4, 8, 16]); - // Two accumulators per output to hide latency. Each accumulator is a vector of two u64s, - // containing the result for the low 32 bits and the high 32 bits. Thus, the final result at - // index i is (cumuli_a[0] + cumuli_b[0]) + (cumuli_a[1] + cumuli_b[1]) * 2**32. + // Prevent LLVM from turning fused multiply (by power of 2)-add (1 instruction) into shift and + // add (two instructions). This fake `asm` block means that LLVM no longer knows the contents of + // `consts`. + asm!("/* {0:v} */", // Make Rust think the register is being used. + inout(vreg) consts, + options(pure, nomem, nostack, preserves_flags), + ); - // Start by loading the round constants. 
- let mut cumul0_a = vcombine_u64(vld1_u64(&round_constants[0]), vcreate_u64(0)); - let mut cumul1_a = vcombine_u64(vld1_u64(&round_constants[1]), vcreate_u64(0)); - let mut cumul2_a = vcombine_u64(vld1_u64(&round_constants[2]), vcreate_u64(0)); - let mut cumul3_a = vcombine_u64(vld1_u64(&round_constants[3]), vcreate_u64(0)); - let mut cumul4_a = vcombine_u64(vld1_u64(&round_constants[4]), vcreate_u64(0)); - let mut cumul5_a = vcombine_u64(vld1_u64(&round_constants[5]), vcreate_u64(0)); - let mut cumul6_a = vcombine_u64(vld1_u64(&round_constants[6]), vcreate_u64(0)); - let mut cumul7_a = vcombine_u64(vld1_u64(&round_constants[7]), vcreate_u64(0)); - let mut cumul8_a = vcombine_u64(vld1_u64(&round_constants[8]), vcreate_u64(0)); - let mut cumul9_a = vcombine_u64(vld1_u64(&round_constants[9]), vcreate_u64(0)); - let mut cumul10_a = vcombine_u64(vld1_u64(&round_constants[10]), vcreate_u64(0)); - let mut cumul11_a = vcombine_u64(vld1_u64(&round_constants[11]), vcreate_u64(0)); + // Four length-3 complex FFTs. + let mut state_fft = [vdupq_n_u32(0); 12]; + for i in 0..3 { + // Interpret each field element as a 4-vector of `u16`s. + let x0 = vcreate_u16(state[i]); + let x1 = vcreate_u16(state[i + 3]); + let x2 = vcreate_u16(state[i + 6]); + let x3 = vcreate_u16(state[i + 9]); - // Now the matrix multiplication. - // MDS exps: [0, 0, 1, 0, 3, 5, 1, 8, 12, 3, 16, 10] - // out[i] += in[j] << mds[j - i] + // `vaddl_u16` and `vsubl_u16` yield 4-vectors of `u32`s. 
+ let y0 = vaddl_u16(x0, x2); + let y1 = vaddl_u16(x1, x3); + let y2 = vsubl_u16(x0, x2); + let y3 = vsubl_u16(x1, x3); - let mut cumul0_b = vshll_n_u32::<0>(state0); // MDS[0] - let mut cumul1_b = vshll_n_u32::<10>(state0); // MDS[11] - let mut cumul2_b = vshll_n_u32::<16>(state0); // MDS[10] - let mut cumul3_b = vshll_n_u32::<3>(state0); // MDS[9] - let mut cumul4_b = vshll_n_u32::<12>(state0); // MDS[8] - let mut cumul5_b = vshll_n_u32::<8>(state0); // MDS[7] - let mut cumul6_b = vshll_n_u32::<1>(state0); // MDS[6] - let mut cumul7_b = vshll_n_u32::<5>(state0); // MDS[5] - let mut cumul8_b = vshll_n_u32::<3>(state0); // MDS[4] - let mut cumul9_b = vshll_n_u32::<0>(state0); // MDS[3] - let mut cumul10_b = vshll_n_u32::<1>(state0); // MDS[2] - let mut cumul11_b = vshll_n_u32::<0>(state0); // MDS[1] + let z0 = vaddq_u32(y0, y1); + let z1 = vsubq_u32(y0, y1); + let z2 = y2; + let z3 = y3; - cumul0_a = vaddw_high_u32(cumul0_a, state1); // MDS[1] - cumul1_a = vaddw_high_u32(cumul1_a, state1); // MDS[0] - cumul2_a = vmlal_high_laneq_u32::(cumul2_a, state1, mdsv11); // MDS[11] - cumul3_a = vmlal_high_laneq_u32::(cumul3_a, state1, mdsv10); // MDS[10] - cumul4_a = vmlal_high_laneq_u32::(cumul4_a, state1, mdsv9); // MDS[9] - cumul5_a = vmlal_high_laneq_u32::(cumul5_a, state1, mdsv8); // MDS[8] - cumul6_a = vmlal_high_laneq_u32::(cumul6_a, state1, mdsv7); // MDS[7] - cumul7_a = vmlal_high_laneq_u32::(cumul7_a, state1, mdsv6); // MDS[6] - cumul8_a = vmlal_high_laneq_u32::(cumul8_a, state1, mdsv5); // MDS[5] - cumul9_a = vmlal_high_laneq_u32::(cumul9_a, state1, mdsv4); // MDS[4] - cumul10_a = vaddw_high_u32(cumul10_a, state1); // MDS[3] - cumul11_a = vmlal_high_laneq_u32::(cumul11_a, state1, mdsv2); // MDS[2] + // The FFT is `[z0, z2 + z3 i, z1, z2 - z3 i]`. 
- cumul0_b = vmlal_laneq_u32::(cumul0_b, state2, mdsv2); // MDS[2] - cumul1_b = vaddw_u32(cumul1_b, state2); // MDS[1] - cumul2_b = vaddw_u32(cumul2_b, state2); // MDS[0] - cumul3_b = vmlal_laneq_u32::(cumul3_b, state2, mdsv11); // MDS[11] - cumul4_b = vmlal_laneq_u32::(cumul4_b, state2, mdsv10); // MDS[10] - cumul5_b = vmlal_laneq_u32::(cumul5_b, state2, mdsv9); // MDS[9] - cumul6_b = vmlal_laneq_u32::(cumul6_b, state2, mdsv8); // MDS[8] - cumul7_b = vmlal_laneq_u32::(cumul7_b, state2, mdsv7); // MDS[7] - cumul8_b = vmlal_laneq_u32::(cumul8_b, state2, mdsv6); // MDS[6] - cumul9_b = vmlal_laneq_u32::(cumul9_b, state2, mdsv5); // MDS[5] - cumul10_b = vmlal_laneq_u32::(cumul10_b, state2, mdsv4); // MDS[4] - cumul11_b = vaddw_u32(cumul11_b, state2); // MDS[3] + state_fft[i] = z0; + state_fft[i + 3] = z1; + state_fft[i + 6] = z2; + state_fft[i + 9] = z3; + } - cumul0_a = vaddw_high_u32(cumul0_a, state3); // MDS[3] - cumul1_a = vmlal_high_laneq_u32::(cumul1_a, state3, mdsv2); // MDS[2] - cumul2_a = vaddw_high_u32(cumul2_a, state3); // MDS[1] - cumul3_a = vaddw_high_u32(cumul3_a, state3); // MDS[0] - cumul4_a = vmlal_high_laneq_u32::(cumul4_a, state3, mdsv11); // MDS[11] - cumul5_a = vmlal_high_laneq_u32::(cumul5_a, state3, mdsv10); // MDS[10] - cumul6_a = vmlal_high_laneq_u32::(cumul6_a, state3, mdsv9); // MDS[9] - cumul7_a = vmlal_high_laneq_u32::(cumul7_a, state3, mdsv8); // MDS[8] - cumul8_a = vmlal_high_laneq_u32::(cumul8_a, state3, mdsv7); // MDS[7] - cumul9_a = vmlal_high_laneq_u32::(cumul9_a, state3, mdsv6); // MDS[6] - cumul10_a = vmlal_high_laneq_u32::(cumul10_a, state3, mdsv5); // MDS[5] - cumul11_a = vmlal_high_laneq_u32::(cumul11_a, state3, mdsv4); // MDS[4] + // 3x3 real matrix-vector mul for component 0 of the FFTs. + // Multiply the vector `[x0, x1, x2]` by the matrix + // `[[ 64, 64, 128],` + // ` [128, 64, 64],` + // ` [ 64, 128, 64]]` + // The results are divided by 4 (this ends up cancelling out some later computations). 
+ { + let x0 = state_fft[0]; + let x1 = state_fft[1]; + let x2 = state_fft[2]; - cumul0_b = vmlal_laneq_u32::(cumul0_b, state4, mdsv4); // MDS[4] - cumul1_b = vaddw_u32(cumul1_b, state4); // MDS[3] - cumul2_b = vmlal_laneq_u32::(cumul2_b, state4, mdsv2); // MDS[2] - cumul3_b = vaddw_u32(cumul3_b, state4); // MDS[1] - cumul4_b = vaddw_u32(cumul4_b, state4); // MDS[0] - cumul5_b = vmlal_laneq_u32::(cumul5_b, state4, mdsv11); // MDS[11] - cumul6_b = vmlal_laneq_u32::(cumul6_b, state4, mdsv10); // MDS[10] - cumul7_b = vmlal_laneq_u32::(cumul7_b, state4, mdsv9); // MDS[9] - cumul8_b = vmlal_laneq_u32::(cumul8_b, state4, mdsv8); // MDS[8] - cumul9_b = vmlal_laneq_u32::(cumul9_b, state4, mdsv7); // MDS[7] - cumul10_b = vmlal_laneq_u32::(cumul10_b, state4, mdsv6); // MDS[6] - cumul11_b = vmlal_laneq_u32::(cumul11_b, state4, mdsv5); // MDS[5] + let t = vshlq_n_u32::<4>(x0); + let u = vaddq_u32(x1, x2); - cumul0_a = vmlal_high_laneq_u32::(cumul0_a, state5, mdsv5); // MDS[5] - cumul1_a = vmlal_high_laneq_u32::(cumul1_a, state5, mdsv4); // MDS[4] - cumul2_a = vaddw_high_u32(cumul2_a, state5); // MDS[3] - cumul3_a = vmlal_high_laneq_u32::(cumul3_a, state5, mdsv2); // MDS[2] - cumul4_a = vaddw_high_u32(cumul4_a, state5); // MDS[1] - cumul5_a = vaddw_high_u32(cumul5_a, state5); // MDS[0] - cumul6_a = vmlal_high_laneq_u32::(cumul6_a, state5, mdsv11); // MDS[11] - cumul7_a = vmlal_high_laneq_u32::(cumul7_a, state5, mdsv10); // MDS[10] - cumul8_a = vmlal_high_laneq_u32::(cumul8_a, state5, mdsv9); // MDS[9] - cumul9_a = vmlal_high_laneq_u32::(cumul9_a, state5, mdsv8); // MDS[8] - cumul10_a = vmlal_high_laneq_u32::(cumul10_a, state5, mdsv7); // MDS[7] - cumul11_a = vmlal_high_laneq_u32::(cumul11_a, state5, mdsv6); // MDS[6] + let y0 = vshlq_n_u32::<4>(u); + let y1 = vmlaq_laneq_u32::<3>(t, x2, consts); + let y2 = vmlaq_laneq_u32::<3>(t, x1, consts); - cumul0_b = vmlal_laneq_u32::(cumul0_b, state6, mdsv6); // MDS[6] - cumul1_b = vmlal_laneq_u32::(cumul1_b, state6, mdsv5); // MDS[5] - 
cumul2_b = vmlal_laneq_u32::(cumul2_b, state6, mdsv4); // MDS[4] - cumul3_b = vaddw_u32(cumul3_b, state6); // MDS[3] - cumul4_b = vmlal_laneq_u32::(cumul4_b, state6, mdsv2); // MDS[2] - cumul5_b = vaddw_u32(cumul5_b, state6); // MDS[1] - cumul6_b = vaddw_u32(cumul6_b, state6); // MDS[0] - cumul7_b = vmlal_laneq_u32::(cumul7_b, state6, mdsv11); // MDS[11] - cumul8_b = vmlal_laneq_u32::(cumul8_b, state6, mdsv10); // MDS[10] - cumul9_b = vmlal_laneq_u32::(cumul9_b, state6, mdsv9); // MDS[9] - cumul10_b = vmlal_laneq_u32::(cumul10_b, state6, mdsv8); // MDS[8] - cumul11_b = vmlal_laneq_u32::(cumul11_b, state6, mdsv7); // MDS[7] + state_fft[0] = vaddq_u32(y0, y1); + state_fft[1] = vaddq_u32(y1, y2); + state_fft[2] = vaddq_u32(y0, y2); + } - cumul0_a = vmlal_high_laneq_u32::(cumul0_a, state7, mdsv7); // MDS[7] - cumul1_a = vmlal_high_laneq_u32::(cumul1_a, state7, mdsv6); // MDS[6] - cumul2_a = vmlal_high_laneq_u32::(cumul2_a, state7, mdsv5); // MDS[5] - cumul3_a = vmlal_high_laneq_u32::(cumul3_a, state7, mdsv4); // MDS[4] - cumul4_a = vaddw_high_u32(cumul4_a, state7); // MDS[3] - cumul5_a = vmlal_high_laneq_u32::(cumul5_a, state7, mdsv2); // MDS[2] - cumul6_a = vaddw_high_u32(cumul6_a, state7); // MDS[1] - cumul7_a = vaddw_high_u32(cumul7_a, state7); // MDS[0] - cumul8_a = vmlal_high_laneq_u32::(cumul8_a, state7, mdsv11); // MDS[11] - cumul9_a = vmlal_high_laneq_u32::(cumul9_a, state7, mdsv10); // MDS[10] - cumul10_a = vmlal_high_laneq_u32::(cumul10_a, state7, mdsv9); // MDS[9] - cumul11_a = vmlal_high_laneq_u32::(cumul11_a, state7, mdsv8); // MDS[8] + // 3x3 real matrix-vector mul for component 2 of the FFTs. + // Multiply the vector `[x0, x1, x2]` by the matrix + // `[[ -4, -8, 32],` + // ` [-32, -4, -8],` + // ` [ 8, -32, -4]]` + // The results are divided by 4 (this ends up cancelling out some later computations). 
+ { + let x0 = state_fft[3]; + let x1 = state_fft[4]; + let x2 = state_fft[5]; + state_fft[3] = vmlsq_laneq_u32::<2>(vmlaq_laneq_u32::<0>(x0, x1, consts), x2, consts); + state_fft[4] = vmlaq_laneq_u32::<0>(vmlaq_laneq_u32::<2>(x1, x0, consts), x2, consts); + state_fft[5] = vmlsq_laneq_u32::<0>(x2, vmlsq_laneq_u32::<1>(x0, x1, consts), consts); + } - cumul0_b = vmlal_laneq_u32::(cumul0_b, state8, mdsv8); // MDS[8] - cumul1_b = vmlal_laneq_u32::(cumul1_b, state8, mdsv7); // MDS[7] - cumul2_b = vmlal_laneq_u32::(cumul2_b, state8, mdsv6); // MDS[6] - cumul3_b = vmlal_laneq_u32::(cumul3_b, state8, mdsv5); // MDS[5] - cumul4_b = vmlal_laneq_u32::(cumul4_b, state8, mdsv4); // MDS[4] - cumul5_b = vaddw_u32(cumul5_b, state8); // MDS[3] - cumul6_b = vmlal_laneq_u32::(cumul6_b, state8, mdsv2); // MDS[2] - cumul7_b = vaddw_u32(cumul7_b, state8); // MDS[1] - cumul8_b = vaddw_u32(cumul8_b, state8); // MDS[0] - cumul9_b = vmlal_laneq_u32::(cumul9_b, state8, mdsv11); // MDS[11] - cumul10_b = vmlal_laneq_u32::(cumul10_b, state8, mdsv10); // MDS[10] - cumul11_b = vmlal_laneq_u32::(cumul11_b, state8, mdsv9); // MDS[9] + // 3x3 complex matrix-vector mul for components 1 and 3 of the FFTs. + // Multiply the vector `[x0r + x0i i, x1r + x1i i, x2r + x2i i]` by the matrix + // `[[ 4 + 2i, 2 + 32i, 2 - 8i],` + // ` [-8 - 2i, 4 + 2i, 2 + 32i],` + // ` [32 - 2i, -8 - 2i, 4 + 2i]]` + // The results are divided by 2 (this ends up cancelling out some later computations). 
+ { + let x0r = state_fft[6]; + let x1r = state_fft[7]; + let x2r = state_fft[8]; - cumul0_a = vmlal_high_laneq_u32::(cumul0_a, state9, mdsv9); // MDS[9] - cumul1_a = vmlal_high_laneq_u32::(cumul1_a, state9, mdsv8); // MDS[8] - cumul2_a = vmlal_high_laneq_u32::(cumul2_a, state9, mdsv7); // MDS[7] - cumul3_a = vmlal_high_laneq_u32::(cumul3_a, state9, mdsv6); // MDS[6] - cumul4_a = vmlal_high_laneq_u32::(cumul4_a, state9, mdsv5); // MDS[5] - cumul5_a = vmlal_high_laneq_u32::(cumul5_a, state9, mdsv4); // MDS[4] - cumul6_a = vaddw_high_u32(cumul6_a, state9); // MDS[3] - cumul7_a = vmlal_high_laneq_u32::(cumul7_a, state9, mdsv2); // MDS[2] - cumul8_a = vaddw_high_u32(cumul8_a, state9); // MDS[1] - cumul9_a = vaddw_high_u32(cumul9_a, state9); // MDS[0] - cumul10_a = vmlal_high_laneq_u32::(cumul10_a, state9, mdsv11); // MDS[11] - cumul11_a = vmlal_high_laneq_u32::(cumul11_a, state9, mdsv10); // MDS[10] + let x0i = state_fft[9]; + let x1i = state_fft[10]; + let x2i = state_fft[11]; - cumul0_b = vmlal_laneq_u32::(cumul0_b, state10, mdsv10); // MDS[10] - cumul1_b = vmlal_laneq_u32::(cumul1_b, state10, mdsv9); // MDS[9] - cumul2_b = vmlal_laneq_u32::(cumul2_b, state10, mdsv8); // MDS[8] - cumul3_b = vmlal_laneq_u32::(cumul3_b, state10, mdsv7); // MDS[7] - cumul4_b = vmlal_laneq_u32::(cumul4_b, state10, mdsv6); // MDS[6] - cumul5_b = vmlal_laneq_u32::(cumul5_b, state10, mdsv5); // MDS[5] - cumul6_b = vmlal_laneq_u32::(cumul6_b, state10, mdsv4); // MDS[4] - cumul7_b = vaddw_u32(cumul7_b, state10); // MDS[3] - cumul8_b = vmlal_laneq_u32::(cumul8_b, state10, mdsv2); // MDS[2] - cumul9_b = vaddw_u32(cumul9_b, state10); // MDS[1] - cumul10_b = vaddw_u32(cumul10_b, state10); // MDS[0] - cumul11_b = vmlal_laneq_u32::(cumul11_b, state10, mdsv11); // MDS[11] + // real part of result <- real part of input + let r0rr = vaddq_u32(vmlaq_laneq_u32::<0>(x1r, x0r, consts), x2r); + let r1rr = vmlaq_laneq_u32::<0>(x2r, vmlsq_laneq_u32::<0>(x1r, x0r, consts), consts); + let r2rr = 
vmlsq_laneq_u32::<0>(x2r, vmlsq_laneq_u32::<1>(x1r, x0r, consts), consts); - cumul0_a = vmlal_high_laneq_u32::(cumul0_a, state11, mdsv11); // MDS[11] - cumul1_a = vmlal_high_laneq_u32::(cumul1_a, state11, mdsv10); // MDS[10] - cumul2_a = vmlal_high_laneq_u32::(cumul2_a, state11, mdsv9); // MDS[9] - cumul3_a = vmlal_high_laneq_u32::(cumul3_a, state11, mdsv8); // MDS[8] - cumul4_a = vmlal_high_laneq_u32::(cumul4_a, state11, mdsv7); // MDS[7] - cumul5_a = vmlal_high_laneq_u32::(cumul5_a, state11, mdsv6); // MDS[6] - cumul6_a = vmlal_high_laneq_u32::(cumul6_a, state11, mdsv5); // MDS[5] - cumul7_a = vmlal_high_laneq_u32::(cumul7_a, state11, mdsv4); // MDS[4] - cumul8_a = vaddw_high_u32(cumul8_a, state11); // MDS[3] - cumul9_a = vmlal_high_laneq_u32::(cumul9_a, state11, mdsv2); // MDS[2] - cumul10_a = vaddw_high_u32(cumul10_a, state11); // MDS[1] - cumul11_a = vaddw_high_u32(cumul11_a, state11); // MDS[0] + // real part of result <- imaginary part of input + let r0ri = vmlsq_laneq_u32::<1>(vmlaq_laneq_u32::<3>(x0i, x1i, consts), x2i, consts); + let r1ri = vmlsq_laneq_u32::<3>(vsubq_u32(x0i, x1i), x2i, consts); + let r2ri = vsubq_u32(vaddq_u32(x0i, x1i), x2i); - let reduced = [ - mds_reduce([[cumul0_a, cumul0_b], [cumul1_a, cumul1_b]]), - mds_reduce([[cumul2_a, cumul2_b], [cumul3_a, cumul3_b]]), - mds_reduce([[cumul4_a, cumul4_b], [cumul5_a, cumul5_b]]), - mds_reduce([[cumul6_a, cumul6_b], [cumul7_a, cumul7_b]]), - mds_reduce([[cumul8_a, cumul8_b], [cumul9_a, cumul9_b]]), - mds_reduce([[cumul10_a, cumul10_b], [cumul11_a, cumul11_b]]), - ]; - [ - vgetq_lane_u64::<0>(reduced[0]), - vgetq_lane_u64::<1>(reduced[0]), - vgetq_lane_u64::<0>(reduced[1]), - vgetq_lane_u64::<1>(reduced[1]), - vgetq_lane_u64::<0>(reduced[2]), - vgetq_lane_u64::<1>(reduced[2]), - vgetq_lane_u64::<0>(reduced[3]), - vgetq_lane_u64::<1>(reduced[3]), - vgetq_lane_u64::<0>(reduced[4]), - vgetq_lane_u64::<1>(reduced[4]), - vgetq_lane_u64::<0>(reduced[5]), - vgetq_lane_u64::<1>(reduced[5]), - ] + // real 
part of result (total) + let r0r = vsubq_u32(r0rr, r0ri); + let r1r = vaddq_u32(r1rr, r1ri); + let r2r = vmlaq_laneq_u32::<0>(r2ri, r2rr, consts); + + // imaginary part of result <- real part of input + let r0ir = vmlsq_laneq_u32::<1>(vmlaq_laneq_u32::<3>(x0r, x1r, consts), x2r, consts); + let r1ir = vmlaq_laneq_u32::<3>(vsubq_u32(x1r, x0r), x2r, consts); + let r2ir = vsubq_u32(x2r, vaddq_u32(x0r, x1r)); + + // imaginary part of result <- imaginary part of input + let r0ii = vaddq_u32(vmlaq_laneq_u32::<0>(x1i, x0i, consts), x2i); + let r1ii = vmlaq_laneq_u32::<0>(x2i, vmlsq_laneq_u32::<0>(x1i, x0i, consts), consts); + let r2ii = vmlsq_laneq_u32::<0>(x2i, vmlsq_laneq_u32::<1>(x1i, x0i, consts), consts); + + // imaginary part of result (total) + let r0i = vaddq_u32(r0ir, r0ii); + let r1i = vaddq_u32(r1ir, r1ii); + let r2i = vmlaq_laneq_u32::<0>(r2ir, r2ii, consts); + + state_fft[6] = r0r; + state_fft[7] = r1r; + state_fft[8] = r2r; + + state_fft[9] = r0i; + state_fft[10] = r1i; + state_fft[11] = r2i; + } + + // Three length-4 inverse FFTs. + // Normally, such IFFT would divide by 4, but we've already taken care of that. + for i in 0..3 { + let z0 = state_fft[i]; + let z1 = state_fft[i + 3]; + let z2 = state_fft[i + 6]; + let z3 = state_fft[i + 9]; + + let y0 = vsubq_u32(z0, z1); + let y1 = vaddq_u32(z0, z1); + let y2 = z2; + let y3 = z3; + + let x0 = vaddq_u32(y0, y2); + let x1 = vaddq_u32(y1, y3); + let x2 = vsubq_u32(y0, y2); + let x3 = vsubq_u32(y1, y3); + + state_fft[i] = x0; + state_fft[i + 3] = x1; + state_fft[i + 6] = x2; + state_fft[i + 9] = x3; + } + + // Perform `res[0] += state[0] * 8` for the diagonal component of the MDS matrix. + state_fft[0] = vmlal_laneq_u16::<4>( + state_fft[0], + vcreate_u16(state[0]), // Each 16-bit chunk gets zero-extended. + vreinterpretq_u16_u32(consts), // Hack: these constants fit in `u16s`, so we can bit-cast. 
+ ); + + let mut res_arr = [0; 12]; + for i in 0..6 { + let res = mds_reduce([state_fft[2 * i], state_fft[2 * i + 1]]); + res_arr[2 * i] = vgetq_lane_u64::<0>(res); + res_arr[2 * i + 1] = vgetq_lane_u64::<1>(res); + } + + res_arr } // ======================================== PARTIAL ROUNDS ========================================= +/* #[rustfmt::skip] macro_rules! mds_reduce_asm { ($c0:literal, $c1:literal, $out:literal, $consts:literal) => { @@ -961,13 +864,15 @@ unsafe fn partial_round( [res23, res45, res67, res89, res1011], ) } +*/ // ========================================== GLUE CODE =========================================== +/* #[inline(always)] unsafe fn full_round(state: [u64; 12], round_constants: &[u64; WIDTH]) -> [u64; 12] { let state = sbox_layer_full(state); - mds_const_layers_full(state, round_constants) + mds_layer_full(state, round_constants) } #[inline] @@ -1001,43 +906,19 @@ unsafe fn partial_rounds( } state.0 } +*/ #[inline(always)] fn unwrap_state(state: [GoldilocksField; 12]) -> [u64; 12] { - [ - state[0].0, - state[1].0, - state[2].0, - state[3].0, - state[4].0, - state[5].0, - state[6].0, - state[7].0, - state[8].0, - state[9].0, - state[10].0, - state[11].0, - ] + state.map(|s| s.0) } #[inline(always)] fn wrap_state(state: [u64; 12]) -> [GoldilocksField; 12] { - [ - GoldilocksField(state[0]), - GoldilocksField(state[1]), - GoldilocksField(state[2]), - GoldilocksField(state[3]), - GoldilocksField(state[4]), - GoldilocksField(state[5]), - GoldilocksField(state[6]), - GoldilocksField(state[7]), - GoldilocksField(state[8]), - GoldilocksField(state[9]), - GoldilocksField(state[10]), - GoldilocksField(state[11]), - ] + state.map(GoldilocksField) } +/* #[inline(always)] pub unsafe fn poseidon(state: [GoldilocksField; 12]) -> [GoldilocksField; 12] { let state = unwrap_state(state); @@ -1058,6 +939,7 @@ pub unsafe fn poseidon(state: [GoldilocksField; 12]) -> [GoldilocksField; 12] { let state = full_rounds(state, &FINAL_ROUND_CONSTANTS); 
wrap_state(state) } +*/ #[inline(always)] pub unsafe fn sbox_layer(state: &mut [GoldilocksField; WIDTH]) { @@ -1067,8 +949,6 @@ pub unsafe fn sbox_layer(state: &mut [GoldilocksField; WIDTH]) { #[inline(always)] pub unsafe fn mds_layer(state: &[GoldilocksField; WIDTH]) -> [GoldilocksField; WIDTH] { let state = unwrap_state(*state); - // We want to do an MDS layer without the constant layer. - let round_consts = [0u64; WIDTH]; - let state = mds_const_layers_full(state, &round_consts); + let state = mds_layer_full(state); wrap_state(state) } diff --git a/plonky2/src/hash/poseidon_goldilocks.rs b/plonky2/src/hash/poseidon_goldilocks.rs index 971fda0f..177b30ff 100644 --- a/plonky2/src/hash/poseidon_goldilocks.rs +++ b/plonky2/src/hash/poseidon_goldilocks.rs @@ -252,21 +252,21 @@ impl Poseidon for GoldilocksField { // } // } - // #[cfg(all(target_arch="aarch64", target_feature="neon"))] - // #[inline(always)] - // fn sbox_layer(state: &mut [Self; 12]) { - // unsafe { - // crate::hash::arch::aarch64::poseidon_goldilocks_neon::sbox_layer(state); - // } - // } + #[cfg(all(target_arch="aarch64", target_feature="neon"))] + #[inline(always)] + fn sbox_layer(state: &mut [Self; 12]) { + unsafe { + crate::hash::arch::aarch64::poseidon_goldilocks_neon::sbox_layer(state); + } + } - // #[cfg(all(target_arch="aarch64", target_feature="neon"))] - // #[inline(always)] - // fn mds_layer(state: &[Self; 12]) -> [Self; 12] { - // unsafe { - // crate::hash::arch::aarch64::poseidon_goldilocks_neon::mds_layer(state) - // } - // } + #[cfg(all(target_arch="aarch64", target_feature="neon"))] + #[inline(always)] + fn mds_layer(state: &[Self; 12]) -> [Self; 12] { + unsafe { + crate::hash::arch::aarch64::poseidon_goldilocks_neon::mds_layer(state) + } + } } #[cfg(test)] From 56e269e27abaa50f33493b06cc105240e8cdd334 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Tue, 22 Feb 2022 10:37:08 +0100 Subject: [PATCH 15/32] Working (not recursively) --- starky/src/fibonacci_stark.rs | 3 --- 
starky/src/lib.rs | 1 + starky/src/permutation.rs | 24 ++++++++++++++---------- starky/src/prover.rs | 4 ++-- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/starky/src/fibonacci_stark.rs b/starky/src/fibonacci_stark.rs index 2bbd333f..6ffbe858 100644 --- a/starky/src/fibonacci_stark.rs +++ b/starky/src/fibonacci_stark.rs @@ -2,12 +2,9 @@ use std::marker::PhantomData; use plonky2::field::extension_field::{Extendable, FieldExtension}; use plonky2::field::packed_field::PackedField; -use plonky2::fri::structure::{FriInstanceInfo, FriInstanceInfoTarget}; use plonky2::hash::hash_types::RichField; -use plonky2::iop::ext_target::ExtensionTarget; use plonky2::plonk::circuit_builder::CircuitBuilder; -use crate::config::StarkConfig; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::permutation::PermutationPair; use crate::stark::Stark; diff --git a/starky/src/lib.rs b/starky/src/lib.rs index 51a73479..8249d90b 100644 --- a/starky/src/lib.rs +++ b/starky/src/lib.rs @@ -3,6 +3,7 @@ #![allow(unused_variables)] #![allow(incomplete_features)] #![allow(clippy::too_many_arguments)] +#![allow(clippy::type_complexity)] #![feature(generic_const_exprs)] pub mod config; diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index 8a33eb41..75fa8400 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -62,7 +62,7 @@ where let permutation_pairs = stark.permutation_pairs(); let permutation_batches = get_permutation_batches( &permutation_pairs, - &permutation_challenge_sets, + permutation_challenge_sets, config.num_challenges, stark.permutation_batch_size(), ); @@ -234,15 +234,19 @@ pub(crate) fn eval_permutation_checks Date: Tue, 22 Feb 2022 10:46:51 +0100 Subject: [PATCH 16/32] Fill permutation todos --- starky/src/recursive_verifier.rs | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/starky/src/recursive_verifier.rs 
b/starky/src/recursive_verifier.rs index ea7ffb70..c1071b5a 100644 --- a/starky/src/recursive_verifier.rs +++ b/starky/src/recursive_verifier.rs @@ -187,24 +187,29 @@ pub fn add_virtual_stark_proof, S: Stark, con let fri_params = config.fri_params(degree_bits); let cap_height = fri_params.config.cap_height; - let num_leaves_per_oracle = &[ - S::COLUMNS, - // TODO: permutation polys - stark.quotient_degree_factor() * config.num_challenges, - ]; - - let permutation_zs_cap = if stark.uses_permutation_args() { - Some(builder.add_virtual_cap(cap_height)) + let num_leaves_per_oracle = if stark.uses_permutation_args() { + vec![ + S::COLUMNS, + stark.num_permutation_batches(config), + stark.quotient_degree_factor() * config.num_challenges, + ] } else { - None + vec![ + S::COLUMNS, + stark.quotient_degree_factor() * config.num_challenges, + ] }; + let permutation_zs_cap = stark + .uses_permutation_args() + .then(|| builder.add_virtual_cap(cap_height)); + StarkProofTarget { trace_cap: builder.add_virtual_cap(cap_height), permutation_zs_cap, quotient_polys_cap: builder.add_virtual_cap(cap_height), openings: add_stark_opening_set::(builder, stark, config), - opening_proof: builder.add_virtual_fri_proof(num_leaves_per_oracle, &fri_params), + opening_proof: builder.add_virtual_fri_proof(&num_leaves_per_oracle, &fri_params), } } @@ -217,8 +222,10 @@ fn add_stark_opening_set, S: Stark, const D: StarkOpeningSetTarget { local_values: builder.add_virtual_extension_targets(S::COLUMNS), next_values: builder.add_virtual_extension_targets(S::COLUMNS), - permutation_zs: vec![/*TODO*/], - permutation_zs_right: vec![/*TODO*/], + permutation_zs: builder + .add_virtual_extension_targets(stark.num_permutation_batches(config)), + permutation_zs_right: builder + .add_virtual_extension_targets(stark.num_permutation_batches(config)), quotient_polys: builder .add_virtual_extension_targets(stark.quotient_degree_factor() * num_challenges), } From 6cd2fc62b57714357b6e6dd21ed085dbb2f95b46 Mon Sep 17 
00:00:00 2001 From: wborgeaud Date: Tue, 22 Feb 2022 11:44:24 +0100 Subject: [PATCH 17/32] Should work (does not) --- plonky2/src/iop/challenger.rs | 2 +- starky/src/fibonacci_stark.rs | 2 +- starky/src/get_challenges.rs | 42 ++++++-- starky/src/permutation.rs | 160 +++++++++++++++++++++++++++---- starky/src/proof.rs | 29 +++--- starky/src/recursive_verifier.rs | 37 +++++-- starky/src/vanishing_poly.rs | 48 +++++++--- starky/src/verifier.rs | 1 - 8 files changed, 260 insertions(+), 61 deletions(-) diff --git a/plonky2/src/iop/challenger.rs b/plonky2/src/iop/challenger.rs index c3a4403a..5b374834 100644 --- a/plonky2/src/iop/challenger.rs +++ b/plonky2/src/iop/challenger.rs @@ -208,7 +208,7 @@ impl, H: AlgebraicHasher, const D: usize> } } - pub(crate) fn get_challenge(&mut self, builder: &mut CircuitBuilder) -> Target { + pub fn get_challenge(&mut self, builder: &mut CircuitBuilder) -> Target { self.absorb_buffered_inputs(builder); if self.output_buffer.is_empty() { diff --git a/starky/src/fibonacci_stark.rs b/starky/src/fibonacci_stark.rs index 6ffbe858..10b54d69 100644 --- a/starky/src/fibonacci_stark.rs +++ b/starky/src/fibonacci_stark.rs @@ -234,7 +234,7 @@ mod tests { let pt = add_virtual_stark_proof_with_pis(&mut builder, stark, inner_config, degree_bits); set_stark_proof_with_pis_target(&mut pw, &pt, &inner_proof); - recursively_verify_stark_proof::(&mut builder, stark, pt, inner_config); + recursively_verify_stark_proof::(&mut builder, stark, pt, inner_config)?; if print_gate_counts { builder.print_gate_counts(0); diff --git a/starky/src/get_challenges.rs b/starky/src/get_challenges.rs index 1cb1e633..8ee71667 100644 --- a/starky/src/get_challenges.rs +++ b/starky/src/get_challenges.rs @@ -11,7 +11,9 @@ use plonky2::plonk::circuit_builder::CircuitBuilder; use plonky2::plonk::config::{AlgebraicHasher, GenericConfig}; use crate::config::StarkConfig; -use crate::permutation::get_n_permutation_challenge_sets; +use crate::permutation::{ + 
get_n_permutation_challenge_sets, get_n_permutation_challenge_sets_target, +}; use crate::proof::*; use crate::stark::Stark; @@ -131,9 +133,11 @@ where pub(crate) fn get_challenges_target< F: RichField + Extendable, C: GenericConfig, + S: Stark, const D: usize, >( builder: &mut CircuitBuilder, + stark: &S, trace_cap: &MerkleCapTarget, permutation_zs_cap: Option<&MerkleCapTarget>, quotient_polys_cap: &MerkleCapTarget, @@ -142,7 +146,7 @@ pub(crate) fn get_challenges_target< final_poly: &PolynomialCoeffsExtTarget, pow_witness: Target, config: &StarkConfig, -) -> StarkProofChallengesTarget +) -> Result> where C::Hasher: AlgebraicHasher, { @@ -151,6 +155,23 @@ where let mut challenger = RecursiveChallenger::::new(builder); challenger.observe_cap(trace_cap); + + let permutation_challenge_sets = if stark.uses_permutation_args() { + get_n_permutation_challenge_sets_target( + builder, + &mut challenger, + num_challenges, + stark.permutation_batch_size(), + ) + } else { + vec![] + }; + if stark.uses_permutation_args() { + let cap = + permutation_zs_cap.ok_or_else(|| anyhow::Error::msg("expected permutation_zs_cap")); + challenger.observe_cap(cap?); + } + let stark_alphas = challenger.get_n_challenges(builder, num_challenges); challenger.observe_cap(quotient_polys_cap); @@ -158,7 +179,8 @@ where challenger.observe_openings(&openings.to_fri_openings()); - StarkProofChallengesTarget { + Ok(StarkProofChallengesTarget { + permutation_challenge_sets, stark_alphas, stark_zeta, fri_challenges: challenger.fri_challenges::( @@ -168,15 +190,20 @@ where pow_witness, &config.fri_config, ), - } + }) } impl StarkProofWithPublicInputsTarget { - pub(crate) fn get_challenges, C: GenericConfig>( + pub(crate) fn get_challenges< + F: RichField + Extendable, + C: GenericConfig, + S: Stark, + >( &self, builder: &mut CircuitBuilder, + stark: &S, config: &StarkConfig, - ) -> StarkProofChallengesTarget + ) -> Result> where C::Hasher: AlgebraicHasher, { @@ -194,8 +221,9 @@ impl 
StarkProofWithPublicInputsTarget { }, } = &self.proof; - get_challenges_target::( + get_challenges_target::( builder, + stark, trace_cap, permutation_zs_cap.as_ref(), quotient_polys_cap, diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index 75fa8400..1113094d 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -6,14 +6,17 @@ use plonky2::field::extension_field::{Extendable, FieldExtension}; use plonky2::field::field_types::Field; use plonky2::field::polynomial::PolynomialValues; use plonky2::hash::hash_types::RichField; -use plonky2::iop::challenger::Challenger; -use plonky2::plonk::config::{GenericConfig, Hasher}; +use plonky2::iop::challenger::{Challenger, RecursiveChallenger}; +use plonky2::iop::ext_target::ExtensionTarget; +use plonky2::iop::target::Target; +use plonky2::plonk::circuit_builder::CircuitBuilder; +use plonky2::plonk::config::{AlgebraicHasher, GenericConfig, Hasher}; use rayon::prelude::*; use crate::config::StarkConfig; -use crate::constraint_consumer::ConstraintConsumer; +use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::stark::Stark; -use crate::vars::StarkEvaluationVars; +use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; /// A pair of lists of columns, `lhs` and `rhs`, that should be permutations of one another. /// In particular, there should exist some permutation `pi` such that for any `i`, @@ -26,24 +29,24 @@ pub struct PermutationPair { } /// A single instance of a permutation check protocol. -pub(crate) struct PermutationInstance<'a, F: Field> { +pub(crate) struct PermutationInstance<'a, T: Copy> { pub(crate) pair: &'a PermutationPair, - pub(crate) challenge: PermutationChallenge, + pub(crate) challenge: PermutationChallenge, } /// Randomness for a single instance of a permutation check protocol. 
#[derive(Copy, Clone)] -pub(crate) struct PermutationChallenge { +pub(crate) struct PermutationChallenge { /// Randomness used to combine multiple columns into one. - pub(crate) beta: F, + pub(crate) beta: T, /// Random offset that's added to the beta-reduced column values. - pub(crate) gamma: F, + pub(crate) gamma: T, } /// Like `PermutationChallenge`, but with `num_challenges` copies to boost soundness. #[derive(Clone)] -pub(crate) struct PermutationChallengeSet { - pub(crate) challenges: Vec>, +pub(crate) struct PermutationChallengeSet { + pub(crate) challenges: Vec>, } /// Compute all Z polynomials (for permutation arguments). @@ -163,17 +166,60 @@ pub(crate) fn get_n_permutation_challenge_sets>( .collect() } +fn get_permutation_challenge_target< + F: RichField + Extendable, + H: AlgebraicHasher, + const D: usize, +>( + builder: &mut CircuitBuilder, + challenger: &mut RecursiveChallenger, +) -> PermutationChallenge { + let beta = challenger.get_challenge(builder); + let gamma = challenger.get_challenge(builder); + PermutationChallenge { beta, gamma } +} + +fn get_permutation_challenge_set_target< + F: RichField + Extendable, + H: AlgebraicHasher, + const D: usize, +>( + builder: &mut CircuitBuilder, + challenger: &mut RecursiveChallenger, + num_challenges: usize, +) -> PermutationChallengeSet { + let challenges = (0..num_challenges) + .map(|_| get_permutation_challenge_target(builder, challenger)) + .collect(); + PermutationChallengeSet { challenges } +} + +pub(crate) fn get_n_permutation_challenge_sets_target< + F: RichField + Extendable, + H: AlgebraicHasher, + const D: usize, +>( + builder: &mut CircuitBuilder, + challenger: &mut RecursiveChallenger, + num_challenges: usize, + num_sets: usize, +) -> Vec> { + (0..num_sets) + .map(|_| get_permutation_challenge_set_target(builder, challenger, num_challenges)) + .collect() +} + /// Get a list of instances of our batch-permutation argument. 
These are permutation arguments /// where the same `Z(x)` polynomial is used to check more than one permutation. /// Before batching, each permutation pair leads to `num_challenges` permutation arguments, so we /// start with the cartesian product of `permutation_pairs` and `0..num_challenges`. Then we /// chunk these arguments based on our batch size. -pub(crate) fn get_permutation_batches<'a, F: Field>( +pub(crate) fn get_permutation_batches<'a, T: Copy>( permutation_pairs: &'a [PermutationPair], - permutation_challenge_sets: &[PermutationChallengeSet], + permutation_challenge_sets: &[PermutationChallengeSet], num_challenges: usize, batch_size: usize, -) -> Vec>> { +) -> Vec>> { permutation_pairs .iter() .cartesian_product(0..num_challenges) @@ -202,10 +248,8 @@ pub(crate) fn eval_permutation_checks, - local_zs: &[FE], - next_zs: &[FE], + permutation_data: PermutationCheckData, consumer: &mut ConstraintConsumer, - permutation_challenge_sets: &[PermutationChallengeSet], ) where F: RichField + Extendable, FE: FieldExtension, @@ -214,12 +258,17 @@ pub(crate) fn eval_permutation_checks { + pub(crate) local_zs: Vec>, + pub(crate) next_zs: Vec>, + pub(crate) permutation_challenge_sets: Vec>, +} + +pub(crate) fn eval_permutation_checks_recursively( + builder: &mut CircuitBuilder, + stark: &S, + config: &StarkConfig, + vars: StarkEvaluationTargets, + permutation_data: PermutationCheckDataTarget, + consumer: &mut RecursiveConstraintConsumer, +) where + F: RichField + Extendable, + S: Stark, + [(); S::COLUMNS]:, + [(); S::PUBLIC_INPUTS]:, +{ + let PermutationCheckDataTarget { + local_zs, + next_zs, + permutation_challenge_sets, + } = permutation_data; + // TODO: Z_1 check. + let permutation_pairs = stark.permutation_pairs(); + + let permutation_batches = get_permutation_batches( + &permutation_pairs, + &permutation_challenge_sets, + config.num_challenges, + stark.permutation_batch_size(), + ); + + // Each zs value corresponds to a permutation batch. 
+ for (i, instances) in permutation_batches.iter().enumerate() { + // Z(gx) * down = Z x * up + let (reduced_lhs, reduced_rhs): (Vec>, Vec>) = + instances + .iter() + .map(|instance| { + let PermutationInstance { + pair: PermutationPair { column_pairs }, + challenge: PermutationChallenge { beta, gamma }, + } = instance; + let zero = builder.zero_extension(); + let beta_ext = builder.convert_to_ext(*beta); + let gamma_ext = builder.convert_to_ext(*gamma); + let mut reduced = + column_pairs + .iter() + .rev() + .fold((zero, zero), |(lhs, rhs), &(i, j)| { + ( + builder.mul_add_extension(lhs, beta_ext, vars.local_values[i]), + builder.mul_add_extension(rhs, beta_ext, vars.local_values[j]), + ) + }); + reduced.0 = builder.add_extension(reduced.0, gamma_ext); + reduced.1 = builder.add_extension(reduced.1, gamma_ext); + reduced + }) + .unzip(); + let reduced_lhs_product = builder.mul_many_extension(&reduced_lhs); + let reduced_rhs_product = builder.mul_many_extension(&reduced_rhs); + // constraint = next_zs[i] * reduced_rhs_product - local_zs[i] * reduced_lhs_product + let constraint = { + let tmp = builder.mul_extension(local_zs[i], reduced_lhs_product); + builder.mul_sub_extension(next_zs[i], reduced_rhs_product, tmp) + }; + consumer.constraint(builder, constraint) + } +} diff --git a/starky/src/proof.rs b/starky/src/proof.rs index 4807b443..d1f86d7e 100644 --- a/starky/src/proof.rs +++ b/starky/src/proof.rs @@ -113,6 +113,7 @@ pub(crate) struct StarkProofChallenges, const D: us } pub(crate) struct StarkProofChallengesTarget { + pub permutation_challenge_sets: Vec>, pub stark_alphas: Vec, pub stark_zeta: ExtensionTarget, pub fri_challenges: FriChallengesTarget, @@ -179,27 +180,29 @@ impl, const D: usize> StarkOpeningSet { pub struct StarkOpeningSetTarget { pub local_values: Vec>, pub next_values: Vec>, - pub permutation_zs: Vec>, - pub permutation_zs_right: Vec>, + pub permutation_zs: Option>>, + pub permutation_zs_right: Option>>, pub quotient_polys: Vec>, } impl 
StarkOpeningSetTarget { pub(crate) fn to_fri_openings(&self) -> FriOpeningsTarget { let zeta_batch = FriOpeningBatchTarget { - values: [ - self.local_values.as_slice(), - self.quotient_polys.as_slice(), - self.permutation_zs.as_slice(), - ] - .concat(), + values: self + .local_values + .iter() + .chain(self.permutation_zs.iter().flatten()) + .chain(&self.quotient_polys) + .copied() + .collect_vec(), }; let zeta_right_batch = FriOpeningBatchTarget { - values: [ - self.next_values.as_slice(), - self.permutation_zs_right.as_slice(), - ] - .concat(), + values: self + .next_values + .iter() + .chain(self.permutation_zs_right.iter().flatten()) + .copied() + .collect_vec(), }; FriOpeningsTarget { batches: vec![zeta_batch, zeta_right_batch], diff --git a/starky/src/recursive_verifier.rs b/starky/src/recursive_verifier.rs index c1071b5a..cc547396 100644 --- a/starky/src/recursive_verifier.rs +++ b/starky/src/recursive_verifier.rs @@ -1,5 +1,6 @@ use std::iter::once; +use anyhow::Result; use itertools::Itertools; use plonky2::field::extension_field::Extendable; use plonky2::field::field_types::Field; @@ -13,11 +14,13 @@ use plonky2::util::reducing::ReducingFactorTarget; use crate::config::StarkConfig; use crate::constraint_consumer::RecursiveConstraintConsumer; +use crate::permutation::PermutationCheckDataTarget; use crate::proof::{ StarkOpeningSetTarget, StarkProof, StarkProofChallengesTarget, StarkProofTarget, StarkProofWithPublicInputs, StarkProofWithPublicInputsTarget, }; use crate::stark::Stark; +use crate::vanishing_poly::eval_vanishing_poly_recursively; use crate::vars::StarkEvaluationTargets; pub fn recursively_verify_stark_proof< @@ -30,14 +33,15 @@ pub fn recursively_verify_stark_proof< stark: S, proof_with_pis: StarkProofWithPublicInputsTarget, inner_config: &StarkConfig, -) where +) -> Result<()> +where C::Hasher: AlgebraicHasher, [(); S::COLUMNS]:, [(); S::PUBLIC_INPUTS]:, { assert_eq!(proof_with_pis.public_inputs.len(), S::PUBLIC_INPUTS); let degree_bits = 
proof_with_pis.proof.recover_degree_bits(inner_config); - let challenges = proof_with_pis.get_challenges::(builder, inner_config); + let challenges = proof_with_pis.get_challenges::(builder, &stark, inner_config)?; recursively_verify_stark_proof_with_challenges::( builder, @@ -47,6 +51,8 @@ pub fn recursively_verify_stark_proof< inner_config, degree_bits, ); + + Ok(()) } /// Recursively verifies an inner proof. @@ -104,8 +110,21 @@ fn recursively_verify_stark_proof_with_challenges< l_1, l_last, ); - stark.eval_ext_recursively(builder, vars, &mut consumer); - // TODO: Add in constraints for permutation arguments. + let permutation_data = stark + .uses_permutation_args() + .then(|| PermutationCheckDataTarget { + local_zs: permutation_zs.as_ref().unwrap().clone(), + next_zs: permutation_zs_right.as_ref().unwrap().clone(), + permutation_challenge_sets: challenges.permutation_challenge_sets, + }); + eval_vanishing_poly_recursively::( + builder, + &stark, + inner_config, + vars, + permutation_data, + &mut consumer, + ); let vanishing_polys_zeta = consumer.accumulators(); // Check each polynomial identity, of the form `vanishing(x) = Z_H(x) quotient(x)`, at zeta. 
@@ -222,10 +241,12 @@ fn add_stark_opening_set, S: Stark, const D: StarkOpeningSetTarget { local_values: builder.add_virtual_extension_targets(S::COLUMNS), next_values: builder.add_virtual_extension_targets(S::COLUMNS), - permutation_zs: builder - .add_virtual_extension_targets(stark.num_permutation_batches(config)), - permutation_zs_right: builder - .add_virtual_extension_targets(stark.num_permutation_batches(config)), + permutation_zs: stark + .uses_permutation_args() + .then(|| builder.add_virtual_extension_targets(stark.num_permutation_batches(config))), + permutation_zs_right: stark + .uses_permutation_args() + .then(|| builder.add_virtual_extension_targets(stark.num_permutation_batches(config))), quotient_polys: builder .add_virtual_extension_targets(stark.quotient_degree_factor() * num_challenges), } diff --git a/starky/src/vanishing_poly.rs b/starky/src/vanishing_poly.rs index dc598167..55ea7a5a 100644 --- a/starky/src/vanishing_poly.rs +++ b/starky/src/vanishing_poly.rs @@ -1,12 +1,16 @@ use plonky2::field::extension_field::{Extendable, FieldExtension}; use plonky2::hash::hash_types::RichField; +use plonky2::plonk::circuit_builder::CircuitBuilder; use plonky2::plonk::config::GenericConfig; use crate::config::StarkConfig; -use crate::constraint_consumer::ConstraintConsumer; -use crate::permutation::{eval_permutation_checks, PermutationCheckData}; +use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use crate::permutation::{ + eval_permutation_checks, eval_permutation_checks_recursively, PermutationCheckData, + PermutationCheckDataTarget, +}; use crate::stark::Stark; -use crate::vars::StarkEvaluationVars; +use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; pub(crate) fn eval_vanishing_poly( stark: &S, @@ -23,20 +27,40 @@ pub(crate) fn eval_vanishing_poly( [(); S::PUBLIC_INPUTS]:, { stark.eval_packed_generic(vars, consumer); - if let Some(PermutationCheckData { - local_zs, - next_zs, - permutation_challenge_sets, - 
}) = permutation_data - { + if let Some(permutation_data) = permutation_data { eval_permutation_checks::( stark, config, vars, - &local_zs, - &next_zs, + permutation_data, + consumer, + ); + } +} + +pub(crate) fn eval_vanishing_poly_recursively( + builder: &mut CircuitBuilder, + stark: &S, + config: &StarkConfig, + vars: StarkEvaluationTargets, + permutation_data: Option>, + consumer: &mut RecursiveConstraintConsumer, +) where + F: RichField + Extendable, + C: GenericConfig, + S: Stark, + [(); S::COLUMNS]:, + [(); S::PUBLIC_INPUTS]:, +{ + stark.eval_ext_recursively(builder, vars, consumer); + if let Some(permutation_data) = permutation_data { + eval_permutation_checks_recursively::( + builder, + stark, + config, + vars, + permutation_data, consumer, - &permutation_challenge_sets, ); } } diff --git a/starky/src/verifier.rs b/starky/src/verifier.rs index 1603b208..6bb1ac4e 100644 --- a/starky/src/verifier.rs +++ b/starky/src/verifier.rs @@ -90,7 +90,6 @@ where l_1, l_last, ); - // stark.eval_ext(vars, &mut consumer); let permutation_data = stark.uses_permutation_args().then(|| PermutationCheckData { local_zs: permutation_zs.as_ref().unwrap().clone(), next_zs: permutation_zs_right.as_ref().unwrap().clone(), From 064b3c07a829deefadb5e71a974971da845e5ab8 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Tue, 22 Feb 2022 16:18:41 +0100 Subject: [PATCH 18/32] Forgot to set permutation cap --- starky/src/prover.rs | 2 -- starky/src/recursive_verifier.rs | 6 ++++++ starky/src/stark.rs | 1 - starky/src/verifier.rs | 1 - 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/starky/src/prover.rs b/starky/src/prover.rs index 5cfcf6ea..ac6689c2 100644 --- a/starky/src/prover.rs +++ b/starky/src/prover.rs @@ -294,8 +294,6 @@ where permutation_check_data, &mut consumer, ); - // stark.eval_packed_base(vars, &mut consumer); - // TODO: Add in constraints for permutation arguments. // TODO: Fix this once we use a genuine `PackedField`. 
let mut constraints_evals = consumer.accumulators(); // We divide the constraints evaluations by `Z_H(x)`. diff --git a/starky/src/recursive_verifier.rs b/starky/src/recursive_verifier.rs index cc547396..608d6dc1 100644 --- a/starky/src/recursive_verifier.rs +++ b/starky/src/recursive_verifier.rs @@ -295,5 +295,11 @@ pub fn set_stark_proof_target, W, const D: usize>( &proof.openings.to_fri_openings(), ); + if let (Some(permutation_zs_cap_target), Some(permutation_zs_cap)) = + (&proof_target.permutation_zs_cap, &proof.permutation_zs_cap) + { + witness.set_cap_target(&permutation_zs_cap_target, &permutation_zs_cap); + } + set_fri_proof_target(witness, &proof_target.opening_proof, &proof.opening_proof); } diff --git a/starky/src/stark.rs b/starky/src/stark.rs index a2a2f7fd..72614574 100644 --- a/starky/src/stark.rs +++ b/starky/src/stark.rs @@ -16,7 +16,6 @@ use crate::vars::StarkEvaluationTargets; use crate::vars::StarkEvaluationVars; /// Represents a STARK system. -// TODO: Add a `constraint_degree` fn that returns the maximum constraint degree. pub trait Stark, const D: usize>: Sync { /// The total number of columns in the trace. const COLUMNS: usize; diff --git a/starky/src/verifier.rs b/starky/src/verifier.rs index 6bb1ac4e..44d3f9c7 100644 --- a/starky/src/verifier.rs +++ b/starky/src/verifier.rs @@ -102,7 +102,6 @@ where permutation_data, &mut consumer, ); - // TODO: Add in constraints for permutation arguments. let vanishing_polys_zeta = consumer.accumulators(); // Check each polynomial identity, of the form `vanishing(x) = Z_H(x) quotient(x)`, at zeta. 
From 4ea418a4865c7cb70b3bfdc17eab233e292a19f1 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Tue, 22 Feb 2022 16:35:20 +0100 Subject: [PATCH 19/32] Clippy --- starky/src/recursive_verifier.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/starky/src/recursive_verifier.rs b/starky/src/recursive_verifier.rs index 608d6dc1..de01d39e 100644 --- a/starky/src/recursive_verifier.rs +++ b/starky/src/recursive_verifier.rs @@ -298,7 +298,7 @@ pub fn set_stark_proof_target, W, const D: usize>( if let (Some(permutation_zs_cap_target), Some(permutation_zs_cap)) = (&proof_target.permutation_zs_cap, &proof.permutation_zs_cap) { - witness.set_cap_target(&permutation_zs_cap_target, &permutation_zs_cap); + witness.set_cap_target(permutation_zs_cap_target, permutation_zs_cap); } set_fri_proof_target(witness, &proof_target.opening_proof, &proof.opening_proof); From 150d76444081563e454e4edd554a5128c1b13271 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Tue, 22 Feb 2022 17:00:08 +0100 Subject: [PATCH 20/32] Simplification --- starky/src/fibonacci_stark.rs | 6 ++-- starky/src/get_challenges.rs | 58 +++++++++++++------------------- starky/src/proof.rs | 4 +-- starky/src/recursive_verifier.rs | 10 ++---- starky/src/verifier.rs | 4 +-- 5 files changed, 33 insertions(+), 49 deletions(-) diff --git a/starky/src/fibonacci_stark.rs b/starky/src/fibonacci_stark.rs index 10b54d69..7961ad50 100644 --- a/starky/src/fibonacci_stark.rs +++ b/starky/src/fibonacci_stark.rs @@ -13,7 +13,7 @@ use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; /// Toy STARK system used for testing. /// Computes a Fibonacci sequence with state `[x0, x1, i, j]` using the state transition /// `x0' <- x1, x1' <- x0 + x1, i' <- i+1, j' <- j+1`. -/// Note: The `i, j` columns are used to test the permutation argument. +/// Note: The `i, j` columns are only used to test the permutation argument. 
#[derive(Copy, Clone)] struct FibonacciStark, const D: usize> { num_rows: usize, @@ -48,7 +48,7 @@ impl, const D: usize> FibonacciStark { Some(tmp) }) .collect::>(); - trace[self.num_rows - 1][3] = F::ZERO; + trace[self.num_rows - 1][3] = F::ZERO; // So that column 2 and 3 are permutation of one another. trace } } @@ -234,7 +234,7 @@ mod tests { let pt = add_virtual_stark_proof_with_pis(&mut builder, stark, inner_config, degree_bits); set_stark_proof_with_pis_target(&mut pw, &pt, &inner_proof); - recursively_verify_stark_proof::(&mut builder, stark, pt, inner_config)?; + recursively_verify_stark_proof::(&mut builder, stark, pt, inner_config); if print_gate_counts { builder.print_gate_counts(0); diff --git a/starky/src/get_challenges.rs b/starky/src/get_challenges.rs index 8ee71667..0f4aacee 100644 --- a/starky/src/get_challenges.rs +++ b/starky/src/get_challenges.rs @@ -1,4 +1,3 @@ -use anyhow::Result; use plonky2::field::extension_field::Extendable; use plonky2::field::polynomial::PolynomialCoeffs; use plonky2::fri::proof::{FriProof, FriProofTarget}; @@ -28,7 +27,7 @@ fn get_challenges( pow_witness: F, config: &StarkConfig, degree_bits: usize, -) -> Result> +) -> StarkProofChallenges where F: RichField + Extendable, C: GenericConfig, @@ -40,20 +39,15 @@ where challenger.observe_cap(trace_cap); - let permutation_challenge_sets = if stark.uses_permutation_args() { - get_n_permutation_challenge_sets( + let permutation_challenge_sets = permutation_zs_cap.map(|permutation_zs_cap| { + let tmp = get_n_permutation_challenge_sets( &mut challenger, num_challenges, stark.permutation_batch_size(), - ) - } else { - vec![] - }; - if stark.uses_permutation_args() { - let cap = - permutation_zs_cap.ok_or_else(|| anyhow::Error::msg("expected permutation_zs_cap")); - challenger.observe_cap(cap?); - } + ); + challenger.observe_cap(permutation_zs_cap); + tmp + }); let stark_alphas = challenger.get_n_challenges(num_challenges); @@ -62,7 +56,7 @@ where 
challenger.observe_openings(&openings.to_fri_openings()); - Ok(StarkProofChallenges { + StarkProofChallenges { permutation_challenge_sets, stark_alphas, stark_zeta, @@ -73,7 +67,7 @@ where degree_bits, &config.fri_config, ), - }) + } } impl StarkProofWithPublicInputs @@ -86,11 +80,10 @@ where stark: &S, config: &StarkConfig, degree_bits: usize, - ) -> anyhow::Result> { - Ok(self - .get_challenges(stark, config, degree_bits)? + ) -> Vec { + self.get_challenges(stark, config, degree_bits) .fri_challenges - .fri_query_indices) + .fri_query_indices } /// Computes all Fiat-Shamir challenges used in the STARK proof. @@ -99,7 +92,7 @@ where stark: &S, config: &StarkConfig, degree_bits: usize, - ) -> Result> { + ) -> StarkProofChallenges { let StarkProof { trace_cap, permutation_zs_cap, @@ -146,7 +139,7 @@ pub(crate) fn get_challenges_target< final_poly: &PolynomialCoeffsExtTarget, pow_witness: Target, config: &StarkConfig, -) -> Result> +) -> StarkProofChallengesTarget where C::Hasher: AlgebraicHasher, { @@ -156,21 +149,16 @@ where challenger.observe_cap(trace_cap); - let permutation_challenge_sets = if stark.uses_permutation_args() { - get_n_permutation_challenge_sets_target( + let permutation_challenge_sets = permutation_zs_cap.map(|permutation_zs_cap| { + let tmp = get_n_permutation_challenge_sets_target( builder, &mut challenger, num_challenges, stark.permutation_batch_size(), - ) - } else { - vec![] - }; - if stark.uses_permutation_args() { - let cap = - permutation_zs_cap.ok_or_else(|| anyhow::Error::msg("expected permutation_zs_cap")); - challenger.observe_cap(cap?); - } + ); + challenger.observe_cap(permutation_zs_cap); + tmp + }); let stark_alphas = challenger.get_n_challenges(builder, num_challenges); @@ -179,7 +167,7 @@ where challenger.observe_openings(&openings.to_fri_openings()); - Ok(StarkProofChallengesTarget { + StarkProofChallengesTarget { permutation_challenge_sets, stark_alphas, stark_zeta, @@ -190,7 +178,7 @@ where pow_witness, &config.fri_config, ), 
- }) + } } impl StarkProofWithPublicInputsTarget { @@ -203,7 +191,7 @@ impl StarkProofWithPublicInputsTarget { builder: &mut CircuitBuilder, stark: &S, config: &StarkConfig, - ) -> Result> + ) -> StarkProofChallengesTarget where C::Hasher: AlgebraicHasher, { diff --git a/starky/src/proof.rs b/starky/src/proof.rs index d1f86d7e..1975b1b9 100644 --- a/starky/src/proof.rs +++ b/starky/src/proof.rs @@ -101,7 +101,7 @@ pub struct CompressedStarkProofWithPublicInputs< pub(crate) struct StarkProofChallenges, const D: usize> { /// Randomness used in any permutation arguments. - pub permutation_challenge_sets: Vec>, + pub permutation_challenge_sets: Option>>, /// Random values used to combine STARK constraints. pub stark_alphas: Vec, @@ -113,7 +113,7 @@ pub(crate) struct StarkProofChallenges, const D: us } pub(crate) struct StarkProofChallengesTarget { - pub permutation_challenge_sets: Vec>, + pub permutation_challenge_sets: Option>>, pub stark_alphas: Vec, pub stark_zeta: ExtensionTarget, pub fri_challenges: FriChallengesTarget, diff --git a/starky/src/recursive_verifier.rs b/starky/src/recursive_verifier.rs index de01d39e..6a7363ae 100644 --- a/starky/src/recursive_verifier.rs +++ b/starky/src/recursive_verifier.rs @@ -1,6 +1,5 @@ use std::iter::once; -use anyhow::Result; use itertools::Itertools; use plonky2::field::extension_field::Extendable; use plonky2::field::field_types::Field; @@ -33,15 +32,14 @@ pub fn recursively_verify_stark_proof< stark: S, proof_with_pis: StarkProofWithPublicInputsTarget, inner_config: &StarkConfig, -) -> Result<()> -where +) where C::Hasher: AlgebraicHasher, [(); S::COLUMNS]:, [(); S::PUBLIC_INPUTS]:, { assert_eq!(proof_with_pis.public_inputs.len(), S::PUBLIC_INPUTS); let degree_bits = proof_with_pis.proof.recover_degree_bits(inner_config); - let challenges = proof_with_pis.get_challenges::(builder, &stark, inner_config)?; + let challenges = proof_with_pis.get_challenges::(builder, &stark, inner_config); 
recursively_verify_stark_proof_with_challenges::( builder, @@ -51,8 +49,6 @@ where inner_config, degree_bits, ); - - Ok(()) } /// Recursively verifies an inner proof. @@ -115,7 +111,7 @@ fn recursively_verify_stark_proof_with_challenges< .then(|| PermutationCheckDataTarget { local_zs: permutation_zs.as_ref().unwrap().clone(), next_zs: permutation_zs_right.as_ref().unwrap().clone(), - permutation_challenge_sets: challenges.permutation_challenge_sets, + permutation_challenge_sets: challenges.permutation_challenge_sets.unwrap(), }); eval_vanishing_poly_recursively::( builder, diff --git a/starky/src/verifier.rs b/starky/src/verifier.rs index 44d3f9c7..959cbc8e 100644 --- a/starky/src/verifier.rs +++ b/starky/src/verifier.rs @@ -34,7 +34,7 @@ where { ensure!(proof_with_pis.public_inputs.len() == S::PUBLIC_INPUTS); let degree_bits = proof_with_pis.proof.recover_degree_bits(config); - let challenges = proof_with_pis.get_challenges(&stark, config, degree_bits)?; + let challenges = proof_with_pis.get_challenges(&stark, config, degree_bits); verify_stark_proof_with_challenges(stark, proof_with_pis, challenges, degree_bits, config) } @@ -93,7 +93,7 @@ where let permutation_data = stark.uses_permutation_args().then(|| PermutationCheckData { local_zs: permutation_zs.as_ref().unwrap().clone(), next_zs: permutation_zs_right.as_ref().unwrap().clone(), - permutation_challenge_sets: challenges.permutation_challenge_sets, + permutation_challenge_sets: challenges.permutation_challenge_sets.unwrap(), }); eval_vanishing_poly::( &stark, From a31c58b69d17c8899f2e86fac4070a323493bbad Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Tue, 22 Feb 2022 17:23:55 +0100 Subject: [PATCH 21/32] Use ReducingFactor --- plonky2/src/util/reducing.rs | 15 +++++++++++- starky/src/permutation.rs | 47 ++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/plonky2/src/util/reducing.rs b/plonky2/src/util/reducing.rs index f29c6d08..626668e6 100644 --- 
a/plonky2/src/util/reducing.rs +++ b/plonky2/src/util/reducing.rs @@ -1,6 +1,6 @@ use std::borrow::Borrow; -use plonky2_field::extension_field::Extendable; +use plonky2_field::extension_field::{Extendable, FieldExtension}; use plonky2_field::field_types::Field; use plonky2_field::polynomial::PolynomialCoeffs; @@ -35,6 +35,11 @@ impl ReducingFactor { self.base * x } + fn mul_ext, const D: usize>(&mut self, x: FE) -> FE { + self.count += 1; + x.scalar_mul(self.base) + } + fn mul_poly(&mut self, p: &mut PolynomialCoeffs) { self.count += 1; *p *= self.base; @@ -45,6 +50,14 @@ impl ReducingFactor { .fold(F::ZERO, |acc, x| self.mul(acc) + *x.borrow()) } + pub fn reduce_ext, const D: usize>( + &mut self, + iter: impl DoubleEndedIterator>, + ) -> FE { + iter.rev() + .fold(FE::ZERO, |acc, x| self.mul_ext(acc) + *x.borrow()) + } + pub fn reduce_polys( &mut self, polys: impl DoubleEndedIterator>>, diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index 1113094d..dad4b661 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -11,6 +11,7 @@ use plonky2::iop::ext_target::ExtensionTarget; use plonky2::iop::target::Target; use plonky2::plonk::circuit_builder::CircuitBuilder; use plonky2::plonk::config::{AlgebraicHasher, GenericConfig, Hasher}; +use plonky2::util::reducing::{ReducingFactor, ReducingFactorTarget}; use rayon::prelude::*; use crate::config::StarkConfig; @@ -283,19 +284,15 @@ pub(crate) fn eval_permutation_checks, Vec<_>) = column_pairs + .iter() + .map(|&(i, j)| (vars.local_values[i], vars.local_values[j])) + .unzip(); + ( + factor.reduce_ext(lhs.into_iter()) + FE::from_basefield(*gamma), + factor.reduce_ext(rhs.into_iter()) + FE::from_basefield(*gamma), + ) }) .unzip(); let constraint = next_zs[i] * reduced_rhs.into_iter().product() @@ -353,19 +350,17 @@ pub(crate) fn eval_permutation_checks_recursively( let zero = builder.zero_extension(); let beta_ext = builder.convert_to_ext(*beta); let gamma_ext = 
builder.convert_to_ext(*gamma); - let mut reduced = - column_pairs - .iter() - .rev() - .fold((zero, zero), |(lhs, rhs), &(i, j)| { - ( - builder.mul_add_extension(lhs, beta_ext, vars.local_values[i]), - builder.mul_add_extension(rhs, beta_ext, vars.local_values[j]), - ) - }); - reduced.0 = builder.add_extension(reduced.0, gamma_ext); - reduced.1 = builder.add_extension(reduced.1, gamma_ext); - reduced + let mut factor = ReducingFactorTarget::new(beta_ext); + let (lhs, rhs): (Vec<_>, Vec<_>) = column_pairs + .iter() + .map(|&(i, j)| (vars.local_values[i], vars.local_values[j])) + .unzip(); + let reduced_lhs = factor.reduce(&lhs, builder); + let reduced_rhs = factor.reduce(&rhs, builder); + ( + builder.add_extension(reduced_lhs, gamma_ext), + builder.add_extension(reduced_rhs, gamma_ext), + ) }) .unzip(); let reduced_lhs_product = builder.mul_many_extension(&reduced_lhs); From 17bbc6f3e403642296ca73be0147debb31e9e86e Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Tue, 22 Feb 2022 17:30:08 +0100 Subject: [PATCH 22/32] Minor --- starky/src/prover.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/starky/src/prover.rs b/starky/src/prover.rs index ac6689c2..fe007f05 100644 --- a/starky/src/prover.rs +++ b/starky/src/prover.rs @@ -84,7 +84,7 @@ where challenger.observe_cap(&trace_cap); // Permutation arguments. 
- let permutation_zs_commitment_challenges = if stark.uses_permutation_args() { + let permutation_zs_commitment_challenges = stark.uses_permutation_args().then(|| { let permutation_challenge_sets = get_n_permutation_challenge_sets( &mut challenger, config.num_challenges, @@ -101,7 +101,7 @@ where timed!( timing, "compute permutation Z commitments", - Some(( + ( PolynomialBatch::from_values( permutation_z_polys, rate_bits, @@ -111,11 +111,9 @@ where None, ), permutation_challenge_sets - )) + ) ) - } else { - None - }; + }); let permutation_zs_commitment = permutation_zs_commitment_challenges .as_ref() .map(|(comm, _)| comm); From 8c5cbbc7c66a51c805003dfcce5ed02d472cf99d Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Tue, 22 Feb 2022 17:40:48 +0100 Subject: [PATCH 23/32] Add first row Z check --- starky/src/permutation.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index dad4b661..d2a16fc4 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -264,7 +264,12 @@ pub(crate) fn eval_permutation_checks( next_zs, permutation_challenge_sets, } = permutation_data; - // TODO: Z_1 check. 
+ + let one = builder.one_extension(); + // Check that Z(1) = 1; + for &z in &local_zs { + let z_1 = builder.sub_extension(z, one); + consumer.constraint_first_row(builder, z_1); + } + let permutation_pairs = stark.permutation_pairs(); let permutation_batches = get_permutation_batches( From dd4cc21309c860a7be2f2e1baa830bdba7e033a6 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Wed, 23 Feb 2022 09:36:28 +0100 Subject: [PATCH 24/32] PR feedback --- starky/src/permutation.rs | 13 +++++----- starky/src/proof.rs | 2 ++ starky/src/prover.rs | 38 +++++++++++++-------------- starky/src/recursive_verifier.rs | 44 +++++++++++++++++++++++--------- starky/src/vanishing_poly.rs | 12 +++++---- starky/src/verifier.rs | 34 +++++++++++++++++++++--- 6 files changed, 95 insertions(+), 48 deletions(-) diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index d2a16fc4..2e1d603c 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -4,6 +4,7 @@ use itertools::Itertools; use plonky2::field::batch_util::batch_multiply_inplace; use plonky2::field::extension_field::{Extendable, FieldExtension}; use plonky2::field::field_types::Field; +use plonky2::field::packed_field::PackedField; use plonky2::field::polynomial::PolynomialValues; use plonky2::hash::hash_types::RichField; use plonky2::iop::challenger::{Challenger, RecursiveChallenger}; @@ -54,7 +55,6 @@ pub(crate) struct PermutationChallengeSet { pub(crate) fn compute_permutation_z_polys( stark: &S, config: &StarkConfig, - challenger: &mut Challenger, trace_poly_values: &[PolynomialValues], permutation_challenge_sets: &[PermutationChallengeSet], ) -> Vec> @@ -239,27 +239,28 @@ pub(crate) fn get_permutation_batches<'a, T: Copy>( } // TODO: Use slices. 
-pub struct PermutationCheckData, const D2: usize> { +pub struct PermutationCheckVars, const D2: usize> { pub(crate) local_zs: Vec, pub(crate) next_zs: Vec, pub(crate) permutation_challenge_sets: Vec>, } -pub(crate) fn eval_permutation_checks( +pub(crate) fn eval_permutation_checks( stark: &S, config: &StarkConfig, vars: StarkEvaluationVars, - permutation_data: PermutationCheckData, + permutation_data: PermutationCheckVars, consumer: &mut ConstraintConsumer, ) where F: RichField + Extendable, FE: FieldExtension, + P: PackedField, C: GenericConfig, S: Stark, [(); S::COLUMNS]:, [(); S::PUBLIC_INPUTS]:, { - let PermutationCheckData { + let PermutationCheckVars { local_zs, next_zs, permutation_challenge_sets, @@ -350,7 +351,6 @@ pub(crate) fn eval_permutation_checks_recursively( // Each zs value corresponds to a permutation batch. for (i, instances) in permutation_batches.iter().enumerate() { - // Z(gx) * down = Z x * up let (reduced_lhs, reduced_rhs): (Vec>, Vec>) = instances .iter() @@ -359,7 +359,6 @@ pub(crate) fn eval_permutation_checks_recursively( pair: PermutationPair { column_pairs }, challenge: PermutationChallenge { beta, gamma }, } = instance; - let zero = builder.zero_extension(); let beta_ext = builder.convert_to_ext(*beta); let gamma_ext = builder.convert_to_ext(*gamma); let mut factor = ReducingFactorTarget::new(beta_ext); diff --git a/starky/src/proof.rs b/starky/src/proof.rs index 1975b1b9..dba3db3e 100644 --- a/starky/src/proof.rs +++ b/starky/src/proof.rs @@ -32,6 +32,7 @@ pub struct StarkProof, C: GenericConfig, } impl, C: GenericConfig, const D: usize> StarkProof { + /// Recover the length of the trace from a STARK proof and a STARK config. 
pub(crate) fn recover_degree_bits(&self, config: &StarkConfig) -> usize { let initial_merkle_proof = &self.opening_proof.query_round_proofs[0] .initial_trees_proof @@ -51,6 +52,7 @@ pub struct StarkProofTarget { } impl StarkProofTarget { + /// Recover the length of the trace from a STARK proof and a STARK config. pub(crate) fn recover_degree_bits(&self, config: &StarkConfig) -> usize { let initial_merkle_proof = &self.opening_proof.query_round_proofs[0] .initial_trees_proof diff --git a/starky/src/prover.rs b/starky/src/prover.rs index fe007f05..336b9963 100644 --- a/starky/src/prover.rs +++ b/starky/src/prover.rs @@ -18,7 +18,7 @@ use rayon::prelude::*; use crate::config::StarkConfig; use crate::constraint_consumer::ConstraintConsumer; -use crate::permutation::PermutationCheckData; +use crate::permutation::PermutationCheckVars; use crate::permutation::{ compute_permutation_z_polys, get_n_permutation_challenge_sets, PermutationChallengeSet, }; @@ -93,26 +93,23 @@ where let permutation_z_polys = compute_permutation_z_polys::( &stark, config, - &mut challenger, &trace_poly_values, &permutation_challenge_sets, ); - timed!( + let permutation_zs_commitment = timed!( timing, "compute permutation Z commitments", - ( - PolynomialBatch::from_values( - permutation_z_polys, - rate_bits, - false, - config.fri_config.cap_height, - timing, - None, - ), - permutation_challenge_sets + PolynomialBatch::from_values( + permutation_z_polys, + rate_bits, + false, + config.fri_config.cap_height, + timing, + None, ) - ) + ); + (permutation_zs_commitment, permutation_challenge_sets) }); let permutation_zs_commitment = permutation_zs_commitment_challenges .as_ref() @@ -251,6 +248,8 @@ where // Retrieve the LDE values at index `i`. let get_at_index = |comm: &'a PolynomialBatch, i: usize| -> &'a [F] { comm.get_lde_values(i * step) }; + let get_trace_at_index = |i| get_at_index(trace_commitment, i).try_into().unwrap(); + // Last element of the subgroup. 
let last = F::primitive_root_of_unity(degree_bits).inverse(); let size = degree << quotient_degree_bits; @@ -271,21 +270,20 @@ where lagrange_last.values[i], ); let vars = StarkEvaluationVars:: { - local_values: &get_at_index(trace_commitment, i).try_into().unwrap(), - next_values: &get_at_index(trace_commitment, (i + next_step) % size) - .try_into() - .unwrap(), + local_values: &get_trace_at_index(i), + next_values: &get_trace_at_index((i + next_step) % size), public_inputs: &public_inputs, }; let permutation_check_data = permutation_zs_commitment_challenges.as_ref().map( - |(permutation_zs_commitment, permutation_challenge_sets)| PermutationCheckData { + |(permutation_zs_commitment, permutation_challenge_sets)| PermutationCheckVars { local_zs: get_at_index(permutation_zs_commitment, i).to_vec(), next_zs: get_at_index(permutation_zs_commitment, (i + next_step) % size) .to_vec(), permutation_challenge_sets: permutation_challenge_sets.to_vec(), }, ); - eval_vanishing_poly::( + // TODO: Use packed field for F. 
+ eval_vanishing_poly::( stark, config, vars, diff --git a/starky/src/recursive_verifier.rs b/starky/src/recursive_verifier.rs index 6a7363ae..c1abbdb0 100644 --- a/starky/src/recursive_verifier.rs +++ b/starky/src/recursive_verifier.rs @@ -1,5 +1,6 @@ use std::iter::once; +use anyhow::{ensure, Result}; use itertools::Itertools; use plonky2::field::extension_field::Extendable; use plonky2::field::field_types::Field; @@ -69,6 +70,7 @@ fn recursively_verify_stark_proof_with_challenges< [(); S::COLUMNS]:, [(); S::PUBLIC_INPUTS]:, { + check_permutation_options(&stark, &proof_with_pis, &challenges).unwrap(); let one = builder.one_extension(); let StarkProofWithPublicInputsTarget { @@ -202,18 +204,14 @@ pub fn add_virtual_stark_proof, S: Stark, con let fri_params = config.fri_params(degree_bits); let cap_height = fri_params.config.cap_height; - let num_leaves_per_oracle = if stark.uses_permutation_args() { - vec![ - S::COLUMNS, - stark.num_permutation_batches(config), - stark.quotient_degree_factor() * config.num_challenges, - ] - } else { - vec![ - S::COLUMNS, - stark.quotient_degree_factor() * config.num_challenges, - ] - }; + let num_leaves_per_oracle = once(S::COLUMNS) + .chain( + stark + .uses_permutation_args() + .then(|| stark.num_permutation_batches(config)), + ) + .chain(once(stark.quotient_degree_factor() * config.num_challenges)) + .collect_vec(); let permutation_zs_cap = stark .uses_permutation_args() @@ -299,3 +297,25 @@ pub fn set_stark_proof_target, W, const D: usize>( set_fri_proof_target(witness, &proof_target.opening_proof, &proof.opening_proof); } + +/// Utility function to check that all permutation data wrapped in `Option`s are `Some` iff +/// the Stark uses a permutation argument. 
+fn check_permutation_options, S: Stark, const D: usize>( + stark: &S, + proof_with_pis: &StarkProofWithPublicInputsTarget, + challenges: &StarkProofChallengesTarget, +) -> Result<()> { + let options_is_some = [ + proof_with_pis.proof.permutation_zs_cap.is_some(), + proof_with_pis.proof.openings.permutation_zs.is_some(), + proof_with_pis.proof.openings.permutation_zs_right.is_some(), + challenges.permutation_challenge_sets.is_some(), + ]; + ensure!( + options_is_some + .into_iter() + .all(|b| b == stark.uses_permutation_args()), + "Permutation data doesn't match with Stark configuration." + ); + Ok(()) +} diff --git a/starky/src/vanishing_poly.rs b/starky/src/vanishing_poly.rs index 55ea7a5a..c8c75730 100644 --- a/starky/src/vanishing_poly.rs +++ b/starky/src/vanishing_poly.rs @@ -1,4 +1,5 @@ use plonky2::field::extension_field::{Extendable, FieldExtension}; +use plonky2::field::packed_field::PackedField; use plonky2::hash::hash_types::RichField; use plonky2::plonk::circuit_builder::CircuitBuilder; use plonky2::plonk::config::GenericConfig; @@ -6,21 +7,22 @@ use plonky2::plonk::config::GenericConfig; use crate::config::StarkConfig; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::permutation::{ - eval_permutation_checks, eval_permutation_checks_recursively, PermutationCheckData, - PermutationCheckDataTarget, + eval_permutation_checks, eval_permutation_checks_recursively, PermutationCheckDataTarget, + PermutationCheckVars, }; use crate::stark::Stark; use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; -pub(crate) fn eval_vanishing_poly( +pub(crate) fn eval_vanishing_poly( stark: &S, config: &StarkConfig, vars: StarkEvaluationVars, - permutation_data: Option>, + permutation_data: Option>, consumer: &mut ConstraintConsumer, ) where F: RichField + Extendable, FE: FieldExtension, + P: PackedField, C: GenericConfig, S: Stark, [(); S::COLUMNS]:, @@ -28,7 +30,7 @@ pub(crate) fn eval_vanishing_poly( { 
stark.eval_packed_generic(vars, consumer); if let Some(permutation_data) = permutation_data { - eval_permutation_checks::( + eval_permutation_checks::( stark, config, vars, diff --git a/starky/src/verifier.rs b/starky/src/verifier.rs index 959cbc8e..a9bf897c 100644 --- a/starky/src/verifier.rs +++ b/starky/src/verifier.rs @@ -11,7 +11,7 @@ use plonky2::plonk::plonk_common::reduce_with_powers; use crate::config::StarkConfig; use crate::constraint_consumer::ConstraintConsumer; -use crate::permutation::PermutationCheckData; +use crate::permutation::PermutationCheckVars; use crate::proof::{StarkOpeningSet, StarkProofChallenges, StarkProofWithPublicInputs}; use crate::stark::Stark; use crate::vanishing_poly::eval_vanishing_poly; @@ -55,6 +55,7 @@ where [(); S::PUBLIC_INPUTS]:, [(); C::Hasher::HASH_SIZE]:, { + check_permutation_options(&stark, &proof_with_pis, &challenges)?; let StarkProofWithPublicInputs { proof, public_inputs, @@ -90,12 +91,12 @@ where l_1, l_last, ); - let permutation_data = stark.uses_permutation_args().then(|| PermutationCheckData { + let permutation_data = stark.uses_permutation_args().then(|| PermutationCheckVars { local_zs: permutation_zs.as_ref().unwrap().clone(), next_zs: permutation_zs_right.as_ref().unwrap().clone(), permutation_challenge_sets: challenges.permutation_challenge_sets.unwrap(), }); - eval_vanishing_poly::( + eval_vanishing_poly::( &stark, config, vars, @@ -153,7 +154,32 @@ fn eval_l_1_and_l_last(log_n: usize, x: F) -> (F, F) { (z_x * invs[0], z_x * invs[1]) } -/// Recover the length of the trace from a STARK proof and a STARK config. +/// Utility function to check that all permutation data wrapped in `Option`s are `Some` iff +/// the Stark uses a permutation argument. 
+fn check_permutation_options< + F: RichField + Extendable, + C: GenericConfig, + S: Stark, + const D: usize, +>( + stark: &S, + proof_with_pis: &StarkProofWithPublicInputs, + challenges: &StarkProofChallenges, +) -> Result<()> { + let options_is_some = [ + proof_with_pis.proof.permutation_zs_cap.is_some(), + proof_with_pis.proof.openings.permutation_zs.is_some(), + proof_with_pis.proof.openings.permutation_zs_right.is_some(), + challenges.permutation_challenge_sets.is_some(), + ]; + ensure!( + options_is_some + .into_iter() + .all(|b| b == stark.uses_permutation_args()), + "Permutation data doesn't match with Stark configuration." + ); + Ok(()) +} #[cfg(test)] mod tests { From 383b8b68b3cc5410175ea148e86574a4cae67032 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 24 Feb 2022 10:01:38 -0800 Subject: [PATCH 25/32] secret_to_public fn --- plonky2/src/curve/ecdsa.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/plonky2/src/curve/ecdsa.rs b/plonky2/src/curve/ecdsa.rs index cabe038a..52262830 100644 --- a/plonky2/src/curve/ecdsa.rs +++ b/plonky2/src/curve/ecdsa.rs @@ -16,6 +16,10 @@ pub struct ECDSASecretKey(pub C::ScalarField); #[derive(Copy, Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)] pub struct ECDSAPublicKey(pub AffinePoint); +pub fn secret_to_public(sk: ECDSASecretKey) -> ECDSAPublicKey { + ECDSAPublicKey((CurveScalar(sk.0) * C::GENERATOR_PROJECTIVE).to_affine()) +} + pub fn sign_message(msg: C::ScalarField, sk: ECDSASecretKey) -> ECDSASignature { let (k, rr) = { let mut k = C::ScalarField::rand(); @@ -57,8 +61,7 @@ pub fn verify_message( #[cfg(test)] mod tests { - use crate::curve::curve_types::{Curve, CurveScalar}; - use crate::curve::ecdsa::{sign_message, verify_message, ECDSAPublicKey, ECDSASecretKey}; + use crate::curve::ecdsa::{secret_to_public, sign_message, verify_message, ECDSASecretKey}; use crate::curve::secp256k1::Secp256K1; use crate::field::field_types::Field; use 
crate::field::secp256k1_scalar::Secp256K1Scalar; @@ -68,8 +71,8 @@ mod tests { type C = Secp256K1; let msg = Secp256K1Scalar::rand(); - let sk = ECDSASecretKey(Secp256K1Scalar::rand()); - let pk = ECDSAPublicKey((CurveScalar(sk.0) * C::GENERATOR_PROJECTIVE).to_affine()); + let sk = ECDSASecretKey::(Secp256K1Scalar::rand()); + let pk = secret_to_public(sk); let sig = sign_message(msg, sk); let result = verify_message(msg, sig, pk); From bd7f43adc2d314d2f3fd18415af089529d1e336b Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Thu, 24 Feb 2022 10:19:16 -0800 Subject: [PATCH 26/32] visibility --- plonky2/src/gadgets/ecdsa.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plonky2/src/gadgets/ecdsa.rs b/plonky2/src/gadgets/ecdsa.rs index 0a95e189..64f37e1f 100644 --- a/plonky2/src/gadgets/ecdsa.rs +++ b/plonky2/src/gadgets/ecdsa.rs @@ -8,10 +8,10 @@ use crate::hash::hash_types::RichField; use crate::plonk::circuit_builder::CircuitBuilder; #[derive(Clone, Debug)] -pub struct ECDSASecretKeyTarget(NonNativeTarget); +pub struct ECDSASecretKeyTarget(pub NonNativeTarget); #[derive(Clone, Debug)] -pub struct ECDSAPublicKeyTarget(AffinePointTarget); +pub struct ECDSAPublicKeyTarget(pub AffinePointTarget); #[derive(Clone, Debug)] pub struct ECDSASignatureTarget { From 2644f5f74a241244de19e306ae75b7d07a029e2a Mon Sep 17 00:00:00 2001 From: Hamish Ivey-Law <426294+unzvfu@users.noreply.github.com> Date: Thu, 3 Mar 2022 14:18:19 +1100 Subject: [PATCH 27/32] System Zero subtraction operation (#508) * First draft of subtraction operation. * Daniel comments. * Fix constraint calculation. * cargo fmt * Align native and recursive eval functions; fix typo. 
--- system_zero/src/alu/subtraction.rs | 53 ++++++++++++++++++++++++++++-- system_zero/src/registers/alu.rs | 12 +++++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/system_zero/src/alu/subtraction.rs b/system_zero/src/alu/subtraction.rs index 8f8bb810..8b795cbb 100644 --- a/system_zero/src/alu/subtraction.rs +++ b/system_zero/src/alu/subtraction.rs @@ -10,7 +10,18 @@ use crate::registers::alu::*; use crate::registers::NUM_COLUMNS; pub(crate) fn generate_subtraction(values: &mut [F; NUM_COLUMNS]) { - // TODO + let in_1 = values[COL_SUB_INPUT_0].to_canonical_u64() as u32; + let in_2 = values[COL_SUB_INPUT_1].to_canonical_u64() as u32; + + // in_1 - in_2 == diff - br*2^32 + let (diff, br) = in_1.overflowing_sub(in_2); + + let diff_1 = F::from_canonical_u16(diff as u16); + let diff_2 = F::from_canonical_u16((diff >> 16) as u16); + + values[COL_SUB_OUTPUT_0] = diff_1; + values[COL_SUB_OUTPUT_1] = diff_2; + values[COL_SUB_OUTPUT_BORROW] = F::from_canonical_u16(br as u16); } pub(crate) fn eval_subtraction>( @@ -18,7 +29,23 @@ pub(crate) fn eval_subtraction>( yield_constr: &mut ConstraintConsumer
<P>
, ) { let is_sub = local_values[IS_SUB]; - // TODO + let in_1 = local_values[COL_SUB_INPUT_0]; + let in_2 = local_values[COL_SUB_INPUT_1]; + let out_1 = local_values[COL_SUB_OUTPUT_0]; + let out_2 = local_values[COL_SUB_OUTPUT_1]; + let out_br = local_values[COL_SUB_OUTPUT_BORROW]; + + let base = F::from_canonical_u64(1 << 16); + let base_sqr = F::from_canonical_u64(1 << 32); + + let out_br = out_br * base_sqr; + let lhs = (out_br + in_1) - in_2; + let rhs = out_1 + out_2 * base; + + yield_constr.constraint(is_sub * (lhs - rhs)); + + // We don't need to check that out_br is in {0, 1} because it's + // checked by boolean::col_bit(0) in the ALU. } pub(crate) fn eval_subtraction_recursively, const D: usize>( @@ -27,5 +54,25 @@ pub(crate) fn eval_subtraction_recursively, const D yield_constr: &mut RecursiveConstraintConsumer, ) { let is_sub = local_values[IS_SUB]; - // TODO + let in_1 = local_values[COL_SUB_INPUT_0]; + let in_2 = local_values[COL_SUB_INPUT_1]; + let out_1 = local_values[COL_SUB_OUTPUT_0]; + let out_2 = local_values[COL_SUB_OUTPUT_1]; + let out_br = local_values[COL_SUB_OUTPUT_BORROW]; + + let base = builder.constant_extension(F::Extension::from_canonical_u64(1 << 16)); + let base_sqr = builder.constant_extension(F::Extension::from_canonical_u64(1 << 32)); + + // lhs = (out_br + in_1) - in_2 + let lhs = builder.add_extension(out_br, in_1); + let lhs = builder.sub_extension(lhs, in_2); + + // rhs = out_1 + base * out_2 + let rhs = builder.mul_add_extension(out_2, base, out_1); + + // filtered_diff = is_sub * (lhs - rhs) + let diff = builder.sub_extension(lhs, rhs); + let filtered_diff = builder.mul_extension(is_sub, diff); + + yield_constr.constraint(builder, filtered_diff); } diff --git a/system_zero/src/registers/alu.rs b/system_zero/src/registers/alu.rs index e678d8e4..6a9412a1 100644 --- a/system_zero/src/registers/alu.rs +++ b/system_zero/src/registers/alu.rs @@ -34,6 +34,18 @@ pub(crate) const COL_ADD_OUTPUT_1: usize = 
super::range_check_16::col_rc_16_inpu /// The third 16-bit chunk of the output, based on little-endian ordering. pub(crate) const COL_ADD_OUTPUT_2: usize = super::range_check_16::col_rc_16_input(2); +/// Inputs for subtraction; the second value is subtracted from the +/// first; inputs treated as an unsigned u32. +pub(crate) const COL_SUB_INPUT_0: usize = shared_col(0); +pub(crate) const COL_SUB_INPUT_1: usize = shared_col(1); + +/// The first 16-bit chunk of the output, based on little-endian ordering. +pub(crate) const COL_SUB_OUTPUT_0: usize = super::range_check_16::col_rc_16_input(0); +/// The second 16-bit chunk of the output, based on little-endian ordering. +pub(crate) const COL_SUB_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(1); +/// The borrow output +pub(crate) const COL_SUB_OUTPUT_BORROW: usize = super::boolean::col_bit(0); + /// The first value to be multiplied; treated as an unsigned u32. pub(crate) const COL_MUL_ADD_FACTOR_0: usize = shared_col(0); /// The second value to be multiplied; treated as an unsigned u32. 
From 7329dade9490a247c532cce7dee6e91a03100070 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Wed, 2 Mar 2022 22:49:57 -0800 Subject: [PATCH 28/32] IS_MUL -> IS_MUL_ADD (#510) --- system_zero/src/alu/mod.rs | 6 +++--- system_zero/src/alu/mul_add.rs | 4 ++-- system_zero/src/registers/alu.rs | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/system_zero/src/alu/mod.rs b/system_zero/src/alu/mod.rs index 730ca302..b1b05dc2 100644 --- a/system_zero/src/alu/mod.rs +++ b/system_zero/src/alu/mod.rs @@ -28,7 +28,7 @@ pub(crate) fn generate_alu(values: &mut [F; NUM_COLUMNS]) { generate_addition(values); } else if values[IS_SUB].is_one() { generate_subtraction(values); - } else if values[IS_MUL].is_one() { + } else if values[IS_MUL_ADD].is_one() { generate_mul_add(values); } else if values[IS_DIV].is_one() { generate_division(values); @@ -42,7 +42,7 @@ pub(crate) fn eval_alu>( let local_values = &vars.local_values; // Check that the operation flag values are binary. - for col in [IS_ADD, IS_SUB, IS_MUL, IS_DIV] { + for col in [IS_ADD, IS_SUB, IS_MUL_ADD, IS_DIV] { let val = local_values[col]; yield_constr.constraint(val * val - val); } @@ -61,7 +61,7 @@ pub(crate) fn eval_alu_recursively, const D: usize> let local_values = &vars.local_values; // Check that the operation flag values are binary. - for col in [IS_ADD, IS_SUB, IS_MUL, IS_DIV] { + for col in [IS_ADD, IS_SUB, IS_MUL_ADD, IS_DIV] { let val = local_values[col]; let constraint = builder.mul_sub_extension(val, val, val); yield_constr.constraint(builder, constraint); diff --git a/system_zero/src/alu/mul_add.rs b/system_zero/src/alu/mul_add.rs index 53ba34a2..b84cafbf 100644 --- a/system_zero/src/alu/mul_add.rs +++ b/system_zero/src/alu/mul_add.rs @@ -36,7 +36,7 @@ pub(crate) fn eval_mul_add>( local_values: &[P; NUM_COLUMNS], yield_constr: &mut ConstraintConsumer
<P>
, ) { - let is_mul = local_values[IS_MUL]; + let is_mul = local_values[IS_MUL_ADD]; let factor_0 = local_values[COL_MUL_ADD_FACTOR_0]; let factor_1 = local_values[COL_MUL_ADD_FACTOR_1]; let addend = local_values[COL_MUL_ADD_ADDEND]; @@ -63,7 +63,7 @@ pub(crate) fn eval_mul_add_recursively, const D: us local_values: &[ExtensionTarget; NUM_COLUMNS], yield_constr: &mut RecursiveConstraintConsumer, ) { - let is_mul = local_values[IS_MUL]; + let is_mul = local_values[IS_MUL_ADD]; let factor_0 = local_values[COL_MUL_ADD_FACTOR_0]; let factor_1 = local_values[COL_MUL_ADD_FACTOR_1]; let addend = local_values[COL_MUL_ADD_ADDEND]; diff --git a/system_zero/src/registers/alu.rs b/system_zero/src/registers/alu.rs index 6a9412a1..585ecab1 100644 --- a/system_zero/src/registers/alu.rs +++ b/system_zero/src/registers/alu.rs @@ -2,8 +2,8 @@ pub(crate) const IS_ADD: usize = super::START_ALU; pub(crate) const IS_SUB: usize = IS_ADD + 1; -pub(crate) const IS_MUL: usize = IS_SUB + 1; -pub(crate) const IS_DIV: usize = IS_MUL + 1; +pub(crate) const IS_MUL_ADD: usize = IS_SUB + 1; +pub(crate) const IS_DIV: usize = IS_MUL_ADD + 1; const START_SHARED_COLS: usize = IS_DIV + 1; From 310493c293436e73096a75d0e5755816fbfe3d58 Mon Sep 17 00:00:00 2001 From: Hamish Ivey-Law <426294+unzvfu@users.noreply.github.com> Date: Fri, 4 Mar 2022 09:34:31 +1100 Subject: [PATCH 29/32] Faster extension field multiplication (#500) * Initial implementation of quintic extensions. * Update to/from_biguint() methods. * Draft of fast multiplication on quintic extensions over 64-bit base. * cargo fmt * Typo. * Document functions (a bit). * Refactor reduction step. * Change multiplication call so that LLVM generates better assembly. * Use one main accumulator instead of two minor ones; faster reduce. * Use one main accumulator in square too; clean up redundant code. * Call faster routines from Mul and Square impls. * Fix reduction function. * Fix square calculation. * Slightly faster reduction. 
* Clean up names and types. * cargo fmt * Move extension field mul/sqr specialisations to their own file. * Rename functions to have unique prefix. * Add faster quadratic multiplication/squaring. * Faster quartic multiplication and squaring. * cargo fmt * clippy * Alternative reduce160 function. * Typo. * Remove alternative reduction function. * Remove delayed reduction implementation of squaring. * Enforce assumptions about extension generators. * Make the accumulation variable a u32 instead of u64. * Add test to trigger carry branch in reduce160. * cargo fmt * Some documentation. * Clippy; improved comments. * cargo fmt * Remove redundant Square specialisations. * Fix reduce*() visibility. * Faster reduce160 from Jakub. * Change mul-by-const functions to operate on 160 bits instead of 128. * Move code for extensions of GoldilocksField to its own file. --- field/Cargo.toml | 1 + field/src/extension_field/quadratic.rs | 2 +- field/src/extension_field/quartic.rs | 2 +- field/src/extension_field/quintic.rs | 2 +- field/src/goldilocks_extensions.rs | 495 +++++++++++++++++++++++++ field/src/goldilocks_field.rs | 93 ++--- field/src/lib.rs | 1 + plonky2/benches/field_arithmetic.rs | 2 + 8 files changed, 530 insertions(+), 68 deletions(-) create mode 100644 field/src/goldilocks_extensions.rs diff --git a/field/Cargo.toml b/field/Cargo.toml index 6abffc5d..748b65ac 100644 --- a/field/Cargo.toml +++ b/field/Cargo.toml @@ -12,3 +12,4 @@ num = { version = "0.4", features = [ "rand" ] } rand = "0.8.4" serde = { version = "1.0", features = ["derive"] } unroll = "0.1.5" +static_assertions = "1.1.0" diff --git a/field/src/extension_field/quadratic.rs b/field/src/extension_field/quadratic.rs index 488304d2..9cdc01c3 100644 --- a/field/src/extension_field/quadratic.rs +++ b/field/src/extension_field/quadratic.rs @@ -170,7 +170,7 @@ impl> Mul for QuadraticExtension { type Output = Self; #[inline] - fn mul(self, rhs: Self) -> Self { + default fn mul(self, rhs: Self) -> Self { let 
Self([a0, a1]) = self; let Self([b0, b1]) = rhs; diff --git a/field/src/extension_field/quartic.rs b/field/src/extension_field/quartic.rs index 7b4a6950..09e35a4f 100644 --- a/field/src/extension_field/quartic.rs +++ b/field/src/extension_field/quartic.rs @@ -201,7 +201,7 @@ impl> Mul for QuarticExtension { type Output = Self; #[inline] - fn mul(self, rhs: Self) -> Self { + default fn mul(self, rhs: Self) -> Self { let Self([a0, a1, a2, a3]) = self; let Self([b0, b1, b2, b3]) = rhs; diff --git a/field/src/extension_field/quintic.rs b/field/src/extension_field/quintic.rs index d2c29ffe..1600107d 100644 --- a/field/src/extension_field/quintic.rs +++ b/field/src/extension_field/quintic.rs @@ -201,7 +201,7 @@ impl> Mul for QuinticExtension { type Output = Self; #[inline] - fn mul(self, rhs: Self) -> Self { + default fn mul(self, rhs: Self) -> Self { let Self([a0, a1, a2, a3, a4]) = self; let Self([b0, b1, b2, b3, b4]) = rhs; let w = >::W; diff --git a/field/src/goldilocks_extensions.rs b/field/src/goldilocks_extensions.rs new file mode 100644 index 00000000..95265fe3 --- /dev/null +++ b/field/src/goldilocks_extensions.rs @@ -0,0 +1,495 @@ +use std::ops::Mul; + +use static_assertions::const_assert; + +use crate::extension_field::quadratic::QuadraticExtension; +use crate::extension_field::quartic::QuarticExtension; +use crate::extension_field::quintic::QuinticExtension; +use crate::extension_field::{Extendable, Frobenius}; +use crate::field_types::Field; +use crate::goldilocks_field::{reduce160, GoldilocksField}; + +impl Frobenius<1> for GoldilocksField {} + +impl Extendable<2> for GoldilocksField { + type Extension = QuadraticExtension; + + // Verifiable in Sage with + // `R. = GF(p)[]; assert (x^2 - 7).is_irreducible()`. 
+ const W: Self = Self(7); + + // DTH_ROOT = W^((ORDER - 1)/2) + const DTH_ROOT: Self = Self(18446744069414584320); + + const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 2] = + [Self(18081566051660590251), Self(16121475356294670766)]; + + const EXT_POWER_OF_TWO_GENERATOR: [Self; 2] = [Self(0), Self(15659105665374529263)]; +} + +impl Mul for QuadraticExtension { + #[inline] + fn mul(self, rhs: Self) -> Self { + let Self([a0, a1]) = self; + let Self([b0, b1]) = rhs; + let c = ext2_mul([a0.0, a1.0], [b0.0, b1.0]); + Self(c) + } +} + +impl Extendable<4> for GoldilocksField { + type Extension = QuarticExtension; + + const W: Self = Self(7); + + // DTH_ROOT = W^((ORDER - 1)/4) + const DTH_ROOT: Self = Self(281474976710656); + + const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 4] = [ + Self(5024755240244648895), + Self(13227474371289740625), + Self(3912887029498544536), + Self(3900057112666848848), + ]; + + const EXT_POWER_OF_TWO_GENERATOR: [Self; 4] = + [Self(0), Self(0), Self(0), Self(12587610116473453104)]; +} + +impl Mul for QuarticExtension { + #[inline] + fn mul(self, rhs: Self) -> Self { + let Self([a0, a1, a2, a3]) = self; + let Self([b0, b1, b2, b3]) = rhs; + let c = ext4_mul([a0.0, a1.0, a2.0, a3.0], [b0.0, b1.0, b2.0, b3.0]); + Self(c) + } +} + +impl Extendable<5> for GoldilocksField { + type Extension = QuinticExtension; + + const W: Self = Self(3); + + // DTH_ROOT = W^((ORDER - 1)/5) + const DTH_ROOT: Self = Self(1041288259238279555); + + const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 5] = [ + Self(2899034827742553394), + Self(13012057356839176729), + Self(14593811582388663055), + Self(7722900811313895436), + Self(4557222484695340057), + ]; + + const EXT_POWER_OF_TWO_GENERATOR: [Self; 5] = [ + Self::POWER_OF_TWO_GENERATOR, + Self(0), + Self(0), + Self(0), + Self(0), + ]; +} + +impl Mul for QuinticExtension { + #[inline] + fn mul(self, rhs: Self) -> Self { + let Self([a0, a1, a2, a3, a4]) = self; + let Self([b0, b1, b2, b3, b4]) = rhs; + let c = ext5_mul( + 
[a0.0, a1.0, a2.0, a3.0, a4.0], + [b0.0, b1.0, b2.0, b3.0, b4.0], + ); + Self(c) + } +} + +/* + * The functions extD_add_prods[0-4] are helper functions for + * computing products for extensions of degree D over the Goldilocks + * field. They are faster than the generic method because all + * reductions are delayed until the end which means only one per + * result coefficient is necessary. + */ + +/// Return a, b such that a + b*2^128 = 3*x with a < 2^128 and b < 2^32. +#[inline(always)] +fn u160_times_3(x: u128, y: u32) -> (u128, u32) { + let (s, cy) = x.overflowing_add(x << 1); + (s, 3 * y + (x >> 127) as u32 + cy as u32) +} + +/// Return a, b such that a + b*2^128 = 7*x with a < 2^128 and b < 2^32. +#[inline(always)] +fn u160_times_7(x: u128, y: u32) -> (u128, u32) { + let (d, br) = (x << 3).overflowing_sub(x); + // NB: subtracting the borrow can't underflow + (d, 7 * y + (x >> (128 - 3)) as u32 - br as u32) +} + +/* + * Quadratic multiplication and squaring + */ + +#[inline(always)] +fn ext2_add_prods0(a: &[u64; 2], b: &[u64; 2]) -> GoldilocksField { + // Computes a0 * b0 + W * a1 * b1; + let [a0, a1] = *a; + let [b0, b1] = *b; + + let cy; + + // W * a1 * b1 + let (mut cumul_lo, mut cumul_hi) = u160_times_7((a1 as u128) * (b1 as u128), 0u32); + + // a0 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +#[inline(always)] +fn ext2_add_prods1(a: &[u64; 2], b: &[u64; 2]) -> GoldilocksField { + // Computes a0 * b1 + a1 * b0; + let [a0, a1] = *a; + let [b0, b1] = *b; + + let cy; + + // a0 * b1 + let mut cumul_lo = (a0 as u128) * (b1 as u128); + + // a1 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128)); + let cumul_hi = cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +/// Multiply a and b considered as elements of GF(p^2). 
+#[inline(always)] +pub(crate) fn ext2_mul(a: [u64; 2], b: [u64; 2]) -> [GoldilocksField; 2] { + // The code in ext2_add_prods[01] assumes the quadratic extension + // generator is 7. + const_assert!(>::W.0 == 7u64); + + let c0 = ext2_add_prods0(&a, &b); + let c1 = ext2_add_prods1(&a, &b); + [c0, c1] +} + +/* + * Quartic multiplication and squaring + */ + +#[inline(always)] +fn ext4_add_prods0(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField { + // Computes c0 = a0 * b0 + W * (a1 * b3 + a2 * b2 + a3 * b1) + + let [a0, a1, a2, a3] = *a; + let [b0, b1, b2, b3] = *b; + + let mut cy; + + // a1 * b3 + let mut cumul_lo = (a1 as u128) * (b3 as u128); + + // a2 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b2 as u128)); + let mut cumul_hi = cy as u32; + + // a3 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // * W + (cumul_lo, cumul_hi) = u160_times_7(cumul_lo, cumul_hi); + + // a0 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +#[inline(always)] +fn ext4_add_prods1(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField { + // Computes c1 = a0 * b1 + a1 * b0 + W * (a2 * b3 + a3 * b2); + + let [a0, a1, a2, a3] = *a; + let [b0, b1, b2, b3] = *b; + + let mut cy; + + // a2 * b3 + let mut cumul_lo = (a2 as u128) * (b3 as u128); + + // a3 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b2 as u128)); + let mut cumul_hi = cy as u32; + + // * W + (cumul_lo, cumul_hi) = u160_times_7(cumul_lo, cumul_hi); + + // a0 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // a1 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +#[inline(always)] +fn ext4_add_prods2(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField { + // Computes c2 = a0 * b2 + 
a1 * b1 + a2 * b0 + W * a3 * b3; + + let [a0, a1, a2, a3] = *a; + let [b0, b1, b2, b3] = *b; + + let mut cy; + + // W * a3 * b3 + let (mut cumul_lo, mut cumul_hi) = u160_times_7((a3 as u128) * (b3 as u128), 0u32); + + // a0 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b2 as u128)); + cumul_hi += cy as u32; + + // a1 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // a2 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +#[inline(always)] +fn ext4_add_prods3(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField { + // Computes c3 = a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; + + let [a0, a1, a2, a3] = *a; + let [b0, b1, b2, b3] = *b; + + let mut cy; + + // a0 * b3 + let mut cumul_lo = (a0 as u128) * (b3 as u128); + + // a1 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b2 as u128)); + let mut cumul_hi = cy as u32; + + // a2 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // a3 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +/// Multiply a and b considered as elements of GF(p^4). +#[inline(always)] +pub(crate) fn ext4_mul(a: [u64; 4], b: [u64; 4]) -> [GoldilocksField; 4] { + // The code in ext4_add_prods[0-3] assumes the quartic extension + // generator is 7. 
+ const_assert!(>::W.0 == 7u64); + + let c0 = ext4_add_prods0(&a, &b); + let c1 = ext4_add_prods1(&a, &b); + let c2 = ext4_add_prods2(&a, &b); + let c3 = ext4_add_prods3(&a, &b); + [c0, c1, c2, c3] +} + +/* + * Quintic multiplication and squaring + */ + +#[inline(always)] +fn ext5_add_prods0(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField { + // Computes c0 = a0 * b0 + W * (a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1) + + let [a0, a1, a2, a3, a4] = *a; + let [b0, b1, b2, b3, b4] = *b; + + let mut cy; + + // a1 * b4 + let mut cumul_lo = (a1 as u128) * (b4 as u128); + + // a2 * b3 + (cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b3 as u128)); + let mut cumul_hi = cy as u32; + + // a3 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b2 as u128)); + cumul_hi += cy as u32; + + // a4 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // * W + (cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi); + + // a0 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +#[inline(always)] +fn ext5_add_prods1(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField { + // Computes c1 = a0 * b1 + a1 * b0 + W * (a2 * b4 + a3 * b3 + a4 * b2); + + let [a0, a1, a2, a3, a4] = *a; + let [b0, b1, b2, b3, b4] = *b; + + let mut cy; + + // a2 * b4 + let mut cumul_lo = (a2 as u128) * (b4 as u128); + + // a3 * b3 + (cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b3 as u128)); + let mut cumul_hi = cy as u32; + + // a4 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b2 as u128)); + cumul_hi += cy as u32; + + // * W + (cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi); + + // a0 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // a1 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + 
unsafe { reduce160(cumul_lo, cumul_hi) } +} + +#[inline(always)] +fn ext5_add_prods2(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField { + // Computes c2 = a0 * b2 + a1 * b1 + a2 * b0 + W * (a3 * b4 + a4 * b3); + + let [a0, a1, a2, a3, a4] = *a; + let [b0, b1, b2, b3, b4] = *b; + + let mut cy; + + // a3 * b4 + let mut cumul_lo = (a3 as u128) * (b4 as u128); + + // a4 * b3 + (cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b3 as u128)); + let mut cumul_hi = cy as u32; + + // * W + (cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi); + + // a0 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b2 as u128)); + cumul_hi += cy as u32; + + // a1 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // a2 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +#[inline(always)] +fn ext5_add_prods3(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField { + // Computes c3 = a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0 + W * a4 * b4; + + let [a0, a1, a2, a3, a4] = *a; + let [b0, b1, b2, b3, b4] = *b; + + let mut cy; + + // W * a4 * b4 + let (mut cumul_lo, mut cumul_hi) = u160_times_3((a4 as u128) * (b4 as u128), 0u32); + + // a0 * b3 + (cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b3 as u128)); + cumul_hi += cy as u32; + + // a1 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b2 as u128)); + cumul_hi += cy as u32; + + // a2 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // a3 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +#[inline(always)] +fn ext5_add_prods4(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField { + // Computes c4 = a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0; + + let [a0, a1, a2, a3, a4] = *a; + let [b0, 
b1, b2, b3, b4] = *b; + + let mut cy; + + // a0 * b4 + let mut cumul_lo = (a0 as u128) * (b4 as u128); + + // a1 * b3 + (cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b3 as u128)); + let mut cumul_hi = cy as u32; + + // a2 * b2 + (cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b2 as u128)); + cumul_hi += cy as u32; + + // a3 * b1 + (cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b1 as u128)); + cumul_hi += cy as u32; + + // a4 * b0 + (cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b0 as u128)); + cumul_hi += cy as u32; + + unsafe { reduce160(cumul_lo, cumul_hi) } +} + +/// Multiply a and b considered as elements of GF(p^5). +#[inline(always)] +pub(crate) fn ext5_mul(a: [u64; 5], b: [u64; 5]) -> [GoldilocksField; 5] { + // The code in ext5_add_prods[0-4] assumes the quintic extension + // generator is 3. + const_assert!(>::W.0 == 3u64); + + let c0 = ext5_add_prods0(&a, &b); + let c1 = ext5_add_prods1(&a, &b); + let c2 = ext5_add_prods2(&a, &b); + let c3 = ext5_add_prods3(&a, &b); + let c4 = ext5_add_prods4(&a, &b); + [c0, c1, c2, c3, c4] +} diff --git a/field/src/goldilocks_field.rs b/field/src/goldilocks_field.rs index af958629..4ed32a0d 100644 --- a/field/src/goldilocks_field.rs +++ b/field/src/goldilocks_field.rs @@ -9,10 +9,6 @@ use plonky2_util::{assume, branch_hint}; use rand::Rng; use serde::{Deserialize, Serialize}; -use crate::extension_field::quadratic::QuadraticExtension; -use crate::extension_field::quartic::QuarticExtension; -use crate::extension_field::quintic::QuinticExtension; -use crate::extension_field::{Extendable, Frobenius}; use crate::field_types::{Field, Field64, PrimeField, PrimeField64}; use crate::inversion::try_inverse_u64; @@ -283,66 +279,6 @@ impl DivAssign for GoldilocksField { } } -impl Extendable<2> for GoldilocksField { - type Extension = QuadraticExtension; - - // Verifiable in Sage with - // `R. = GF(p)[]; assert (x^2 - 7).is_irreducible()`. 
- const W: Self = Self(7); - - // DTH_ROOT = W^((ORDER - 1)/2) - const DTH_ROOT: Self = Self(18446744069414584320); - - const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 2] = - [Self(18081566051660590251), Self(16121475356294670766)]; - - const EXT_POWER_OF_TWO_GENERATOR: [Self; 2] = [Self(0), Self(15659105665374529263)]; -} - -impl Extendable<4> for GoldilocksField { - type Extension = QuarticExtension; - - const W: Self = Self(7); - - // DTH_ROOT = W^((ORDER - 1)/4) - const DTH_ROOT: Self = Self(281474976710656); - - const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 4] = [ - Self(5024755240244648895), - Self(13227474371289740625), - Self(3912887029498544536), - Self(3900057112666848848), - ]; - - const EXT_POWER_OF_TWO_GENERATOR: [Self; 4] = - [Self(0), Self(0), Self(0), Self(12587610116473453104)]; -} - -impl Extendable<5> for GoldilocksField { - type Extension = QuinticExtension; - - const W: Self = Self(3); - - // DTH_ROOT = W^((ORDER - 1)/5) - const DTH_ROOT: Self = Self(1041288259238279555); - - const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 5] = [ - Self(2899034827742553394), - Self(13012057356839176729), - Self(14593811582388663055), - Self(7722900811313895436), - Self(4557222484695340057), - ]; - - const EXT_POWER_OF_TWO_GENERATOR: [Self; 5] = [ - Self::POWER_OF_TWO_GENERATOR, - Self(0), - Self(0), - Self(0), - Self(0), - ]; -} - /// Fast addition modulo ORDER for x86-64. /// This function is marked unsafe for the following reasons: /// - It is only correct if x + y < 2**64 + ORDER = 0x1ffffffff00000001. @@ -407,7 +343,34 @@ fn split(x: u128) -> (u64, u64) { (x as u64, (x >> 64) as u64) } -impl Frobenius<1> for GoldilocksField {} +/// Reduce the value x_lo + x_hi * 2^128 to an element in the +/// Goldilocks field. +/// +/// This function is marked 'unsafe' because correctness relies on the +/// unchecked assumption that x < 2^160 - 2^128 + 2^96. Further, +/// performance may degrade as x_hi increases beyond 2**40 or so. 
+#[inline(always)] +pub(crate) unsafe fn reduce160(x_lo: u128, x_hi: u32) -> GoldilocksField { + let x_hi = (x_lo >> 96) as u64 + ((x_hi as u64) << 32); // shld to form x_hi + let x_mid = (x_lo >> 64) as u32; // shr to form x_mid + let x_lo = x_lo as u64; + + // sub + jc (should fuse) + let (mut t0, borrow) = x_lo.overflowing_sub(x_hi); + if borrow { + // The maximum possible value of x is (2^64 - 1)^2 * 4 * 7 < 2^133, + // so x_hi < 2^37. A borrow will happen roughly one in 134 million + // times, so it's best to branch. + branch_hint(); + // NB: this assumes that x < 2^160 - 2^128 + 2^96. + t0 -= EPSILON; // Cannot underflow if x_hi is canonical. + } + // imul + let t1 = (x_mid as u64) * EPSILON; + // add, sbb, add + let t2 = add_no_canonicalize_trashing_input(t0, t1); + GoldilocksField(t2) +} #[cfg(test)] mod tests { diff --git a/field/src/lib.rs b/field/src/lib.rs index 2c89aab3..e54f2aa7 100644 --- a/field/src/lib.rs +++ b/field/src/lib.rs @@ -15,6 +15,7 @@ pub mod cosets; pub mod extension_field; pub mod fft; pub mod field_types; +pub mod goldilocks_extensions; pub mod goldilocks_field; pub mod interpolation; mod inversion; diff --git a/plonky2/benches/field_arithmetic.rs b/plonky2/benches/field_arithmetic.rs index 0e4383ee..7b74ae52 100644 --- a/plonky2/benches/field_arithmetic.rs +++ b/plonky2/benches/field_arithmetic.rs @@ -1,4 +1,5 @@ use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; +use plonky2::field::extension_field::quadratic::QuadraticExtension; use plonky2::field::extension_field::quartic::QuarticExtension; use plonky2::field::extension_field::quintic::QuinticExtension; use plonky2::field::field_types::Field; @@ -175,6 +176,7 @@ pub(crate) fn bench_field(c: &mut Criterion) { fn criterion_benchmark(c: &mut Criterion) { bench_field::(c); + bench_field::>(c); bench_field::>(c); bench_field::>(c); } From cc9a43b57433f3c6724b1764bb8c64a9c3bdf395 Mon Sep 17 00:00:00 2001 From: wborgeaud Date: Tue, 15 Mar 2022 17:14:45 +0100 Subject: 
[PATCH 30/32] Fix salt issues --- plonky2/src/plonk/recursive_verifier.rs | 10 ++++++---- plonky2/src/util/serialization.rs | 11 +++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/plonky2/src/plonk/recursive_verifier.rs b/plonky2/src/plonk/recursive_verifier.rs index 6210bb29..2fe7d648 100644 --- a/plonky2/src/plonk/recursive_verifier.rs +++ b/plonky2/src/plonk/recursive_verifier.rs @@ -4,6 +4,7 @@ use crate::hash::hash_types::{HashOutTarget, RichField}; use crate::plonk::circuit_builder::CircuitBuilder; use crate::plonk::circuit_data::{CommonCircuitData, VerifierCircuitTarget}; use crate::plonk::config::{AlgebraicHasher, GenericConfig}; +use crate::plonk::plonk_common::salt_size; use crate::plonk::proof::{ OpeningSetTarget, ProofChallengesTarget, ProofTarget, ProofWithPublicInputsTarget, }; @@ -141,11 +142,12 @@ impl, const D: usize> CircuitBuilder { let fri_params = &common_data.fri_params; let cap_height = fri_params.config.cap_height; + let salt = salt_size(common_data.fri_params.hiding); let num_leaves_per_oracle = &[ common_data.num_preprocessed_polys(), - config.num_wires, - common_data.num_zs_partial_products_polys(), - common_data.num_quotient_polys(), + config.num_wires + salt, + common_data.num_zs_partial_products_polys() + salt, + common_data.num_quotient_polys() + salt, ]; ProofTarget { @@ -200,7 +202,7 @@ mod tests { const D: usize = 2; type C = PoseidonGoldilocksConfig; type F = >::F; - let config = CircuitConfig::standard_recursion_config(); + let config = CircuitConfig::standard_recursion_zk_config(); let (proof, vd, cd) = dummy_proof::(&config, 4_000)?; let (proof, _vd, cd) = diff --git a/plonky2/src/util/serialization.rs b/plonky2/src/util/serialization.rs index d0326073..ce5b1270 100644 --- a/plonky2/src/util/serialization.rs +++ b/plonky2/src/util/serialization.rs @@ -15,6 +15,7 @@ use crate::hash::merkle_proofs::MerkleProof; use crate::hash::merkle_tree::MerkleCap; use crate::plonk::circuit_data::CommonCircuitData; use 
crate::plonk::config::{GenericConfig, GenericHashOut, Hasher}; +use crate::plonk::plonk_common::salt_size; use crate::plonk::proof::{ CompressedProof, CompressedProofWithPublicInputs, OpeningSet, Proof, ProofWithPublicInputs, }; @@ -235,6 +236,7 @@ impl Buffer { common_data: &CommonCircuitData, ) -> Result> { let config = &common_data.config; + let salt = salt_size(common_data.fri_params.hiding); let mut evals_proofs = Vec::with_capacity(4); let constants_sigmas_v = @@ -242,17 +244,18 @@ impl Buffer { let constants_sigmas_p = self.read_merkle_proof()?; evals_proofs.push((constants_sigmas_v, constants_sigmas_p)); - let wires_v = self.read_field_vec(config.num_wires)?; + let wires_v = self.read_field_vec(config.num_wires + salt)?; let wires_p = self.read_merkle_proof()?; evals_proofs.push((wires_v, wires_p)); - let zs_partial_v = - self.read_field_vec(config.num_challenges * (1 + common_data.num_partial_products))?; + let zs_partial_v = self.read_field_vec( + config.num_challenges * (1 + common_data.num_partial_products) + salt, + )?; let zs_partial_p = self.read_merkle_proof()?; evals_proofs.push((zs_partial_v, zs_partial_p)); let quotient_v = - self.read_field_vec(config.num_challenges * common_data.quotient_degree_factor)?; + self.read_field_vec(config.num_challenges * common_data.quotient_degree_factor + salt)?; let quotient_p = self.read_merkle_proof()?; evals_proofs.push((quotient_v, quotient_p)); From 627e80bfd502fb4add0af43b25a0a5a582ea5870 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Tue, 15 Mar 2022 09:24:10 -0700 Subject: [PATCH 31/32] Filter mul-add constraints (#512) --- system_zero/src/alu/mul_add.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/system_zero/src/alu/mul_add.rs b/system_zero/src/alu/mul_add.rs index b84cafbf..5179faac 100644 --- a/system_zero/src/alu/mul_add.rs +++ b/system_zero/src/alu/mul_add.rs @@ -47,6 +47,7 @@ pub(crate) fn eval_mul_add>( let result_canonical_inv = 
local_values[COL_MUL_ADD_RESULT_CANONICAL_INV]; let computed_output = factor_0 * factor_1 + addend; + // TODO: Needs to be filtered by IS_MUL_ADD. let output = combine_u16s_check_canonical( output_1, output_2, @@ -55,7 +56,7 @@ result_canonical_inv, yield_constr, ); - yield_constr.constraint(computed_output - output); + yield_constr.constraint(is_mul * (computed_output - output)); } pub(crate) fn eval_mul_add_recursively, const D: usize>( @@ -74,6 +75,7 @@ let result_canonical_inv = local_values[COL_MUL_ADD_RESULT_CANONICAL_INV]; let computed_output = builder.mul_add_extension(factor_0, factor_1, addend); + // TODO: Needs to be filtered by IS_MUL_ADD. let output = combine_u16s_check_canonical_circuit( builder, output_1, @@ -84,5 +86,6 @@ yield_constr, ); let diff = builder.sub_extension(computed_output, output); + let filtered_diff = builder.mul_extension(is_mul, diff); - yield_constr.constraint(builder, diff); + yield_constr.constraint(builder, filtered_diff); } From 7d6c0a448ddb68f5c181f9440bf3213f898519aa Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Wed, 16 Mar 2022 17:37:34 -0700 Subject: [PATCH 32/32] Halo2 style lookup arguments in System Zero (#513) * Halo2 style lookup arguments in System Zero It's a really nice and simple protocol, particularly for the verifier since the constraints are trivial (aside from the underlying batched permutation checks, which we already support). See the [Halo2 book](https://zcash.github.io/halo2/design/proving-system/lookup.html) and this [talk](https://www.youtube.com/watch?v=YlTt12s7vGE&t=5237s) by @daira. Previously we generated the whole trace in row-wise form, but it's much more efficient to generate these "permuted" columns column-wise. So I changed our STARK framework to accept the trace in column-wise form.
STARK impls now have the flexibility to do some generation row-wise and some column-wise (without extra costs; there's a single transpose as before). * sorting * fixes * PR feedback * into_iter * timing --- field/src/field_types.rs | 5 + field/src/goldilocks_field.rs | 3 +- starky/src/fibonacci_stark.rs | 14 +- starky/src/lib.rs | 1 + starky/src/permutation.rs | 8 + starky/src/prover.rs | 16 +- starky/src/util.rs | 16 ++ starky/src/verifier.rs | 5 +- system_zero/Cargo.toml | 8 + system_zero/benches/lookup_permuted_cols.rs | 30 ++++ system_zero/src/lib.rs | 1 + system_zero/src/lookup.rs | 147 ++++++++++++++++++ system_zero/src/registers/lookup.rs | 24 ++- system_zero/src/registers/range_check_16.rs | 2 +- .../src/registers/range_check_degree.rs | 2 +- system_zero/src/system_zero.rs | 76 ++++++++- 16 files changed, 324 insertions(+), 34 deletions(-) create mode 100644 starky/src/util.rs create mode 100644 system_zero/benches/lookup_permuted_cols.rs create mode 100644 system_zero/src/lookup.rs diff --git a/field/src/field_types.rs b/field/src/field_types.rs index 83826b9f..4adfdbf4 100644 --- a/field/src/field_types.rs +++ b/field/src/field_types.rs @@ -462,6 +462,11 @@ pub trait PrimeField64: PrimeField + Field64 { fn to_canonical_u64(&self) -> u64; fn to_noncanonical_u64(&self) -> u64; + + #[inline(always)] + fn to_canonical(&self) -> Self { + Self::from_canonical_u64(self.to_canonical_u64()) + } } /// An iterator over the powers of a certain base element `b`: `b^0, b^1, b^2, ...`. 
diff --git a/field/src/goldilocks_field.rs b/field/src/goldilocks_field.rs index 4ed32a0d..c3172991 100644 --- a/field/src/goldilocks_field.rs +++ b/field/src/goldilocks_field.rs @@ -95,7 +95,7 @@ impl Field for GoldilocksField { Self(n.mod_floor(&Self::order()).to_u64_digits()[0]) } - #[inline] + #[inline(always)] fn from_canonical_u64(n: u64) -> Self { debug_assert!(n < Self::ORDER); Self(n) @@ -156,6 +156,7 @@ impl PrimeField64 for GoldilocksField { c } + #[inline(always)] fn to_noncanonical_u64(&self) -> u64 { self.0 } diff --git a/starky/src/fibonacci_stark.rs b/starky/src/fibonacci_stark.rs index 7961ad50..fa9ccd87 100644 --- a/starky/src/fibonacci_stark.rs +++ b/starky/src/fibonacci_stark.rs @@ -2,12 +2,14 @@ use std::marker::PhantomData; use plonky2::field::extension_field::{Extendable, FieldExtension}; use plonky2::field::packed_field::PackedField; +use plonky2::field::polynomial::PolynomialValues; use plonky2::hash::hash_types::RichField; use plonky2::plonk::circuit_builder::CircuitBuilder; use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use crate::permutation::PermutationPair; use crate::stark::Stark; +use crate::util::trace_rows_to_poly_values; use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars}; /// Toy STARK system used for testing. @@ -37,8 +39,8 @@ impl, const D: usize> FibonacciStark { } /// Generate the trace using `x0, x1, 0, 1` as initial state values. - fn generate_trace(&self, x0: F, x1: F) -> Vec<[F; Self::COLUMNS]> { - let mut trace = (0..self.num_rows) + fn generate_trace(&self, x0: F, x1: F) -> Vec> { + let mut trace_rows = (0..self.num_rows) .scan([x0, x1, F::ZERO, F::ONE], |acc, _| { let tmp = *acc; acc[0] = tmp[1]; @@ -48,8 +50,8 @@ impl, const D: usize> FibonacciStark { Some(tmp) }) .collect::>(); - trace[self.num_rows - 1][3] = F::ZERO; // So that column 2 and 3 are permutation of one another. 
- trace + trace_rows[self.num_rows - 1][3] = F::ZERO; // So that column 2 and 3 are permutation of one another. + trace_rows_to_poly_values(trace_rows) } } @@ -113,9 +115,7 @@ impl, const D: usize> Stark for FibonacciStar } fn permutation_pairs(&self) -> Vec { - vec![PermutationPair { - column_pairs: vec![(2, 3)], - }] + vec![PermutationPair::singletons(2, 3)] } } diff --git a/starky/src/lib.rs b/starky/src/lib.rs index 8249d90b..b2293443 100644 --- a/starky/src/lib.rs +++ b/starky/src/lib.rs @@ -15,6 +15,7 @@ pub mod prover; pub mod recursive_verifier; pub mod stark; pub mod stark_testing; +pub mod util; pub mod vanishing_poly; pub mod vars; pub mod verifier; diff --git a/starky/src/permutation.rs b/starky/src/permutation.rs index 2e1d603c..91b1be27 100644 --- a/starky/src/permutation.rs +++ b/starky/src/permutation.rs @@ -30,6 +30,14 @@ pub struct PermutationPair { pub column_pairs: Vec<(usize, usize)>, } +impl PermutationPair { + pub fn singletons(lhs: usize, rhs: usize) -> Self { + Self { + column_pairs: vec![(lhs, rhs)], + } + } +} + /// A single instance of a permutation check protocol. 
pub(crate) struct PermutationInstance<'a, T: Copy> { pub(crate) pair: &'a PermutationPair, diff --git a/starky/src/prover.rs b/starky/src/prover.rs index 336b9963..da1b5dd4 100644 --- a/starky/src/prover.rs +++ b/starky/src/prover.rs @@ -30,7 +30,7 @@ use crate::vars::StarkEvaluationVars; pub fn prove( stark: S, config: &StarkConfig, - trace: Vec<[F; S::COLUMNS]>, + trace_poly_values: Vec>, public_inputs: [F; S::PUBLIC_INPUTS], timing: &mut TimingTree, ) -> Result> @@ -42,7 +42,7 @@ where [(); S::PUBLIC_INPUTS]:, [(); C::Hasher::HASH_SIZE]:, { - let degree = trace.len(); + let degree = trace_poly_values[0].len(); let degree_bits = log2_strict(degree); let fri_params = config.fri_params(degree_bits); let rate_bits = config.fri_config.rate_bits; @@ -52,18 +52,6 @@ where "FRI total reduction arity is too large.", ); - let trace_vecs = trace.iter().map(|row| row.to_vec()).collect_vec(); - let trace_col_major: Vec> = transpose(&trace_vecs); - - let trace_poly_values: Vec> = timed!( - timing, - "compute trace polynomials", - trace_col_major - .par_iter() - .map(|column| PolynomialValues::new(column.clone())) - .collect() - ); - let trace_commitment = timed!( timing, "compute trace commitment", diff --git a/starky/src/util.rs b/starky/src/util.rs new file mode 100644 index 00000000..011a1add --- /dev/null +++ b/starky/src/util.rs @@ -0,0 +1,16 @@ +use itertools::Itertools; +use plonky2::field::field_types::Field; +use plonky2::field::polynomial::PolynomialValues; +use plonky2::util::transpose; + +/// A helper function to transpose a row-wise trace and put it in the format that `prove` expects. 
+pub fn trace_rows_to_poly_values( + trace_rows: Vec<[F; COLUMNS]>, +) -> Vec> { + let trace_row_vecs = trace_rows.into_iter().map(|row| row.to_vec()).collect_vec(); + let trace_col_vecs: Vec> = transpose(&trace_row_vecs); + trace_col_vecs + .into_iter() + .map(|column| PolynomialValues::new(column)) + .collect() +} diff --git a/starky/src/verifier.rs b/starky/src/verifier.rs index a9bf897c..d5071af7 100644 --- a/starky/src/verifier.rs +++ b/starky/src/verifier.rs @@ -118,7 +118,10 @@ where .chunks(stark.quotient_degree_factor()) .enumerate() { - ensure!(vanishing_polys_zeta[i] == z_h_zeta * reduce_with_powers(chunk, zeta_pow_deg)); + ensure!( + vanishing_polys_zeta[i] == z_h_zeta * reduce_with_powers(chunk, zeta_pow_deg), + "Mismatch between evaluation and opening of quotient polynomial" + ); } let merkle_caps = once(proof.trace_cap) diff --git a/system_zero/Cargo.toml b/system_zero/Cargo.toml index 032bfb53..a9029dad 100644 --- a/system_zero/Cargo.toml +++ b/system_zero/Cargo.toml @@ -10,6 +10,14 @@ plonky2_util = { path = "../util" } starky = { path = "../starky" } anyhow = "1.0.40" env_logger = "0.9.0" +itertools = "0.10.0" log = "0.4.14" rand = "0.8.4" rand_chacha = "0.3.1" + +[dev-dependencies] +criterion = "0.3.5" + +[[bench]] +name = "lookup_permuted_cols" +harness = false diff --git a/system_zero/benches/lookup_permuted_cols.rs b/system_zero/benches/lookup_permuted_cols.rs new file mode 100644 index 00000000..371b3470 --- /dev/null +++ b/system_zero/benches/lookup_permuted_cols.rs @@ -0,0 +1,30 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use itertools::Itertools; +use plonky2::field::field_types::Field; +use plonky2::field::goldilocks_field::GoldilocksField; +use rand::{thread_rng, Rng}; +use system_zero::lookup::permuted_cols; + +type F = GoldilocksField; + +fn criterion_benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("lookup-permuted-cols"); + + for size_log in [16, 17, 18] { + let size = 1 << 
size_log; + group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, _| { + // We could benchmark a table of random values with + // let table = F::rand_vec(size); + // But in practice we currently use tables that are pre-sorted, which makes + // permuted_cols cheaper since it will sort the table. + let table = (0..size).map(F::from_canonical_usize).collect_vec(); + let input = (0..size) + .map(|_| table[thread_rng().gen_range(0..size)]) + .collect_vec(); + b.iter(|| permuted_cols(&input, &table)); + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/system_zero/src/lib.rs b/system_zero/src/lib.rs index 35576cd3..81e5e9b1 100644 --- a/system_zero/src/lib.rs +++ b/system_zero/src/lib.rs @@ -4,6 +4,7 @@ mod alu; mod core_registers; +pub mod lookup; mod memory; mod permutation_unit; mod public_input_layout; diff --git a/system_zero/src/lookup.rs b/system_zero/src/lookup.rs new file mode 100644 index 00000000..5a5f0da1 --- /dev/null +++ b/system_zero/src/lookup.rs @@ -0,0 +1,147 @@ +//! Implementation of the Halo2 lookup argument. +//! +//! References: +//! - https://zcash.github.io/halo2/design/proving-system/lookup.html +//! 
- https://www.youtube.com/watch?v=YlTt12s7vGE&t=5237s + +use std::cmp::Ordering; + +use itertools::Itertools; +use plonky2::field::extension_field::Extendable; +use plonky2::field::field_types::{Field, PrimeField64}; +use plonky2::field::packed_field::PackedField; +use plonky2::hash::hash_types::RichField; +use plonky2::plonk::circuit_builder::CircuitBuilder; +use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; +use starky::vars::StarkEvaluationTargets; +use starky::vars::StarkEvaluationVars; + +use crate::public_input_layout::NUM_PUBLIC_INPUTS; +use crate::registers::lookup::*; +use crate::registers::NUM_COLUMNS; + +pub(crate) fn generate_lookups(trace_cols: &mut [Vec]) { + for i in 0..NUM_LOOKUPS { + let inputs = &trace_cols[col_input(i)]; + let table = &trace_cols[col_table(i)]; + let (permuted_inputs, permuted_table) = permuted_cols(inputs, table); + trace_cols[col_permuted_input(i)] = permuted_inputs; + trace_cols[col_permuted_table(i)] = permuted_table; + } +} + +/// Given an input column and a table column, generate the permuted input and permuted table columns +/// used in the Halo2 permutation argument. +pub fn permuted_cols(inputs: &[F], table: &[F]) -> (Vec, Vec) { + let n = inputs.len(); + + // The permuted inputs do not have to be ordered, but we found that sorting was faster than + // hash-based grouping. We also sort the table, as this helps us identify "unused" table + // elements efficiently. + + // To compare elements, e.g. for sorting, we first need them in canonical form. It would be + // wasteful to canonicalize in each comparison, as a single element may be involved in many + // comparisons. So we will canonicalize once upfront, then use `to_noncanonical_u64` when + // comparing elements. 
+ + let sorted_inputs = inputs + .iter() + .map(|x| x.to_canonical()) + .sorted_unstable_by_key(|x| x.to_noncanonical_u64()) + .collect_vec(); + let sorted_table = table + .iter() + .map(|x| x.to_canonical()) + .sorted_unstable_by_key(|x| x.to_noncanonical_u64()) + .collect_vec(); + + let mut unused_table_inds = Vec::with_capacity(n); + let mut unused_table_vals = Vec::with_capacity(n); + let mut permuted_table = vec![F::ZERO; n]; + let mut i = 0; + let mut j = 0; + while (j < n) && (i < n) { + let input_val = sorted_inputs[i].to_noncanonical_u64(); + let table_val = sorted_table[j].to_noncanonical_u64(); + match input_val.cmp(&table_val) { + Ordering::Greater => { + unused_table_vals.push(sorted_table[j]); + j += 1; + } + Ordering::Less => { + if let Some(x) = unused_table_vals.pop() { + permuted_table[i] = x; + } else { + unused_table_inds.push(i); + } + i += 1; + } + Ordering::Equal => { + permuted_table[i] = sorted_table[j]; + i += 1; + j += 1; + } + } + } + + #[allow(clippy::needless_range_loop)] // indexing is just more natural here + for jj in j..n { + unused_table_vals.push(sorted_table[jj]); + } + for ii in i..n { + unused_table_inds.push(ii); + } + for (ind, val) in unused_table_inds.into_iter().zip_eq(unused_table_vals) { + permuted_table[ind] = val; + } + + (sorted_inputs, permuted_table) +} + +pub(crate) fn eval_lookups>( + vars: StarkEvaluationVars, + yield_constr: &mut ConstraintConsumer
<P>
, +) { + for i in 0..NUM_LOOKUPS { + let local_perm_input = vars.local_values[col_permuted_input(i)]; + let next_perm_table = vars.next_values[col_permuted_table(i)]; + let next_perm_input = vars.next_values[col_permuted_input(i)]; + + // A "vertical" diff between the local and next permuted inputs. + let diff_input_prev = next_perm_input - local_perm_input; + // A "horizontal" diff between the next permuted input and permuted table value. + let diff_input_table = next_perm_input - next_perm_table; + + yield_constr.constraint(diff_input_prev * diff_input_table); + + // This is actually constraining the first row, as per the spec, since `diff_input_table` + // is a diff of the next row's values. In the context of `constraint_last_row`, the next + // row is the first row. + yield_constr.constraint_last_row(diff_input_table); + } +} + +pub(crate) fn eval_lookups_recursively, const D: usize>( + builder: &mut CircuitBuilder, + vars: StarkEvaluationTargets, + yield_constr: &mut RecursiveConstraintConsumer, +) { + for i in 0..NUM_LOOKUPS { + let local_perm_input = vars.local_values[col_permuted_input(i)]; + let next_perm_table = vars.next_values[col_permuted_table(i)]; + let next_perm_input = vars.next_values[col_permuted_input(i)]; + + // A "vertical" diff between the local and next permuted inputs. + let diff_input_prev = builder.sub_extension(next_perm_input, local_perm_input); + // A "horizontal" diff between the next permuted input and permuted table value. + let diff_input_table = builder.sub_extension(next_perm_input, next_perm_table); + + let diff_product = builder.mul_extension(diff_input_prev, diff_input_table); + yield_constr.constraint(builder, diff_product); + + // This is actually constraining the first row, as per the spec, since `diff_input_table` + // is a diff of the next row's values. In the context of `constraint_last_row`, the next + // row is the first row. 
+ yield_constr.constraint_last_row(builder, diff_input_table); + } +} diff --git a/system_zero/src/registers/lookup.rs b/system_zero/src/registers/lookup.rs index eb773acf..fd0abd43 100644 --- a/system_zero/src/registers/lookup.rs +++ b/system_zero/src/registers/lookup.rs @@ -3,19 +3,35 @@ const START_UNIT: usize = super::START_LOOKUP; -const NUM_LOOKUPS: usize = +pub(crate) const NUM_LOOKUPS: usize = super::range_check_16::NUM_RANGE_CHECKS + super::range_check_degree::NUM_RANGE_CHECKS; +pub(crate) const fn col_input(i: usize) -> usize { + if i < super::range_check_16::NUM_RANGE_CHECKS { + super::range_check_16::col_rc_16_input(i) + } else { + super::range_check_degree::col_rc_degree_input(i - super::range_check_16::NUM_RANGE_CHECKS) + } +} + /// This column contains a permutation of the input values. -const fn col_permuted_input(i: usize) -> usize { +pub(crate) const fn col_permuted_input(i: usize) -> usize { debug_assert!(i < NUM_LOOKUPS); START_UNIT + 2 * i } +pub(crate) const fn col_table(i: usize) -> usize { + if i < super::range_check_16::NUM_RANGE_CHECKS { + super::core::COL_RANGE_16 + } else { + super::core::COL_CLOCK + } +} + /// This column contains a permutation of the table values. -const fn col_permuted_table(i: usize) -> usize { +pub(crate) const fn col_permuted_table(i: usize) -> usize { debug_assert!(i < NUM_LOOKUPS); START_UNIT + 2 * i + 1 } -pub(super) const END: usize = START_UNIT + NUM_LOOKUPS; +pub(super) const END: usize = START_UNIT + NUM_LOOKUPS * 2; diff --git a/system_zero/src/registers/range_check_16.rs b/system_zero/src/registers/range_check_16.rs index c44db494..674df302 100644 --- a/system_zero/src/registers/range_check_16.rs +++ b/system_zero/src/registers/range_check_16.rs @@ -1,6 +1,6 @@ //! Range check unit which checks that values are in `[0, 2^16)`. -pub(super) const NUM_RANGE_CHECKS: usize = 5; +pub(crate) const NUM_RANGE_CHECKS: usize = 5; /// The input of the `i`th range check, i.e. the value being range checked. 
pub(crate) const fn col_rc_16_input(i: usize) -> usize { diff --git a/system_zero/src/registers/range_check_degree.rs b/system_zero/src/registers/range_check_degree.rs index 6d61e6e2..caad705d 100644 --- a/system_zero/src/registers/range_check_degree.rs +++ b/system_zero/src/registers/range_check_degree.rs @@ -1,6 +1,6 @@ //! Range check unit which checks that values are in `[0, degree)`. -pub(super) const NUM_RANGE_CHECKS: usize = 5; +pub(crate) const NUM_RANGE_CHECKS: usize = 5; /// The input of the `i`th range check, i.e. the value being range checked. pub(crate) const fn col_rc_degree_input(i: usize) -> usize { diff --git a/system_zero/src/system_zero.rs b/system_zero/src/system_zero.rs index c42a04a8..32c49266 100644 --- a/system_zero/src/system_zero.rs +++ b/system_zero/src/system_zero.rs @@ -2,8 +2,12 @@ use std::marker::PhantomData; use plonky2::field::extension_field::{Extendable, FieldExtension}; use plonky2::field::packed_field::PackedField; +use plonky2::field::polynomial::PolynomialValues; use plonky2::hash::hash_types::RichField; use plonky2::plonk::circuit_builder::CircuitBuilder; +use plonky2::timed; +use plonky2::util::timing::TimingTree; +use plonky2::util::transpose; use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer}; use starky::permutation::PermutationPair; use starky::stark::Stark; @@ -15,12 +19,13 @@ use crate::core_registers::{ eval_core_registers, eval_core_registers_recursively, generate_first_row_core_registers, generate_next_row_core_registers, }; +use crate::lookup::{eval_lookups, eval_lookups_recursively, generate_lookups}; use crate::memory::TransactionMemory; use crate::permutation_unit::{ eval_permutation_unit, eval_permutation_unit_recursively, generate_permutation_unit, }; use crate::public_input_layout::NUM_PUBLIC_INPUTS; -use crate::registers::NUM_COLUMNS; +use crate::registers::{lookup, NUM_COLUMNS}; /// We require at least 2^16 rows as it helps support efficient 16-bit range checks. 
const MIN_TRACE_ROWS: usize = 1 << 16; @@ -31,7 +36,9 @@ pub struct SystemZero, const D: usize> { } impl, const D: usize> SystemZero { - fn generate_trace(&self) -> Vec<[F; NUM_COLUMNS]> { + /// Generate the rows of the trace. Note that this does not generate the permuted columns used + /// in our lookup arguments, as those are computed after transposing to column-wise form. + fn generate_trace_rows(&self) -> Vec<[F; NUM_COLUMNS]> { let memory = TransactionMemory::default(); let mut row = [F::ZERO; NUM_COLUMNS]; @@ -59,6 +66,45 @@ impl, const D: usize> SystemZero { trace.push(row); trace } + + fn generate_trace(&self) -> Vec> { + let mut timing = TimingTree::new("generate trace", log::Level::Debug); + + // Generate the witness, except for permuted columns in the lookup argument. + let trace_rows = timed!( + &mut timing, + "generate trace rows", + self.generate_trace_rows() + ); + + // Transpose from row-wise to column-wise. + let trace_row_vecs: Vec<_> = timed!( + &mut timing, + "convert to Vecs", + trace_rows.into_iter().map(|row| row.to_vec()).collect() + ); + let mut trace_col_vecs: Vec> = + timed!(&mut timing, "transpose", transpose(&trace_row_vecs)); + + // Generate permuted columns in the lookup argument. 
+ timed!( + &mut timing, + "generate lookup columns", + generate_lookups(&mut trace_col_vecs) + ); + + let trace_polys = timed!( + &mut timing, + "convert to PolynomialValues", + trace_col_vecs + .into_iter() + .map(|column| PolynomialValues::new(column)) + .collect() + ); + + timing.print(); + trace_polys + } } impl, const D: usize> Default for SystemZero { @@ -84,6 +130,7 @@ impl, const D: usize> Stark for SystemZero(vars, yield_constr); + eval_lookups(vars, yield_constr); // TODO: Other units } @@ -96,6 +143,7 @@ impl, const D: usize> Stark for SystemZero, const D: usize> Stark for SystemZero Vec { + let mut pairs = Vec::new(); + + for i in 0..lookup::NUM_LOOKUPS { + pairs.push(PermutationPair::singletons( + lookup::col_input(i), + lookup::col_permuted_input(i), + )); + pairs.push(PermutationPair::singletons( + lookup::col_table(i), + lookup::col_permuted_table(i), + )); + } + // TODO: Add permutation pairs for memory. - // TODO: Add permutation pairs for range checks. - vec![] + + pairs } } @@ -127,8 +188,9 @@ mod tests { use crate::system_zero::SystemZero; #[test] - #[ignore] // A bit slow. fn run() -> Result<()> { + init_logger(); + type F = GoldilocksField; type C = PoseidonGoldilocksConfig; const D: usize = 2; @@ -154,4 +216,8 @@ mod tests { let system = S::default(); test_stark_low_degree(system) } + + fn init_logger() { + let _ = env_logger::builder().format_timestamp(None).try_init(); + } }