Merge branch 'main' into glv

# Conflicts:
#	ecdsa/src/curve/ecdsa.rs
#	plonky2/src/gadgets/ecdsa.rs
wborgeaud 2022-03-18 07:19:28 +01:00
commit c472afe12e
57 changed files with 2528 additions and 1168 deletions

View File

@ -1,8 +1,8 @@
use plonky2_field::field_types::Field;
use serde::{Deserialize, Serialize};
use crate::curve::curve_msm::msm_parallel;
use crate::curve::curve_types::{base_to_scalar, AffinePoint, Curve, CurveScalar};
use crate::field::field_types::Field;
#[derive(Copy, Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub struct ECDSASignature<C: Curve> {
@ -16,6 +16,10 @@ pub struct ECDSASecretKey<C: Curve>(pub C::ScalarField);
#[derive(Copy, Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub struct ECDSAPublicKey<C: Curve>(pub AffinePoint<C>);
pub fn secret_to_public<C: Curve>(sk: ECDSASecretKey<C>) -> ECDSAPublicKey<C> {
ECDSAPublicKey((CurveScalar(sk.0) * C::GENERATOR_PROJECTIVE).to_affine())
}
pub fn sign_message<C: Curve>(msg: C::ScalarField, sk: ECDSASecretKey<C>) -> ECDSASignature<C> {
let (k, rr) = {
let mut k = C::ScalarField::rand();
@ -57,20 +61,18 @@ pub fn verify_message<C: Curve>(
#[cfg(test)]
mod tests {
use plonky2_field::field_types::Field;
use plonky2_field::secp256k1_scalar::Secp256K1Scalar;
use crate::curve::curve_types::{Curve, CurveScalar};
use crate::curve::ecdsa::{sign_message, verify_message, ECDSAPublicKey, ECDSASecretKey};
use crate::curve::ecdsa::{secret_to_public, sign_message, verify_message, ECDSASecretKey};
use crate::curve::secp256k1::Secp256K1;
use crate::field::field_types::Field;
use crate::field::secp256k1_scalar::Secp256K1Scalar;
#[test]
fn test_ecdsa_native() {
type C = Secp256K1;
let msg = Secp256K1Scalar::rand();
let sk = ECDSASecretKey(Secp256K1Scalar::rand());
let pk = ECDSAPublicKey((CurveScalar(sk.0) * C::GENERATOR_PROJECTIVE).to_affine());
let sk = ECDSASecretKey::<C>(Secp256K1Scalar::rand());
let pk = secret_to_public(sk);
let sig = sign_message(msg, sk);
let result = verify_message(msg, sig, pk);

View File

@ -12,3 +12,4 @@ num = { version = "0.4", features = [ "rand" ] }
rand = "0.8.4"
serde = { version = "1.0", features = ["derive"] }
unroll = "0.1.5"
static_assertions = "1.1.0"

View File

@ -170,7 +170,7 @@ impl<F: Extendable<2>> Mul for QuadraticExtension<F> {
type Output = Self;
#[inline]
fn mul(self, rhs: Self) -> Self {
default fn mul(self, rhs: Self) -> Self {
let Self([a0, a1]) = self;
let Self([b0, b1]) = rhs;

View File

@ -201,7 +201,7 @@ impl<F: Extendable<4>> Mul for QuarticExtension<F> {
type Output = Self;
#[inline]
fn mul(self, rhs: Self) -> Self {
default fn mul(self, rhs: Self) -> Self {
let Self([a0, a1, a2, a3]) = self;
let Self([b0, b1, b2, b3]) = rhs;

View File

@ -201,7 +201,7 @@ impl<F: Extendable<5>> Mul for QuinticExtension<F> {
type Output = Self;
#[inline]
fn mul(self, rhs: Self) -> Self {
default fn mul(self, rhs: Self) -> Self {
let Self([a0, a1, a2, a3, a4]) = self;
let Self([b0, b1, b2, b3, b4]) = rhs;
let w = <Self as OEF<5>>::W;

View File

@ -462,6 +462,11 @@ pub trait PrimeField64: PrimeField + Field64 {
fn to_canonical_u64(&self) -> u64;
fn to_noncanonical_u64(&self) -> u64;
#[inline(always)]
fn to_canonical(&self) -> Self {
Self::from_canonical_u64(self.to_canonical_u64())
}
}
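The new `to_canonical` default simply round-trips through the canonical `u64` representation. A minimal test-style sketch for the Goldilocks field (the test name and values are illustrative, assuming the usual `GoldilocksField`/`Field64` items are in scope):
#[test]
fn to_canonical_normalizes_representation() {
    // ORDER + 1 is a noncanonical representation of 1.
    let x = GoldilocksField(GoldilocksField::ORDER + 1);
    assert_eq!(x.to_canonical_u64(), 1);
    assert_eq!(x.to_canonical(), GoldilocksField(1));
}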
/// An iterator over the powers of a certain base element `b`: `b^0, b^1, b^2, ...`.

View File

@ -0,0 +1,495 @@
use std::ops::Mul;
use static_assertions::const_assert;
use crate::extension_field::quadratic::QuadraticExtension;
use crate::extension_field::quartic::QuarticExtension;
use crate::extension_field::quintic::QuinticExtension;
use crate::extension_field::{Extendable, Frobenius};
use crate::field_types::Field;
use crate::goldilocks_field::{reduce160, GoldilocksField};
impl Frobenius<1> for GoldilocksField {}
impl Extendable<2> for GoldilocksField {
type Extension = QuadraticExtension<Self>;
// Verifiable in Sage with
// `R.<x> = GF(p)[]; assert (x^2 - 7).is_irreducible()`.
const W: Self = Self(7);
// DTH_ROOT = W^((ORDER - 1)/2)
const DTH_ROOT: Self = Self(18446744069414584320);
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 2] =
[Self(18081566051660590251), Self(16121475356294670766)];
const EXT_POWER_OF_TWO_GENERATOR: [Self; 2] = [Self(0), Self(15659105665374529263)];
}
impl Mul for QuadraticExtension<GoldilocksField> {
#[inline]
fn mul(self, rhs: Self) -> Self {
let Self([a0, a1]) = self;
let Self([b0, b1]) = rhs;
let c = ext2_mul([a0.0, a1.0], [b0.0, b1.0]);
Self(c)
}
}
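This impl, together with the quartic and quintic ones below, is the concrete specialization that the `default fn mul` changes in the generic extension-field files above make possible: the generic `Mul` stays as the default, and the Goldilocks-specific versions here override it with the delayed-reduction routines defined later in this file.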
impl Extendable<4> for GoldilocksField {
type Extension = QuarticExtension<Self>;
const W: Self = Self(7);
// DTH_ROOT = W^((ORDER - 1)/4)
const DTH_ROOT: Self = Self(281474976710656);
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 4] = [
Self(5024755240244648895),
Self(13227474371289740625),
Self(3912887029498544536),
Self(3900057112666848848),
];
const EXT_POWER_OF_TWO_GENERATOR: [Self; 4] =
[Self(0), Self(0), Self(0), Self(12587610116473453104)];
}
impl Mul for QuarticExtension<GoldilocksField> {
#[inline]
fn mul(self, rhs: Self) -> Self {
let Self([a0, a1, a2, a3]) = self;
let Self([b0, b1, b2, b3]) = rhs;
let c = ext4_mul([a0.0, a1.0, a2.0, a3.0], [b0.0, b1.0, b2.0, b3.0]);
Self(c)
}
}
impl Extendable<5> for GoldilocksField {
type Extension = QuinticExtension<Self>;
const W: Self = Self(3);
// DTH_ROOT = W^((ORDER - 1)/5)
const DTH_ROOT: Self = Self(1041288259238279555);
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 5] = [
Self(2899034827742553394),
Self(13012057356839176729),
Self(14593811582388663055),
Self(7722900811313895436),
Self(4557222484695340057),
];
const EXT_POWER_OF_TWO_GENERATOR: [Self; 5] = [
Self::POWER_OF_TWO_GENERATOR,
Self(0),
Self(0),
Self(0),
Self(0),
];
}
impl Mul for QuinticExtension<GoldilocksField> {
#[inline]
fn mul(self, rhs: Self) -> Self {
let Self([a0, a1, a2, a3, a4]) = self;
let Self([b0, b1, b2, b3, b4]) = rhs;
let c = ext5_mul(
[a0.0, a1.0, a2.0, a3.0, a4.0],
[b0.0, b1.0, b2.0, b3.0, b4.0],
);
Self(c)
}
}
/*
* The functions extD_add_prods[0-4] are helper functions for
* computing products for extensions of degree D over the Goldilocks
* field. They are faster than the generic method because all
* reductions are delayed until the end, which means only one
* reduction per result coefficient is necessary.
*/
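In general, for a degree-D extension defined by x^D = W, coefficient k of a product is the sum of a_i*b_j over i + j = k, plus W times the sum of a_i*b_j over i + j = k + D; the helpers below are unrolled instances of this for D = 2, 4 and 5. For contrast, a minimal field-level sketch of the D = 2 case that reduces after every operation rather than once per output coefficient (the helper name is illustrative):
fn ext2_mul_naive(a: [GoldilocksField; 2], b: [GoldilocksField; 2]) -> [GoldilocksField; 2] {
    // (a0 + a1*x) * (b0 + b1*x) mod (x^2 - W), with W = 7 for this extension.
    let w = GoldilocksField(7);
    let c0 = a[0] * b[0] + w * a[1] * b[1];
    let c1 = a[0] * b[1] + a[1] * b[0];
    [c0, c1]
}
Every multiplication and addition above performs its own modular reduction, whereas `ext2_mul` below reduces once per output coefficient.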
/// Return a, b such that a + b*2^128 = 3*x with a < 2^128 and b < 2^32.
#[inline(always)]
fn u160_times_3(x: u128, y: u32) -> (u128, u32) {
let (s, cy) = x.overflowing_add(x << 1);
(s, 3 * y + (x >> 127) as u32 + cy as u32)
}
/// Return a, b such that a + b*2^128 = 7*x with a < 2^128 and b < 2^32.
#[inline(always)]
fn u160_times_7(x: u128, y: u32) -> (u128, u32) {
let (d, br) = (x << 3).overflowing_sub(x);
// NB: subtracting the borrow can't underflow
(d, 7 * y + (x >> (128 - 3)) as u32 - br as u32)
}
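A small test-style sketch of the contract shared by these two helpers, namely that the returned pair (lo, hi) represents 3 (resp. 7) times the input value x + y * 2^128 (the test name is illustrative):
#[test]
fn u160_small_multiples_handle_wraparound() {
    // 3 * (2^128 - 1) = 3 * 2^128 - 3 = (2^128 - 3) + 2 * 2^128.
    assert_eq!(u160_times_3(u128::MAX, 0), (u128::MAX - 2, 2));
    // 7 * (2^128 - 1) = 7 * 2^128 - 7 = (2^128 - 7) + 6 * 2^128.
    assert_eq!(u160_times_7(u128::MAX, 0), (u128::MAX - 6, 6));
}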
/*
* Quadratic multiplication and squaring
*/
#[inline(always)]
fn ext2_add_prods0(a: &[u64; 2], b: &[u64; 2]) -> GoldilocksField {
// Computes a0 * b0 + W * a1 * b1;
let [a0, a1] = *a;
let [b0, b1] = *b;
let cy;
// W * a1 * b1
let (mut cumul_lo, mut cumul_hi) = u160_times_7((a1 as u128) * (b1 as u128), 0u32);
// a0 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
#[inline(always)]
fn ext2_add_prods1(a: &[u64; 2], b: &[u64; 2]) -> GoldilocksField {
// Computes a0 * b1 + a1 * b0;
let [a0, a1] = *a;
let [b0, b1] = *b;
let cy;
// a0 * b1
let mut cumul_lo = (a0 as u128) * (b1 as u128);
// a1 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128));
let cumul_hi = cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
/// Multiply a and b considered as elements of GF(p^2).
#[inline(always)]
pub(crate) fn ext2_mul(a: [u64; 2], b: [u64; 2]) -> [GoldilocksField; 2] {
// The code in ext2_add_prods[01] assumes the quadratic extension
// generator is 7.
const_assert!(<GoldilocksField as Extendable<2>>::W.0 == 7u64);
let c0 = ext2_add_prods0(&a, &b);
let c1 = ext2_add_prods1(&a, &b);
[c0, c1]
}
/*
* Quartic multiplication and squaring
*/
#[inline(always)]
fn ext4_add_prods0(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField {
// Computes c0 = a0 * b0 + W * (a1 * b3 + a2 * b2 + a3 * b1)
let [a0, a1, a2, a3] = *a;
let [b0, b1, b2, b3] = *b;
let mut cy;
// a1 * b3
let mut cumul_lo = (a1 as u128) * (b3 as u128);
// a2 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b2 as u128));
let mut cumul_hi = cy as u32;
// a3 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// * W
(cumul_lo, cumul_hi) = u160_times_7(cumul_lo, cumul_hi);
// a0 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
#[inline(always)]
fn ext4_add_prods1(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField {
// Computes c1 = a0 * b1 + a1 * b0 + W * (a2 * b3 + a3 * b2);
let [a0, a1, a2, a3] = *a;
let [b0, b1, b2, b3] = *b;
let mut cy;
// a2 * b3
let mut cumul_lo = (a2 as u128) * (b3 as u128);
// a3 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b2 as u128));
let mut cumul_hi = cy as u32;
// * W
(cumul_lo, cumul_hi) = u160_times_7(cumul_lo, cumul_hi);
// a0 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// a1 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
#[inline(always)]
fn ext4_add_prods2(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField {
// Computes c2 = a0 * b2 + a1 * b1 + a2 * b0 + W * a3 * b3;
let [a0, a1, a2, a3] = *a;
let [b0, b1, b2, b3] = *b;
let mut cy;
// W * a3 * b3
let (mut cumul_lo, mut cumul_hi) = u160_times_7((a3 as u128) * (b3 as u128), 0u32);
// a0 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b2 as u128));
cumul_hi += cy as u32;
// a1 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// a2 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
#[inline(always)]
fn ext4_add_prods3(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField {
// Computes c3 = a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
let [a0, a1, a2, a3] = *a;
let [b0, b1, b2, b3] = *b;
let mut cy;
// a0 * b3
let mut cumul_lo = (a0 as u128) * (b3 as u128);
// a1 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b2 as u128));
let mut cumul_hi = cy as u32;
// a2 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// a3 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
/// Multiply a and b considered as elements of GF(p^4).
#[inline(always)]
pub(crate) fn ext4_mul(a: [u64; 4], b: [u64; 4]) -> [GoldilocksField; 4] {
// The code in ext4_add_prods[0-3] assumes the quartic extension
// generator is 7.
const_assert!(<GoldilocksField as Extendable<4>>::W.0 == 7u64);
let c0 = ext4_add_prods0(&a, &b);
let c1 = ext4_add_prods1(&a, &b);
let c2 = ext4_add_prods2(&a, &b);
let c3 = ext4_add_prods3(&a, &b);
[c0, c1, c2, c3]
}
/*
* Quintic multiplication and squaring
*/
#[inline(always)]
fn ext5_add_prods0(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
// Computes c0 = a0 * b0 + W * (a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1)
let [a0, a1, a2, a3, a4] = *a;
let [b0, b1, b2, b3, b4] = *b;
let mut cy;
// a1 * b4
let mut cumul_lo = (a1 as u128) * (b4 as u128);
// a2 * b3
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b3 as u128));
let mut cumul_hi = cy as u32;
// a3 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b2 as u128));
cumul_hi += cy as u32;
// a4 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// * W
(cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi);
// a0 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
#[inline(always)]
fn ext5_add_prods1(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
// Computes c1 = a0 * b1 + a1 * b0 + W * (a2 * b4 + a3 * b3 + a4 * b2);
let [a0, a1, a2, a3, a4] = *a;
let [b0, b1, b2, b3, b4] = *b;
let mut cy;
// a2 * b4
let mut cumul_lo = (a2 as u128) * (b4 as u128);
// a3 * b3
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b3 as u128));
let mut cumul_hi = cy as u32;
// a4 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b2 as u128));
cumul_hi += cy as u32;
// * W
(cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi);
// a0 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// a1 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
#[inline(always)]
fn ext5_add_prods2(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
// Computes c2 = a0 * b2 + a1 * b1 + a2 * b0 + W * (a3 * b4 + a4 * b3);
let [a0, a1, a2, a3, a4] = *a;
let [b0, b1, b2, b3, b4] = *b;
let mut cy;
// a3 * b4
let mut cumul_lo = (a3 as u128) * (b4 as u128);
// a4 * b3
(cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b3 as u128));
let mut cumul_hi = cy as u32;
// * W
(cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi);
// a0 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b2 as u128));
cumul_hi += cy as u32;
// a1 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// a2 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
#[inline(always)]
fn ext5_add_prods3(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
// Computes c3 = a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0 + W * a4 * b4;
let [a0, a1, a2, a3, a4] = *a;
let [b0, b1, b2, b3, b4] = *b;
let mut cy;
// W * a4 * b4
let (mut cumul_lo, mut cumul_hi) = u160_times_3((a4 as u128) * (b4 as u128), 0u32);
// a0 * b3
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b3 as u128));
cumul_hi += cy as u32;
// a1 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b2 as u128));
cumul_hi += cy as u32;
// a2 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// a3 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
#[inline(always)]
fn ext5_add_prods4(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
// Computes c4 = a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
let [a0, a1, a2, a3, a4] = *a;
let [b0, b1, b2, b3, b4] = *b;
let mut cy;
// a0 * b4
let mut cumul_lo = (a0 as u128) * (b4 as u128);
// a1 * b3
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b3 as u128));
let mut cumul_hi = cy as u32;
// a2 * b2
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b2 as u128));
cumul_hi += cy as u32;
// a3 * b1
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b1 as u128));
cumul_hi += cy as u32;
// a4 * b0
(cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b0 as u128));
cumul_hi += cy as u32;
unsafe { reduce160(cumul_lo, cumul_hi) }
}
/// Multiply a and b considered as elements of GF(p^5).
#[inline(always)]
pub(crate) fn ext5_mul(a: [u64; 5], b: [u64; 5]) -> [GoldilocksField; 5] {
// The code in ext5_add_prods[0-4] assumes the quintic extension
// generator is 3.
const_assert!(<GoldilocksField as Extendable<5>>::W.0 == 3u64);
let c0 = ext5_add_prods0(&a, &b);
let c1 = ext5_add_prods1(&a, &b);
let c2 = ext5_add_prods2(&a, &b);
let c3 = ext5_add_prods3(&a, &b);
let c4 = ext5_add_prods4(&a, &b);
[c0, c1, c2, c3, c4]
}

View File

@ -9,10 +9,6 @@ use plonky2_util::{assume, branch_hint};
use rand::Rng;
use serde::{Deserialize, Serialize};
use crate::extension_field::quadratic::QuadraticExtension;
use crate::extension_field::quartic::QuarticExtension;
use crate::extension_field::quintic::QuinticExtension;
use crate::extension_field::{Extendable, Frobenius};
use crate::field_types::{Field, Field64, PrimeField, PrimeField64};
use crate::inversion::try_inverse_u64;
@ -99,7 +95,7 @@ impl Field for GoldilocksField {
Self(n.mod_floor(&Self::order()).to_u64_digits()[0])
}
#[inline]
#[inline(always)]
fn from_canonical_u64(n: u64) -> Self {
debug_assert!(n < Self::ORDER);
Self(n)
@ -160,6 +156,7 @@ impl PrimeField64 for GoldilocksField {
c
}
#[inline(always)]
fn to_noncanonical_u64(&self) -> u64 {
self.0
}
@ -283,66 +280,6 @@ impl DivAssign for GoldilocksField {
}
}
impl Extendable<2> for GoldilocksField {
type Extension = QuadraticExtension<Self>;
// Verifiable in Sage with
// `R.<x> = GF(p)[]; assert (x^2 - 7).is_irreducible()`.
const W: Self = Self(7);
// DTH_ROOT = W^((ORDER - 1)/2)
const DTH_ROOT: Self = Self(18446744069414584320);
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 2] =
[Self(18081566051660590251), Self(16121475356294670766)];
const EXT_POWER_OF_TWO_GENERATOR: [Self; 2] = [Self(0), Self(15659105665374529263)];
}
impl Extendable<4> for GoldilocksField {
type Extension = QuarticExtension<Self>;
const W: Self = Self(7);
// DTH_ROOT = W^((ORDER - 1)/4)
const DTH_ROOT: Self = Self(281474976710656);
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 4] = [
Self(5024755240244648895),
Self(13227474371289740625),
Self(3912887029498544536),
Self(3900057112666848848),
];
const EXT_POWER_OF_TWO_GENERATOR: [Self; 4] =
[Self(0), Self(0), Self(0), Self(12587610116473453104)];
}
impl Extendable<5> for GoldilocksField {
type Extension = QuinticExtension<Self>;
const W: Self = Self(3);
// DTH_ROOT = W^((ORDER - 1)/5)
const DTH_ROOT: Self = Self(1041288259238279555);
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 5] = [
Self(2899034827742553394),
Self(13012057356839176729),
Self(14593811582388663055),
Self(7722900811313895436),
Self(4557222484695340057),
];
const EXT_POWER_OF_TWO_GENERATOR: [Self; 5] = [
Self::POWER_OF_TWO_GENERATOR,
Self(0),
Self(0),
Self(0),
Self(0),
];
}
/// Fast addition modulo ORDER for x86-64.
/// This function is marked unsafe for the following reasons:
/// - It is only correct if x + y < 2**64 + ORDER = 0x1ffffffff00000001.
@ -407,7 +344,34 @@ fn split(x: u128) -> (u64, u64) {
(x as u64, (x >> 64) as u64)
}
impl Frobenius<1> for GoldilocksField {}
/// Reduce the value x_lo + x_hi * 2^128 to an element in the
/// Goldilocks field.
///
/// This function is marked 'unsafe' because correctness relies on the
/// unchecked assumption that x < 2^160 - 2^128 + 2^96. Further,
/// performance may degrade as x_hi increases beyond 2**40 or so.
#[inline(always)]
pub(crate) unsafe fn reduce160(x_lo: u128, x_hi: u32) -> GoldilocksField {
let x_hi = (x_lo >> 96) as u64 + ((x_hi as u64) << 32); // shld to form x_hi
let x_mid = (x_lo >> 64) as u32; // shr to form x_mid
let x_lo = x_lo as u64;
// sub + jc (should fuse)
let (mut t0, borrow) = x_lo.overflowing_sub(x_hi);
if borrow {
// The maximum possible value of x is (2^64 - 1)^2 * 4 * 7 < 2^133,
// so x_hi < 2^37. A borrow will happen roughly one in 134 million
// times, so it's best to branch.
branch_hint();
// NB: this assumes that x < 2^160 - 2^128 + 2^96.
t0 -= EPSILON; // Cannot underflow if x_hi is canonical.
}
// imul
let t1 = (x_mid as u64) * EPSILON;
// add, sbb, add
let t2 = add_no_canonicalize_trashing_input(t0, t1);
GoldilocksField(t2)
}
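The splitting above relies on the Goldilocks congruences 2^64 ≡ 2^32 - 1 = EPSILON and 2^96 ≡ -1 (mod p), so x = lo + mid*2^64 + hi*2^96 reduces to lo - hi + mid*EPSILON. A slow but straightforward reference sketch of the same identity using wide integer arithmetic (the function name is illustrative, not part of this file):
fn reduce160_reference(x_lo: u128, x_hi: u32) -> u64 {
    const P: u128 = 0xffff_ffff_0000_0001; // Goldilocks order, 2^64 - 2^32 + 1
    const EPS: u128 = 0xffff_ffff; // 2^32 - 1 == 2^64 mod P
    let lo = (x_lo as u64) as u128; // bits 0..64
    let mid = ((x_lo >> 64) as u32) as u128; // bits 64..96
    let hi = ((x_lo >> 96) as u64 + ((x_hi as u64) << 32)) as u128; // bits 96..160
    // lo + mid*2^64 + hi*2^96 == lo + mid*EPS - hi (mod P); the sum below stays far below 2^128.
    ((lo + mid * EPS + P - hi % P) % P) as u64
}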
#[cfg(test)]
mod tests {

View File

@ -15,6 +15,7 @@ pub mod cosets;
pub mod extension_field;
pub mod fft;
pub mod field_types;
pub mod goldilocks_extensions;
pub mod goldilocks_field;
pub mod interpolation;
mod inversion;

View File

@ -1,4 +1,5 @@
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use plonky2::field::extension_field::quadratic::QuadraticExtension;
use plonky2::field::extension_field::quartic::QuarticExtension;
use plonky2::field::extension_field::quintic::QuinticExtension;
use plonky2::field::field_types::Field;
@ -175,6 +176,7 @@ pub(crate) fn bench_field<F: Field>(c: &mut Criterion) {
fn criterion_benchmark(c: &mut Criterion) {
bench_field::<GoldilocksField>(c);
bench_field::<QuadraticExtension<GoldilocksField>>(c);
bench_field::<QuarticExtension<GoldilocksField>>(c);
bench_field::<QuinticExtension<GoldilocksField>>(c);
}

View File

@ -7,22 +7,15 @@ use plonky2_field::goldilocks_field::GoldilocksField;
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;
// For historical reasons, we sample from 0..0xffffffff70000001, which is slightly larger than the
// range of GoldilocksField, then verify that each constant also fits in GoldilocksField.
const SAMPLE_RANGE_END: u64 = 0xffffffff70000001;
const SAMPLE_RANGE_END: u64 = GoldilocksField::ORDER;
// const N: usize = 8 * 30; // For Poseidon-8
const N: usize = 12 * 30; // For Posiedon-12
const N: usize = 12 * 30; // For Poseidon-12
pub(crate) fn main() {
let mut rng = ChaCha8Rng::seed_from_u64(0);
let mut constants = [0u64; N];
for i in 0..N {
constants[i] = rng.gen_range(0..SAMPLE_RANGE_END);
// Make sure the constant fits in Goldilocks. If so, we also have random numbers in
// GoldilocksField::ORDER. This may be viewed as rejection sampling, except that we never
// encounter a rejection in practice, so we don't bother handling it.
assert!(constants[i] < GoldilocksField::ORDER);
}
// Print the constants in the format we prefer in our code.

View File

@ -35,6 +35,7 @@ impl FriConfig {
let reduction_arity_bits = self.reduction_strategy.reduction_arity_bits(
degree_bits,
self.rate_bits,
self.cap_height,
self.num_query_rounds,
);
FriParams {
@ -67,7 +68,7 @@ pub struct FriParams {
}
impl FriParams {
pub(crate) fn total_arities(&self) -> usize {
pub fn total_arities(&self) -> usize {
self.reduction_arity_bits.iter().sum()
}

View File

@ -9,9 +9,10 @@ pub enum FriReductionStrategy {
Fixed(Vec<usize>),
/// `ConstantArityBits(arity_bits, final_poly_bits)` applies reductions of arity `2^arity_bits`
/// until the polynomial degree is `2^final_poly_bits` or less. This tends to work well in the
/// recursive setting, as it avoids needing multiple configurations of gates used in FRI
/// verification, such as `InterpolationGate`.
/// until the polynomial degree is less than or equal to `2^final_poly_bits` or until any further
/// `arity_bits`-reduction makes the last FRI tree have height less than `cap_height`.
/// This tends to work well in the recursive setting, as it avoids needing multiple configurations
/// of gates used in FRI verification, such as `InterpolationGate`.
ConstantArityBits(usize, usize),
/// `MinSize(opt_max_arity_bits)` searches for an optimal sequence of reduction arities, with an
@ -26,17 +27,20 @@ impl FriReductionStrategy {
&self,
mut degree_bits: usize,
rate_bits: usize,
cap_height: usize,
num_queries: usize,
) -> Vec<usize> {
match self {
FriReductionStrategy::Fixed(reduction_arity_bits) => reduction_arity_bits.to_vec(),
FriReductionStrategy::ConstantArityBits(arity_bits, final_poly_bits) => {
&FriReductionStrategy::ConstantArityBits(arity_bits, final_poly_bits) => {
let mut result = Vec::new();
while degree_bits > *final_poly_bits {
result.push(*arity_bits);
assert!(degree_bits >= *arity_bits);
degree_bits -= *arity_bits;
while degree_bits > final_poly_bits
&& degree_bits + rate_bits - arity_bits >= cap_height
{
result.push(arity_bits);
assert!(degree_bits >= arity_bits);
degree_bits -= arity_bits;
}
result.shrink_to_fit();
result
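An illustrative worked example of the new `cap_height` cutoff (parameter values chosen for illustration only):
// ConstantArityBits(arity_bits = 3, final_poly_bits = 1), degree_bits = 6, rate_bits = 1, cap_height = 4:
//   pass 1: 6 > 1 and 6 + 1 - 3 = 4 >= 4  -> push 3, degree_bits becomes 3
//   pass 2: 3 > 1 but 3 + 1 - 3 = 1 < 4   -> stop; the cap-height condition, not final_poly_bits, ends the loop
let arities = FriReductionStrategy::ConstantArityBits(3, 1).reduction_arity_bits(6, 1, 4, 28);
assert_eq!(arities, vec![3]); // num_queries (28 here) is not consulted by this strategy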

View File

View File

@ -51,9 +51,13 @@ impl<F: RichField + Extendable<D> + Poseidon, const D: usize> PoseidonMdsGate<F,
let mut res = ExtensionAlgebra::ZERO;
for i in 0..SPONGE_WIDTH {
let coeff = F::Extension::from_canonical_u64(1 << <F as Poseidon>::MDS_MATRIX_EXPS[i]);
let coeff = F::Extension::from_canonical_u64(<F as Poseidon>::MDS_MATRIX_CIRC[i]);
res += v[(i + r) % SPONGE_WIDTH].scalar_mul(coeff);
}
{
let coeff = F::Extension::from_canonical_u64(<F as Poseidon>::MDS_MATRIX_DIAG[r]);
res += v[r].scalar_mul(coeff);
}
res
}
@ -69,10 +73,16 @@ impl<F: RichField + Extendable<D> + Poseidon, const D: usize> PoseidonMdsGate<F,
for i in 0..SPONGE_WIDTH {
let coeff = builder.constant_extension(F::Extension::from_canonical_u64(
1 << <F as Poseidon>::MDS_MATRIX_EXPS[i],
<F as Poseidon>::MDS_MATRIX_CIRC[i],
));
res = builder.scalar_mul_add_ext_algebra(coeff, v[(i + r) % SPONGE_WIDTH], res);
}
{
let coeff = builder.constant_extension(F::Extension::from_canonical_u64(
<F as Poseidon>::MDS_MATRIX_DIAG[r],
));
res = builder.scalar_mul_add_ext_algebra(coeff, v[r], res);
}
res
}
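The gate now evaluates row `r` of an MDS matrix given as a circulant part (`MDS_MATRIX_CIRC`) plus a diagonal part (`MDS_MATRIX_DIAG`), replacing the old power-of-two exponent form. A scalar sketch of the same row formula over plain field elements (the helper name and signature are illustrative):
fn mds_row<F: Field>(v: &[F; SPONGE_WIDTH], r: usize, circ: &[u64; SPONGE_WIDTH], diag: &[u64; SPONGE_WIDTH]) -> F {
    let mut res = F::ZERO;
    for i in 0..SPONGE_WIDTH {
        // Circulant part: row r uses the constant vector rotated by r.
        res += F::from_canonical_u64(circ[i]) * v[(i + r) % SPONGE_WIDTH];
    }
    // The diagonal part contributes only the r-th input.
    res + F::from_canonical_u64(diag[r]) * v[r]
}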

View File

@ -2,37 +2,24 @@
use std::arch::aarch64::*;
use std::arch::asm;
use std::mem::transmute;
use plonky2_field::field_types::Field64;
use plonky2_field::goldilocks_field::GoldilocksField;
use plonky2_util::branch_hint;
use static_assertions::const_assert;
use unroll::unroll_for_loops;
use crate::hash::poseidon::{
Poseidon, ALL_ROUND_CONSTANTS, HALF_N_FULL_ROUNDS, N_PARTIAL_ROUNDS, N_ROUNDS,
};
use crate::hash::poseidon::Poseidon;
// ========================================== CONSTANTS ===========================================
const WIDTH: usize = 12;
// The order below is arbitrary. Repeated coefficients have been removed so these constants fit in
// two registers.
// TODO: ensure this is aligned to 16 bytes (for vector loads), ideally on the same cacheline
const MDS_CONSTS: [u32; 8] = [
0xffffffff,
1 << 1,
1 << 3,
1 << 5,
1 << 8,
1 << 10,
1 << 12,
1 << 16,
];
const EPSILON: u64 = 0xffffffff;
// The round constants to be applied by the second set of full rounds. These are just the usual round constants,
// shifted by one round, with zeros shifted in.
// The round constants to be applied by the second set of full rounds. These are just the usual
// round constants, shifted by one round, with zeros shifted in.
/*
const fn make_final_round_constants() -> [u64; WIDTH * HALF_N_FULL_ROUNDS] {
let mut res = [0; WIDTH * HALF_N_FULL_ROUNDS];
let mut i: usize = 0;
@ -43,6 +30,7 @@ const fn make_final_round_constants() -> [u64; WIDTH * HALF_N_FULL_ROUNDS] {
res
}
const FINAL_ROUND_CONSTANTS: [u64; WIDTH * HALF_N_FULL_ROUNDS] = make_final_round_constants();
*/
// ===================================== COMPILE-TIME CHECKS ======================================
@ -52,9 +40,12 @@ const FINAL_ROUND_CONSTANTS: [u64; WIDTH * HALF_N_FULL_ROUNDS] = make_final_roun
const fn check_mds_matrix() -> bool {
// Can't == two arrays in a const_assert! (:
let mut i = 0;
let wanted_matrix_exps = [0, 0, 1, 0, 3, 5, 1, 8, 12, 3, 16, 10];
let wanted_matrix_circ = [17, 15, 41, 16, 2, 28, 13, 13, 39, 18, 34, 20];
let wanted_matrix_diag = [8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
while i < WIDTH {
if <GoldilocksField as Poseidon>::MDS_MATRIX_EXPS[i] != wanted_matrix_exps[i] {
if <GoldilocksField as Poseidon>::MDS_MATRIX_CIRC[i] != wanted_matrix_circ[i]
|| <GoldilocksField as Poseidon>::MDS_MATRIX_DIAG[i] != wanted_matrix_diag[i]
{
return false;
}
i += 1;
@ -63,37 +54,10 @@ const fn check_mds_matrix() -> bool {
}
const_assert!(check_mds_matrix());
/// The maximum amount by which the MDS matrix will multiply the input.
/// i.e. max(MDS(state)) <= mds_matrix_inf_norm() * max(state).
const fn mds_matrix_inf_norm() -> u64 {
let mut cumul = 0;
let mut i = 0;
while i < WIDTH {
cumul += 1 << <GoldilocksField as Poseidon>::MDS_MATRIX_EXPS[i];
i += 1;
}
cumul
}
/// Ensure that adding round constants to the low result of the MDS multiplication can never
/// overflow.
#[allow(dead_code)]
const fn check_round_const_bounds_mds() -> bool {
let max_mds_res = mds_matrix_inf_norm() * (u32::MAX as u64);
let mut i = WIDTH; // First const layer is handled specially.
while i < WIDTH * N_ROUNDS {
if ALL_ROUND_CONSTANTS[i].overflowing_add(max_mds_res).1 {
return false;
}
i += 1;
}
true
}
const_assert!(check_round_const_bounds_mds());
/// Ensure that the first WIDTH round constants are in canonical* form. This is required because
/// the first constant layer does not handle double overflow.
/// *: round_const == GoldilocksField::ORDER is safe.
/*
#[allow(dead_code)]
const fn check_round_const_bounds_init() -> bool {
let mut i = 0;
@ -106,11 +70,9 @@ const fn check_round_const_bounds_init() -> bool {
true
}
const_assert!(check_round_const_bounds_init());
*/
// ====================================== SCALAR ARITHMETIC =======================================
const EPSILON: u64 = 0xffffffff;
/// Addition modulo ORDER accounting for wraparound. Correct only when a + b < 2**64 + ORDER.
#[inline(always)]
unsafe fn add_with_wraparound(a: u64, b: u64) -> u64 {
@ -133,7 +95,16 @@ unsafe fn add_with_wraparound(a: u64, b: u64) -> u64 {
/// Subtraction of a and (b >> 32) modulo ORDER accounting for wraparound.
#[inline(always)]
unsafe fn sub_with_wraparound_lsr32(a: u64, b: u64) -> u64 {
let b_hi = b >> 32;
let mut b_hi = b >> 32;
// Make sure that LLVM emits two separate instructions for the shift and the subtraction. This
// reduces pressure on the execution units with access to the flags, as they are no longer
// responsible for the shift. The hack is to insert a fake computation between the two
// instructions with an `asm` block to make LLVM think that they can't be merged.
asm!(
"/* {0} */", // Make Rust think we're using the register.
inlateout(reg) b_hi,
options(nomem, nostack, preserves_flags, pure),
);
// This could be done with a.overflowing_add(b_hi), but `checked_sub` signals to the compiler
// that overflow is unlikely (note: this is a standard library implementation detail, not part
// of the spec).
@ -153,7 +124,8 @@ unsafe fn sub_with_wraparound_lsr32(a: u64, b: u64) -> u64 {
unsafe fn mul_epsilon(x: u64) -> u64 {
let res;
asm!(
// Use UMULL to save one instruction. The compiler emits two: extract the low word and then multiply.
// Use UMULL to save one instruction. The compiler emits two: extract the low word and then
// multiply.
"umull {res}, {x:w}, {epsilon:w}",
x = in(reg) x,
epsilon = in(reg) EPSILON,
@ -179,8 +151,9 @@ unsafe fn multiply(x: u64, y: u64) -> u64 {
// ==================================== STANDALONE CONST LAYER =====================================
/// Standalone const layer. Run only once, at the start of round 1. Remaining const layers are fused with the preceding
/// MDS matrix multiplication.
/// Standalone const layer. Run only once, at the start of round 1. Remaining const layers are fused
/// with the preceding MDS matrix multiplication.
/*
#[inline(always)]
#[unroll_for_loops]
unsafe fn const_layer_full(
@ -195,15 +168,15 @@ unsafe fn const_layer_full(
}
state
}
*/
// ========================================== FULL ROUNDS ==========================================
/// Full S-box.
#[inline(always)]
#[unroll_for_loops]
unsafe fn sbox_layer_full(state: [u64; WIDTH]) -> [u64; WIDTH] {
// This is done in scalar. S-boxes in vector are only slightly slower throughput-wise but have an insane latency
// (~100 cycles) on the M1.
// This is done in scalar. S-boxes in vector are only slightly slower throughput-wise but have
// an insane latency (~100 cycles) on the M1.
let mut state2 = [0u64; WIDTH];
assert!(WIDTH == 12);
@ -228,297 +201,227 @@ unsafe fn sbox_layer_full(state: [u64; WIDTH]) -> [u64; WIDTH] {
state7
}
// Aliases for readability. E.g. MDS[5] can be found in mdsv5[MDSI5].
const MDSI2: i32 = 1; // MDS[2] == 1
const MDSI4: i32 = 2; // MDS[4] == 3
const MDSI5: i32 = 3; // MDS[5] == 5
const MDSI6: i32 = 1; // MDS[6] == 1
const MDSI7: i32 = 0; // MDS[7] == 8
const MDSI8: i32 = 2; // MDS[8] == 12
const MDSI9: i32 = 2; // MDS[9] == 3
const MDSI10: i32 = 3; // MDS[10] == 16
const MDSI11: i32 = 1; // MDS[11] == 10
#[inline(always)]
unsafe fn mds_reduce(
[[cumul0_a, cumul0_b], [cumul1_a, cumul1_b]]: [[uint64x2_t; 2]; 2],
// `cumul_a` and `cumul_b` represent two separate field elements. We take advantage of
// vectorization by reducing them simultaneously.
[cumul_a, cumul_b]: [uint32x4_t; 2],
) -> uint64x2_t {
// mds_consts0 == [0xffffffff, 1 << 1, 1 << 3, 1 << 5]
let mds_consts0: uint32x4_t = vld1q_u32((&MDS_CONSTS[0..4]).as_ptr().cast::<u32>());
// Merge accumulators
let cumul0 = vaddq_u64(cumul0_a, cumul0_b);
let cumul1 = vaddq_u64(cumul1_a, cumul1_b);
// Swizzle
let res_lo = vzip1q_u64(cumul0, cumul1);
let res_hi = vzip2q_u64(cumul0, cumul1);
// Reduce from u96
let res_hi = vsraq_n_u64::<32>(res_hi, res_lo);
let res_lo = vsliq_n_u64::<32>(res_lo, res_hi);
// Extract high 32-bits.
let res_hi_hi = vget_low_u32(vuzp2q_u32(
vreinterpretq_u32_u64(res_hi),
vreinterpretq_u32_u64(res_hi),
));
// Multiply by EPSILON and accumulate.
let res_unadj = vmlal_laneq_u32::<0>(res_lo, res_hi_hi, mds_consts0);
let res_adj = vcgtq_u64(res_lo, res_unadj);
vsraq_n_u64::<32>(res_unadj, res_adj)
// Form:
// `lo = [cumul_a[0] + cumul_a[2] * 2**32, cumul_b[0] + cumul_b[2] * 2**32]`
// `hi = [cumul_a[1] + cumul_a[3] * 2**32, cumul_b[1] + cumul_b[3] * 2**32]`
// Observe that the result `== lo + hi * 2**16 (mod Goldilocks)`.
let mut lo = vreinterpretq_u64_u32(vuzp1q_u32(cumul_a, cumul_b));
let mut hi = vreinterpretq_u64_u32(vuzp2q_u32(cumul_a, cumul_b));
// Add the high 48 bits of `lo` to `hi`. This cannot overflow.
hi = vsraq_n_u64::<16>(hi, lo);
// Now, result `== lo.bits[0..16] + hi * 2**16 (mod Goldilocks)`.
// Set the high 48 bits of `lo` to the low 48 bits of `hi`.
lo = vsliq_n_u64::<16>(lo, hi);
// At this point, result `== lo + hi.bits[48..64] * 2**64 (mod Goldilocks)`.
// It remains to fold `hi.bits[48..64]` into `lo`.
let top = {
// Extract the top 16 bits of `hi` as a `u32`.
// Interpret `hi` as a vector of bytes, so we can use a table lookup instruction.
let hi_u8 = vreinterpretq_u8_u64(hi);
// Indices defining the permutation. `0xff` is out of bounds, producing `0`.
let top_idx =
transmute::<[u8; 8], uint8x8_t>([0x06, 0x07, 0xff, 0xff, 0x0e, 0x0f, 0xff, 0xff]);
let top_u8 = vqtbl1_u8(hi_u8, top_idx);
vreinterpret_u32_u8(top_u8)
};
// result `== lo + top * 2**64 (mod Goldilocks)`.
let adj_lo = vmlal_n_u32(lo, top, EPSILON as u32);
let wraparound_mask = vcgtq_u64(lo, adj_lo);
vsraq_n_u64::<32>(adj_lo, wraparound_mask) // Add epsilon on overflow.
}
#[inline(always)]
unsafe fn mds_const_layers_full(
state: [u64; WIDTH],
round_constants: &[u64; WIDTH],
) -> [u64; WIDTH] {
// mds_consts0 == [0xffffffff, 1 << 1, 1 << 3, 1 << 5]
// mds_consts1 == [1 << 8, 1 << 10, 1 << 12, 1 << 16]
let mds_consts0: uint32x4_t = vld1q_u32((&MDS_CONSTS[0..4]).as_ptr().cast::<u32>());
let mds_consts1: uint32x4_t = vld1q_u32((&MDS_CONSTS[4..8]).as_ptr().cast::<u32>());
unsafe fn mds_layer_full(state: [u64; WIDTH]) -> [u64; WIDTH] {
// This function performs an MDS multiplication in complex FFT space.
// However, instead of performing a width-12 FFT, we perform three width-4 FFTs, which is
// cheaper. The 12x12 matrix-vector multiplication (a convolution) becomes two 3x3 real
// matrix-vector multiplications and one 3x3 complex matrix-vector multiplication.
// Aliases for readability. E.g. MDS[5] can be found in mdsv5[mdsi5]. MDS[0], MDS[1], and
// MDS[3] are 0, so they are not needed.
let mdsv2 = mds_consts0; // MDS[2] == 1
let mdsv4 = mds_consts0; // MDS[4] == 3
let mdsv5 = mds_consts0; // MDS[5] == 5
let mdsv6 = mds_consts0; // MDS[6] == 1
let mdsv7 = mds_consts1; // MDS[7] == 8
let mdsv8 = mds_consts1; // MDS[8] == 12
let mdsv9 = mds_consts0; // MDS[9] == 3
let mdsv10 = mds_consts1; // MDS[10] == 16
let mdsv11 = mds_consts1; // MDS[11] == 10
// We split each 64-bit element into four 16-bit chunks. To prevent overflow, each chunk is stored
// in 32 bits. Each NEON vector below represents one field element and consists of four 32-bit chunks:
// `elem == vector[0] + vector[1] * 2**16 + vector[2] * 2**32 + vector[3] * 2**48`.
// For i even, we combine state[i] and state[i + 1] into one vector to save on registers.
// Thus, state1 actually contains state0 and state1 but is only used in the intrinsics that
// access the high high doubleword.
let state1: uint32x4_t =
vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[0]), vcreate_u64(state[1])));
let state3: uint32x4_t =
vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[2]), vcreate_u64(state[3])));
let state5: uint32x4_t =
vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[4]), vcreate_u64(state[5])));
let state7: uint32x4_t =
vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[6]), vcreate_u64(state[7])));
let state9: uint32x4_t =
vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[8]), vcreate_u64(state[9])));
let state11: uint32x4_t =
vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(state[10]), vcreate_u64(state[11])));
// state0 is an alias to the low doubleword of state1. The compiler should use one register for both.
let state0: uint32x2_t = vget_low_u32(state1);
let state2: uint32x2_t = vget_low_u32(state3);
let state4: uint32x2_t = vget_low_u32(state5);
let state6: uint32x2_t = vget_low_u32(state7);
let state8: uint32x2_t = vget_low_u32(state9);
let state10: uint32x2_t = vget_low_u32(state11);
// Constants that we multiply by.
let mut consts: uint32x4_t = transmute::<[u32; 4], _>([2, 4, 8, 16]);
// Two accumulators per output to hide latency. Each accumulator is a vector of two u64s,
// containing the result for the low 32 bits and the high 32 bits. Thus, the final result at
// index i is (cumuli_a[0] + cumuli_b[0]) + (cumuli_a[1] + cumuli_b[1]) * 2**32.
// Prevent LLVM from turning fused multiply (by power of 2)-add (1 instruction) into shift and
// add (two instructions). This fake `asm` block means that LLVM no longer knows the contents of
// `consts`.
asm!("/* {0:v} */", // Make Rust think the register is being used.
inout(vreg) consts,
options(pure, nomem, nostack, preserves_flags),
);
// Start by loading the round constants.
let mut cumul0_a = vcombine_u64(vld1_u64(&round_constants[0]), vcreate_u64(0));
let mut cumul1_a = vcombine_u64(vld1_u64(&round_constants[1]), vcreate_u64(0));
let mut cumul2_a = vcombine_u64(vld1_u64(&round_constants[2]), vcreate_u64(0));
let mut cumul3_a = vcombine_u64(vld1_u64(&round_constants[3]), vcreate_u64(0));
let mut cumul4_a = vcombine_u64(vld1_u64(&round_constants[4]), vcreate_u64(0));
let mut cumul5_a = vcombine_u64(vld1_u64(&round_constants[5]), vcreate_u64(0));
let mut cumul6_a = vcombine_u64(vld1_u64(&round_constants[6]), vcreate_u64(0));
let mut cumul7_a = vcombine_u64(vld1_u64(&round_constants[7]), vcreate_u64(0));
let mut cumul8_a = vcombine_u64(vld1_u64(&round_constants[8]), vcreate_u64(0));
let mut cumul9_a = vcombine_u64(vld1_u64(&round_constants[9]), vcreate_u64(0));
let mut cumul10_a = vcombine_u64(vld1_u64(&round_constants[10]), vcreate_u64(0));
let mut cumul11_a = vcombine_u64(vld1_u64(&round_constants[11]), vcreate_u64(0));
// Four length-3 complex FFTs.
let mut state_fft = [vdupq_n_u32(0); 12];
for i in 0..3 {
// Interpret each field element as a 4-vector of `u16`s.
let x0 = vcreate_u16(state[i]);
let x1 = vcreate_u16(state[i + 3]);
let x2 = vcreate_u16(state[i + 6]);
let x3 = vcreate_u16(state[i + 9]);
// Now the matrix multiplication.
// MDS exps: [0, 0, 1, 0, 3, 5, 1, 8, 12, 3, 16, 10]
// out[i] += in[j] << mds[j - i]
// `vaddl_u16` and `vsubl_u16` yield 4-vectors of `u32`s.
let y0 = vaddl_u16(x0, x2);
let y1 = vaddl_u16(x1, x3);
let y2 = vsubl_u16(x0, x2);
let y3 = vsubl_u16(x1, x3);
let mut cumul0_b = vshll_n_u32::<0>(state0); // MDS[0]
let mut cumul1_b = vshll_n_u32::<10>(state0); // MDS[11]
let mut cumul2_b = vshll_n_u32::<16>(state0); // MDS[10]
let mut cumul3_b = vshll_n_u32::<3>(state0); // MDS[9]
let mut cumul4_b = vshll_n_u32::<12>(state0); // MDS[8]
let mut cumul5_b = vshll_n_u32::<8>(state0); // MDS[7]
let mut cumul6_b = vshll_n_u32::<1>(state0); // MDS[6]
let mut cumul7_b = vshll_n_u32::<5>(state0); // MDS[5]
let mut cumul8_b = vshll_n_u32::<3>(state0); // MDS[4]
let mut cumul9_b = vshll_n_u32::<0>(state0); // MDS[3]
let mut cumul10_b = vshll_n_u32::<1>(state0); // MDS[2]
let mut cumul11_b = vshll_n_u32::<0>(state0); // MDS[1]
let z0 = vaddq_u32(y0, y1);
let z1 = vsubq_u32(y0, y1);
let z2 = y2;
let z3 = y3;
cumul0_a = vaddw_high_u32(cumul0_a, state1); // MDS[1]
cumul1_a = vaddw_high_u32(cumul1_a, state1); // MDS[0]
cumul2_a = vmlal_high_laneq_u32::<MDSI11>(cumul2_a, state1, mdsv11); // MDS[11]
cumul3_a = vmlal_high_laneq_u32::<MDSI10>(cumul3_a, state1, mdsv10); // MDS[10]
cumul4_a = vmlal_high_laneq_u32::<MDSI9>(cumul4_a, state1, mdsv9); // MDS[9]
cumul5_a = vmlal_high_laneq_u32::<MDSI8>(cumul5_a, state1, mdsv8); // MDS[8]
cumul6_a = vmlal_high_laneq_u32::<MDSI7>(cumul6_a, state1, mdsv7); // MDS[7]
cumul7_a = vmlal_high_laneq_u32::<MDSI6>(cumul7_a, state1, mdsv6); // MDS[6]
cumul8_a = vmlal_high_laneq_u32::<MDSI5>(cumul8_a, state1, mdsv5); // MDS[5]
cumul9_a = vmlal_high_laneq_u32::<MDSI4>(cumul9_a, state1, mdsv4); // MDS[4]
cumul10_a = vaddw_high_u32(cumul10_a, state1); // MDS[3]
cumul11_a = vmlal_high_laneq_u32::<MDSI2>(cumul11_a, state1, mdsv2); // MDS[2]
// The FFT is `[z0, z2 + z3 i, z1, z2 - z3 i]`.
cumul0_b = vmlal_laneq_u32::<MDSI2>(cumul0_b, state2, mdsv2); // MDS[2]
cumul1_b = vaddw_u32(cumul1_b, state2); // MDS[1]
cumul2_b = vaddw_u32(cumul2_b, state2); // MDS[0]
cumul3_b = vmlal_laneq_u32::<MDSI11>(cumul3_b, state2, mdsv11); // MDS[11]
cumul4_b = vmlal_laneq_u32::<MDSI10>(cumul4_b, state2, mdsv10); // MDS[10]
cumul5_b = vmlal_laneq_u32::<MDSI9>(cumul5_b, state2, mdsv9); // MDS[9]
cumul6_b = vmlal_laneq_u32::<MDSI8>(cumul6_b, state2, mdsv8); // MDS[8]
cumul7_b = vmlal_laneq_u32::<MDSI7>(cumul7_b, state2, mdsv7); // MDS[7]
cumul8_b = vmlal_laneq_u32::<MDSI6>(cumul8_b, state2, mdsv6); // MDS[6]
cumul9_b = vmlal_laneq_u32::<MDSI5>(cumul9_b, state2, mdsv5); // MDS[5]
cumul10_b = vmlal_laneq_u32::<MDSI4>(cumul10_b, state2, mdsv4); // MDS[4]
cumul11_b = vaddw_u32(cumul11_b, state2); // MDS[3]
state_fft[i] = z0;
state_fft[i + 3] = z1;
state_fft[i + 6] = z2;
state_fft[i + 9] = z3;
}
cumul0_a = vaddw_high_u32(cumul0_a, state3); // MDS[3]
cumul1_a = vmlal_high_laneq_u32::<MDSI2>(cumul1_a, state3, mdsv2); // MDS[2]
cumul2_a = vaddw_high_u32(cumul2_a, state3); // MDS[1]
cumul3_a = vaddw_high_u32(cumul3_a, state3); // MDS[0]
cumul4_a = vmlal_high_laneq_u32::<MDSI11>(cumul4_a, state3, mdsv11); // MDS[11]
cumul5_a = vmlal_high_laneq_u32::<MDSI10>(cumul5_a, state3, mdsv10); // MDS[10]
cumul6_a = vmlal_high_laneq_u32::<MDSI9>(cumul6_a, state3, mdsv9); // MDS[9]
cumul7_a = vmlal_high_laneq_u32::<MDSI8>(cumul7_a, state3, mdsv8); // MDS[8]
cumul8_a = vmlal_high_laneq_u32::<MDSI7>(cumul8_a, state3, mdsv7); // MDS[7]
cumul9_a = vmlal_high_laneq_u32::<MDSI6>(cumul9_a, state3, mdsv6); // MDS[6]
cumul10_a = vmlal_high_laneq_u32::<MDSI5>(cumul10_a, state3, mdsv5); // MDS[5]
cumul11_a = vmlal_high_laneq_u32::<MDSI4>(cumul11_a, state3, mdsv4); // MDS[4]
// 3x3 real matrix-vector mul for component 0 of the FFTs.
// Multiply the vector `[x0, x1, x2]` by the matrix
// `[[ 64, 64, 128],`
// ` [128, 64, 64],`
// ` [ 64, 128, 64]]`
// The results are divided by 4 (this ends up cancelling out some later computations).
{
let x0 = state_fft[0];
let x1 = state_fft[1];
let x2 = state_fft[2];
cumul0_b = vmlal_laneq_u32::<MDSI4>(cumul0_b, state4, mdsv4); // MDS[4]
cumul1_b = vaddw_u32(cumul1_b, state4); // MDS[3]
cumul2_b = vmlal_laneq_u32::<MDSI2>(cumul2_b, state4, mdsv2); // MDS[2]
cumul3_b = vaddw_u32(cumul3_b, state4); // MDS[1]
cumul4_b = vaddw_u32(cumul4_b, state4); // MDS[0]
cumul5_b = vmlal_laneq_u32::<MDSI11>(cumul5_b, state4, mdsv11); // MDS[11]
cumul6_b = vmlal_laneq_u32::<MDSI10>(cumul6_b, state4, mdsv10); // MDS[10]
cumul7_b = vmlal_laneq_u32::<MDSI9>(cumul7_b, state4, mdsv9); // MDS[9]
cumul8_b = vmlal_laneq_u32::<MDSI8>(cumul8_b, state4, mdsv8); // MDS[8]
cumul9_b = vmlal_laneq_u32::<MDSI7>(cumul9_b, state4, mdsv7); // MDS[7]
cumul10_b = vmlal_laneq_u32::<MDSI6>(cumul10_b, state4, mdsv6); // MDS[6]
cumul11_b = vmlal_laneq_u32::<MDSI5>(cumul11_b, state4, mdsv5); // MDS[5]
let t = vshlq_n_u32::<4>(x0);
let u = vaddq_u32(x1, x2);
cumul0_a = vmlal_high_laneq_u32::<MDSI5>(cumul0_a, state5, mdsv5); // MDS[5]
cumul1_a = vmlal_high_laneq_u32::<MDSI4>(cumul1_a, state5, mdsv4); // MDS[4]
cumul2_a = vaddw_high_u32(cumul2_a, state5); // MDS[3]
cumul3_a = vmlal_high_laneq_u32::<MDSI2>(cumul3_a, state5, mdsv2); // MDS[2]
cumul4_a = vaddw_high_u32(cumul4_a, state5); // MDS[1]
cumul5_a = vaddw_high_u32(cumul5_a, state5); // MDS[0]
cumul6_a = vmlal_high_laneq_u32::<MDSI11>(cumul6_a, state5, mdsv11); // MDS[11]
cumul7_a = vmlal_high_laneq_u32::<MDSI10>(cumul7_a, state5, mdsv10); // MDS[10]
cumul8_a = vmlal_high_laneq_u32::<MDSI9>(cumul8_a, state5, mdsv9); // MDS[9]
cumul9_a = vmlal_high_laneq_u32::<MDSI8>(cumul9_a, state5, mdsv8); // MDS[8]
cumul10_a = vmlal_high_laneq_u32::<MDSI7>(cumul10_a, state5, mdsv7); // MDS[7]
cumul11_a = vmlal_high_laneq_u32::<MDSI6>(cumul11_a, state5, mdsv6); // MDS[6]
let y0 = vshlq_n_u32::<4>(u);
let y1 = vmlaq_laneq_u32::<3>(t, x2, consts);
let y2 = vmlaq_laneq_u32::<3>(t, x1, consts);
cumul0_b = vmlal_laneq_u32::<MDSI6>(cumul0_b, state6, mdsv6); // MDS[6]
cumul1_b = vmlal_laneq_u32::<MDSI5>(cumul1_b, state6, mdsv5); // MDS[5]
cumul2_b = vmlal_laneq_u32::<MDSI4>(cumul2_b, state6, mdsv4); // MDS[4]
cumul3_b = vaddw_u32(cumul3_b, state6); // MDS[3]
cumul4_b = vmlal_laneq_u32::<MDSI2>(cumul4_b, state6, mdsv2); // MDS[2]
cumul5_b = vaddw_u32(cumul5_b, state6); // MDS[1]
cumul6_b = vaddw_u32(cumul6_b, state6); // MDS[0]
cumul7_b = vmlal_laneq_u32::<MDSI11>(cumul7_b, state6, mdsv11); // MDS[11]
cumul8_b = vmlal_laneq_u32::<MDSI10>(cumul8_b, state6, mdsv10); // MDS[10]
cumul9_b = vmlal_laneq_u32::<MDSI9>(cumul9_b, state6, mdsv9); // MDS[9]
cumul10_b = vmlal_laneq_u32::<MDSI8>(cumul10_b, state6, mdsv8); // MDS[8]
cumul11_b = vmlal_laneq_u32::<MDSI7>(cumul11_b, state6, mdsv7); // MDS[7]
state_fft[0] = vaddq_u32(y0, y1);
state_fft[1] = vaddq_u32(y1, y2);
state_fft[2] = vaddq_u32(y0, y2);
}
cumul0_a = vmlal_high_laneq_u32::<MDSI7>(cumul0_a, state7, mdsv7); // MDS[7]
cumul1_a = vmlal_high_laneq_u32::<MDSI6>(cumul1_a, state7, mdsv6); // MDS[6]
cumul2_a = vmlal_high_laneq_u32::<MDSI5>(cumul2_a, state7, mdsv5); // MDS[5]
cumul3_a = vmlal_high_laneq_u32::<MDSI4>(cumul3_a, state7, mdsv4); // MDS[4]
cumul4_a = vaddw_high_u32(cumul4_a, state7); // MDS[3]
cumul5_a = vmlal_high_laneq_u32::<MDSI2>(cumul5_a, state7, mdsv2); // MDS[2]
cumul6_a = vaddw_high_u32(cumul6_a, state7); // MDS[1]
cumul7_a = vaddw_high_u32(cumul7_a, state7); // MDS[0]
cumul8_a = vmlal_high_laneq_u32::<MDSI11>(cumul8_a, state7, mdsv11); // MDS[11]
cumul9_a = vmlal_high_laneq_u32::<MDSI10>(cumul9_a, state7, mdsv10); // MDS[10]
cumul10_a = vmlal_high_laneq_u32::<MDSI9>(cumul10_a, state7, mdsv9); // MDS[9]
cumul11_a = vmlal_high_laneq_u32::<MDSI8>(cumul11_a, state7, mdsv8); // MDS[8]
// 3x3 real matrix-vector mul for component 2 of the FFTs.
// Multiply the vector `[x0, x1, x2]` by the matrix
// `[[ -4, -8, 32],`
// ` [-32, -4, -8],`
// ` [ 8, -32, -4]]`
// The results are divided by 4 (this ends up cancelling out some later computations).
{
let x0 = state_fft[3];
let x1 = state_fft[4];
let x2 = state_fft[5];
state_fft[3] = vmlsq_laneq_u32::<2>(vmlaq_laneq_u32::<0>(x0, x1, consts), x2, consts);
state_fft[4] = vmlaq_laneq_u32::<0>(vmlaq_laneq_u32::<2>(x1, x0, consts), x2, consts);
state_fft[5] = vmlsq_laneq_u32::<0>(x2, vmlsq_laneq_u32::<1>(x0, x1, consts), consts);
}
cumul0_b = vmlal_laneq_u32::<MDSI8>(cumul0_b, state8, mdsv8); // MDS[8]
cumul1_b = vmlal_laneq_u32::<MDSI7>(cumul1_b, state8, mdsv7); // MDS[7]
cumul2_b = vmlal_laneq_u32::<MDSI6>(cumul2_b, state8, mdsv6); // MDS[6]
cumul3_b = vmlal_laneq_u32::<MDSI5>(cumul3_b, state8, mdsv5); // MDS[5]
cumul4_b = vmlal_laneq_u32::<MDSI4>(cumul4_b, state8, mdsv4); // MDS[4]
cumul5_b = vaddw_u32(cumul5_b, state8); // MDS[3]
cumul6_b = vmlal_laneq_u32::<MDSI2>(cumul6_b, state8, mdsv2); // MDS[2]
cumul7_b = vaddw_u32(cumul7_b, state8); // MDS[1]
cumul8_b = vaddw_u32(cumul8_b, state8); // MDS[0]
cumul9_b = vmlal_laneq_u32::<MDSI11>(cumul9_b, state8, mdsv11); // MDS[11]
cumul10_b = vmlal_laneq_u32::<MDSI10>(cumul10_b, state8, mdsv10); // MDS[10]
cumul11_b = vmlal_laneq_u32::<MDSI9>(cumul11_b, state8, mdsv9); // MDS[9]
// 3x3 complex matrix-vector mul for components 1 and 3 of the FFTs.
// Multiply the vector `[x0r + x0i i, x1r + x1i i, x2r + x2i i]` by the matrix
// `[[ 4 + 2i, 2 + 32i, 2 - 8i],`
// ` [-8 - 2i, 4 + 2i, 2 + 32i],`
// ` [32 - 2i, -8 - 2i, 4 + 2i]]`
// The results are divided by 2 (this ends up cancelling out some later computations).
{
let x0r = state_fft[6];
let x1r = state_fft[7];
let x2r = state_fft[8];
cumul0_a = vmlal_high_laneq_u32::<MDSI9>(cumul0_a, state9, mdsv9); // MDS[9]
cumul1_a = vmlal_high_laneq_u32::<MDSI8>(cumul1_a, state9, mdsv8); // MDS[8]
cumul2_a = vmlal_high_laneq_u32::<MDSI7>(cumul2_a, state9, mdsv7); // MDS[7]
cumul3_a = vmlal_high_laneq_u32::<MDSI6>(cumul3_a, state9, mdsv6); // MDS[6]
cumul4_a = vmlal_high_laneq_u32::<MDSI5>(cumul4_a, state9, mdsv5); // MDS[5]
cumul5_a = vmlal_high_laneq_u32::<MDSI4>(cumul5_a, state9, mdsv4); // MDS[4]
cumul6_a = vaddw_high_u32(cumul6_a, state9); // MDS[3]
cumul7_a = vmlal_high_laneq_u32::<MDSI2>(cumul7_a, state9, mdsv2); // MDS[2]
cumul8_a = vaddw_high_u32(cumul8_a, state9); // MDS[1]
cumul9_a = vaddw_high_u32(cumul9_a, state9); // MDS[0]
cumul10_a = vmlal_high_laneq_u32::<MDSI11>(cumul10_a, state9, mdsv11); // MDS[11]
cumul11_a = vmlal_high_laneq_u32::<MDSI10>(cumul11_a, state9, mdsv10); // MDS[10]
let x0i = state_fft[9];
let x1i = state_fft[10];
let x2i = state_fft[11];
cumul0_b = vmlal_laneq_u32::<MDSI10>(cumul0_b, state10, mdsv10); // MDS[10]
cumul1_b = vmlal_laneq_u32::<MDSI9>(cumul1_b, state10, mdsv9); // MDS[9]
cumul2_b = vmlal_laneq_u32::<MDSI8>(cumul2_b, state10, mdsv8); // MDS[8]
cumul3_b = vmlal_laneq_u32::<MDSI7>(cumul3_b, state10, mdsv7); // MDS[7]
cumul4_b = vmlal_laneq_u32::<MDSI6>(cumul4_b, state10, mdsv6); // MDS[6]
cumul5_b = vmlal_laneq_u32::<MDSI5>(cumul5_b, state10, mdsv5); // MDS[5]
cumul6_b = vmlal_laneq_u32::<MDSI4>(cumul6_b, state10, mdsv4); // MDS[4]
cumul7_b = vaddw_u32(cumul7_b, state10); // MDS[3]
cumul8_b = vmlal_laneq_u32::<MDSI2>(cumul8_b, state10, mdsv2); // MDS[2]
cumul9_b = vaddw_u32(cumul9_b, state10); // MDS[1]
cumul10_b = vaddw_u32(cumul10_b, state10); // MDS[0]
cumul11_b = vmlal_laneq_u32::<MDSI11>(cumul11_b, state10, mdsv11); // MDS[11]
// real part of result <- real part of input
let r0rr = vaddq_u32(vmlaq_laneq_u32::<0>(x1r, x0r, consts), x2r);
let r1rr = vmlaq_laneq_u32::<0>(x2r, vmlsq_laneq_u32::<0>(x1r, x0r, consts), consts);
let r2rr = vmlsq_laneq_u32::<0>(x2r, vmlsq_laneq_u32::<1>(x1r, x0r, consts), consts);
cumul0_a = vmlal_high_laneq_u32::<MDSI11>(cumul0_a, state11, mdsv11); // MDS[11]
cumul1_a = vmlal_high_laneq_u32::<MDSI10>(cumul1_a, state11, mdsv10); // MDS[10]
cumul2_a = vmlal_high_laneq_u32::<MDSI9>(cumul2_a, state11, mdsv9); // MDS[9]
cumul3_a = vmlal_high_laneq_u32::<MDSI8>(cumul3_a, state11, mdsv8); // MDS[8]
cumul4_a = vmlal_high_laneq_u32::<MDSI7>(cumul4_a, state11, mdsv7); // MDS[7]
cumul5_a = vmlal_high_laneq_u32::<MDSI6>(cumul5_a, state11, mdsv6); // MDS[6]
cumul6_a = vmlal_high_laneq_u32::<MDSI5>(cumul6_a, state11, mdsv5); // MDS[5]
cumul7_a = vmlal_high_laneq_u32::<MDSI4>(cumul7_a, state11, mdsv4); // MDS[4]
cumul8_a = vaddw_high_u32(cumul8_a, state11); // MDS[3]
cumul9_a = vmlal_high_laneq_u32::<MDSI2>(cumul9_a, state11, mdsv2); // MDS[2]
cumul10_a = vaddw_high_u32(cumul10_a, state11); // MDS[1]
cumul11_a = vaddw_high_u32(cumul11_a, state11); // MDS[0]
// real part of result <- imaginary part of input
let r0ri = vmlsq_laneq_u32::<1>(vmlaq_laneq_u32::<3>(x0i, x1i, consts), x2i, consts);
let r1ri = vmlsq_laneq_u32::<3>(vsubq_u32(x0i, x1i), x2i, consts);
let r2ri = vsubq_u32(vaddq_u32(x0i, x1i), x2i);
let reduced = [
mds_reduce([[cumul0_a, cumul0_b], [cumul1_a, cumul1_b]]),
mds_reduce([[cumul2_a, cumul2_b], [cumul3_a, cumul3_b]]),
mds_reduce([[cumul4_a, cumul4_b], [cumul5_a, cumul5_b]]),
mds_reduce([[cumul6_a, cumul6_b], [cumul7_a, cumul7_b]]),
mds_reduce([[cumul8_a, cumul8_b], [cumul9_a, cumul9_b]]),
mds_reduce([[cumul10_a, cumul10_b], [cumul11_a, cumul11_b]]),
];
[
vgetq_lane_u64::<0>(reduced[0]),
vgetq_lane_u64::<1>(reduced[0]),
vgetq_lane_u64::<0>(reduced[1]),
vgetq_lane_u64::<1>(reduced[1]),
vgetq_lane_u64::<0>(reduced[2]),
vgetq_lane_u64::<1>(reduced[2]),
vgetq_lane_u64::<0>(reduced[3]),
vgetq_lane_u64::<1>(reduced[3]),
vgetq_lane_u64::<0>(reduced[4]),
vgetq_lane_u64::<1>(reduced[4]),
vgetq_lane_u64::<0>(reduced[5]),
vgetq_lane_u64::<1>(reduced[5]),
]
// real part of result (total)
let r0r = vsubq_u32(r0rr, r0ri);
let r1r = vaddq_u32(r1rr, r1ri);
let r2r = vmlaq_laneq_u32::<0>(r2ri, r2rr, consts);
// imaginary part of result <- real part of input
let r0ir = vmlsq_laneq_u32::<1>(vmlaq_laneq_u32::<3>(x0r, x1r, consts), x2r, consts);
let r1ir = vmlaq_laneq_u32::<3>(vsubq_u32(x1r, x0r), x2r, consts);
let r2ir = vsubq_u32(x2r, vaddq_u32(x0r, x1r));
// imaginary part of result <- imaginary part of input
let r0ii = vaddq_u32(vmlaq_laneq_u32::<0>(x1i, x0i, consts), x2i);
let r1ii = vmlaq_laneq_u32::<0>(x2i, vmlsq_laneq_u32::<0>(x1i, x0i, consts), consts);
let r2ii = vmlsq_laneq_u32::<0>(x2i, vmlsq_laneq_u32::<1>(x1i, x0i, consts), consts);
// imaginary part of result (total)
let r0i = vaddq_u32(r0ir, r0ii);
let r1i = vaddq_u32(r1ir, r1ii);
let r2i = vmlaq_laneq_u32::<0>(r2ir, r2ii, consts);
state_fft[6] = r0r;
state_fft[7] = r1r;
state_fft[8] = r2r;
state_fft[9] = r0i;
state_fft[10] = r1i;
state_fft[11] = r2i;
}
// Three length-4 inverse FFTs.
// Normally, such IFFT would divide by 4, but we've already taken care of that.
for i in 0..3 {
let z0 = state_fft[i];
let z1 = state_fft[i + 3];
let z2 = state_fft[i + 6];
let z3 = state_fft[i + 9];
let y0 = vsubq_u32(z0, z1);
let y1 = vaddq_u32(z0, z1);
let y2 = z2;
let y3 = z3;
let x0 = vaddq_u32(y0, y2);
let x1 = vaddq_u32(y1, y3);
let x2 = vsubq_u32(y0, y2);
let x3 = vsubq_u32(y1, y3);
state_fft[i] = x0;
state_fft[i + 3] = x1;
state_fft[i + 6] = x2;
state_fft[i + 9] = x3;
}
// Perform `res[0] += state[0] * 8` for the diagonal component of the MDS matrix.
state_fft[0] = vmlal_laneq_u16::<4>(
state_fft[0],
vcreate_u16(state[0]), // Each 16-bit chunk gets zero-extended.
vreinterpretq_u16_u32(consts), // Hack: these constants fit in `u16s`, so we can bit-cast.
);
let mut res_arr = [0; 12];
for i in 0..6 {
let res = mds_reduce([state_fft[2 * i], state_fft[2 * i + 1]]);
res_arr[2 * i] = vgetq_lane_u64::<0>(res);
res_arr[2 * i + 1] = vgetq_lane_u64::<1>(res);
}
res_arr
}
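For reference, the length-4 transform computed in the first loop above sends [x0, x1, x2, x3] to [x0 + x1 + x2 + x3, (x0 - x2) + (x1 - x3)i, x0 - x1 + x2 - x3, (x0 - x2) - (x1 - x3)i], i.e. the polynomial x0 + x1 t + x2 t^2 + x3 t^3 evaluated at t = 1, i, -1, -i; this is what the in-code comment `[z0, z2 + z3 i, z1, z2 - z3 i]` denotes, and the last loop applies the corresponding inverse transform (with the division by 4 folded into the matrix constants, as noted above).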
// ======================================== PARTIAL ROUNDS =========================================
/*
#[rustfmt::skip]
macro_rules! mds_reduce_asm {
($c0:literal, $c1:literal, $out:literal, $consts:literal) => {
@ -961,13 +864,15 @@ unsafe fn partial_round(
[res23, res45, res67, res89, res1011],
)
}
*/
// ========================================== GLUE CODE ===========================================
/*
#[inline(always)]
unsafe fn full_round(state: [u64; 12], round_constants: &[u64; WIDTH]) -> [u64; 12] {
let state = sbox_layer_full(state);
mds_const_layers_full(state, round_constants)
mds_layer_full(state, round_constants)
}
#[inline]
@ -1001,43 +906,19 @@ unsafe fn partial_rounds(
}
state.0
}
*/
#[inline(always)]
fn unwrap_state(state: [GoldilocksField; 12]) -> [u64; 12] {
[
state[0].0,
state[1].0,
state[2].0,
state[3].0,
state[4].0,
state[5].0,
state[6].0,
state[7].0,
state[8].0,
state[9].0,
state[10].0,
state[11].0,
]
state.map(|s| s.0)
}
#[inline(always)]
fn wrap_state(state: [u64; 12]) -> [GoldilocksField; 12] {
[
GoldilocksField(state[0]),
GoldilocksField(state[1]),
GoldilocksField(state[2]),
GoldilocksField(state[3]),
GoldilocksField(state[4]),
GoldilocksField(state[5]),
GoldilocksField(state[6]),
GoldilocksField(state[7]),
GoldilocksField(state[8]),
GoldilocksField(state[9]),
GoldilocksField(state[10]),
GoldilocksField(state[11]),
]
state.map(GoldilocksField)
}
/*
#[inline(always)]
pub unsafe fn poseidon(state: [GoldilocksField; 12]) -> [GoldilocksField; 12] {
let state = unwrap_state(state);
@ -1058,6 +939,7 @@ pub unsafe fn poseidon(state: [GoldilocksField; 12]) -> [GoldilocksField; 12] {
let state = full_rounds(state, &FINAL_ROUND_CONSTANTS);
wrap_state(state)
}
*/
#[inline(always)]
pub unsafe fn sbox_layer(state: &mut [GoldilocksField; WIDTH]) {
@ -1067,8 +949,6 @@ pub unsafe fn sbox_layer(state: &mut [GoldilocksField; WIDTH]) {
#[inline(always)]
pub unsafe fn mds_layer(state: &[GoldilocksField; WIDTH]) -> [GoldilocksField; WIDTH] {
let state = unwrap_state(*state);
// We want to do an MDS layer without the constant layer.
let round_consts = [0u64; WIDTH];
let state = mds_const_layers_full(state, &round_consts);
let state = mds_layer_full(state);
wrap_state(state)
}

View File

@ -1,5 +1,5 @@
// Requires:
// - AVX2
// - BMI2 (for MULX and SHRX)
#[cfg(all(target_feature = "avx2", target_feature = "bmi2"))]
pub(crate) mod poseidon_goldilocks_avx2_bmi2;
// // Requires:
// // - AVX2
// // - BMI2 (for MULX and SHRX)
// #[cfg(all(target_feature = "avx2", target_feature = "bmi2"))]
// pub(crate) mod poseidon_goldilocks_avx2_bmi2;

View File

@ -55,96 +55,96 @@ pub const ALL_ROUND_CONSTANTS: [u64; MAX_WIDTH * N_ROUNDS] = [
// WARNING: If these are changed in any way, then all the
// implementations of Poseidon must be regenerated. See comments
// in `poseidon_goldilocks.rs`.
0xb585f767417ee042, 0x7746a55f77c10331, 0xb2fb0d321d356f7a, 0x0f6760a486f1621f,
0xe10d6666b36abcdf, 0x8cae14cb455cc50b, 0xd438539cf2cee334, 0xef781c7d4c1fd8b4,
0xcdc4a23a0aca4b1f, 0x277fa208d07b52e3, 0xe17653a300493d38, 0xc54302f27c287dc1,
0x8628782231d47d10, 0x59cd1a8a690b49f2, 0xc3b919ad9efec0b0, 0xa484c4c637641d97,
0x308bbd23f191398b, 0x6e4a40c1bf713cf1, 0x9a2eedb7510414fb, 0xe360c6e111c2c63b,
0xd5c771901d4d89aa, 0xc35eae076e7d6b2f, 0x849c2656d0a09cad, 0xc0572c8c5cf1df2b,
0xe9fa634a883b8bf3, 0xf56f6d4900fb1fdd, 0xf7d713e872a72a1b, 0x8297132b6ba47612,
0xad6805e12ee8af1c, 0xac51d9f6485c22b9, 0x502ad7dc3bd56bf8, 0x57a1550c3761c577,
0x66bbd30e99d311da, 0x0da2abef5e948f87, 0xf0612750443f8e94, 0x28b8ec3afb937d8c,
0x92a756e6be54ca18, 0x70e741ec304e925d, 0x019d5ee2b037c59f, 0x6f6f2ed7a30707d1,
0x7cf416d01e8c169c, 0x61df517bb17617df, 0x85dc499b4c67dbaa, 0x4b959b48dad27b23,
0xe8be3e5e0dd779a0, 0xf5c0bc1e525ed8e6, 0x40b12cbf263cf853, 0xa637093f13e2ea3c,
0x3cc3f89232e3b0c8, 0x2e479dc16bfe86c0, 0x6f49de07d6d39469, 0x213ce7beecc232de,
0x5b043134851fc00a, 0xa2de45784a861506, 0x7103aaf97bed8dd5, 0x5326fc0dbb88a147,
0xa9ceb750364cb77a, 0x27f8ec88cc9e991f, 0xfceb4fda8c93fb83, 0xfac6ff13b45b260e,
0x7131aa455813380b, 0x93510360d5d68119, 0xad535b24fb96e3db, 0x4627f5c6b7efc045,
0x645cf794e4da78a9, 0x241c70ed1ac2877f, 0xacb8e076b009e825, 0x3737e9db6477bd9d,
0xe7ea5e344cd688ed, 0x90dee4a009214640, 0xd1b1edf7c77e74af, 0x0b65481bab42158e,
0x99ad1aab4b4fe3e7, 0x438a7c91f1a360cd, 0xb60de3bd159088bf, 0xc99cab6b47a3e3bb,
0x69a5ed92d5677cef, 0x5e7b329c482a9396, 0x5fc0ac0829f893c9, 0x32db82924fb757ea,
0x0ade699c5cf24145, 0x7cc5583b46d7b5bb, 0x85df9ed31bf8abcb, 0x6604df501ad4de64,
0xeb84f60941611aec, 0xda60883523989bd4, 0x8f97fe40bf3470bf, 0xa93f485ce0ff2b32,
0x6704e8eebc2afb4b, 0xcee3e9ac788ad755, 0x510d0e66062a270d, 0xf6323f48d74634a0,
0x0b508cdf04990c90, 0xf241708a4ef7ddf9, 0x60e75c28bb368f82, 0xa6217d8c3f0f9989,
0x7159cd30f5435b53, 0x839b4e8fe97ec79f, 0x0d3f3e5e885db625, 0x8f7d83be1daea54b,
0x780f22441e8dbc04, 0xeb9158465aedacd3, 0xd19e120d826c1b6c, 0x016ee53a7f007110,
0xcb5fd54ed22dd1ca, 0xacb84178c58de144, 0x9c22190c2c463227, 0x5d693c1bcc98406d,
0xdcef0798235f321a, 0x3d639263f55e0b1e, 0xe273fd977edb8fda, 0x418f027049d10fe7,
0x8c25fda3f253a284, 0x2cbaed4dc25a884e, 0x5f58e6aff78dc2af, 0x284650ac6fb9d206,
0x635b337f1391c13c, 0x9f9a036f1ac6361f, 0xb93e260cff6747b4, 0xb0a7eae8c7272e33,
0xd0762cbce7da0a9f, 0x34c6efb829c754d6, 0x40bf0ab6166855c1, 0xb6b570fccc46a242,
0x5a27b90055549545, 0xb1a5b166048b306f, 0x8722e0ad24f1006d, 0x788ee3b3b315049a,
0x14a726661e5b0351, 0x98b7672fe1c3f13e, 0xbb93ae77bdc3aa8f, 0x28fd3b04756fc222,
0x30a46805a86d7109, 0x337dc00c7844a0e7, 0xd5eca245253c861b, 0x77626382990d8546,
0xc1e434bf33c3ae7a, 0x0299351a54dbf35e, 0xb2d456e4fb620184, 0x3e9ed1fdc00265ea,
0x2972a92bb672e8db, 0x20216dd789f333ec, 0xadffe8cf746494a1, 0x1c4dbb1c5889d420,
0x15a16a8a8c9972f5, 0x388a128b98960e26, 0x2300e5d6ca3e5589, 0x2f63aa865c9ceb9f,
0xf1c36ce8d894420f, 0x271811252953f84a, 0xe5840293d5466a8e, 0x4d9bbc3e24e5f20e,
0xea35bc29cfa2794b, 0x18e21b4bf59e2d28, 0x1e3b9fc632ef6adb, 0x25d643627a05e678,
0x5a3f1bb1ecb63263, 0xdb7f0238ca031e31, 0xb462065960bfc4c4, 0x49c24ae463c280f4,
0xd793862c6f7b901a, 0xaadd1106bdce475e, 0xc43b6e0eed8ad58f, 0xe29024c1f2060cb7,
0x5e50c2755efbe17a, 0x10383f20ac183625, 0x38e8ee9d8a8a435d, 0xdd511837bcc52452,
0x7750059861a7da6a, 0x86ab99b518d1dbef, 0xb1204f608ccfe33b, 0xef61ac84d8dfca49,
0x1bbcd90f1f4eff36, 0x0cd1dabd9be9850a, 0x11a3ae5bf354bb11, 0xf755bfef11bb5516,
0xa3b832506e2f3adb, 0x516306f4b617e6ba, 0xddb4ac4a2aeead3a, 0x64bb6dec62af4430,
0xf9cc95c29895a152, 0x08d37f75632771b9, 0xeec49b619cee6b56, 0xf143933b56b3711a,
0xe4c5dd82b9f6570c, 0xe7ad775756eefdc4, 0x92c2318bc834ef78, 0x739c25f93007aa0a,
0x5636caca1725f788, 0xdd8f909af47cd0b6, 0xc6401fe16bc24d4e, 0x8ad97b342e6b3a3c,
0x0c49366bb7be8ce2, 0x0784d3d2f4b39fb5, 0x530fb67ec5d77a58, 0x41049229b8221f3b,
0x139542347cb606a3, 0x9cb0bd5ee62e6438, 0x02e3f615c4d3054a, 0x985d4f4adefb64a0,
0x775b9feb32053cde, 0x304265a64d6c1ba6, 0x593664c3be7acd42, 0x4f0a2e5fd2bd6718,
0xdd611f10619bf1da, 0xd8185f9b3e74f9a4, 0xef87139d126ec3b3, 0x3ba71336dd67f99b,
0x7d3a455d8d808091, 0x660d32e15cbdecc7, 0x297a863f5af2b9ff, 0x90e0a736e6b434df,
0x549f80ce7a12182e, 0x0f73b29235fb5b84, 0x16bf1f74056e3a01, 0x6d1f5a593019a39f,
0x02ff876fa73f6305, 0xc5cb72a2fb9a5bd7, 0x8470f39d674dfaa3, 0x25abb3f1e41aea30,
0x23eb8cc9c32951c7, 0xd687ba56242ac4ea, 0xda8d9e915d2de6b7, 0xe3cbdc7d938d8f1e,
0xb9a8c9b4001efad6, 0xc0d28a5c64f2285c, 0x45d7ac9b878575b8, 0xeeb76e39d8da283e,
0x3d06c8bd2fc7daac, 0x9c9c9820c13589f5, 0x65700b51db40bae3, 0x911f451579044242,
0x7ae6849ff1fee8cc, 0x3bb340ebba896ae5, 0xb46e9d8bb71f0b4b, 0x8dcf22f9e1bde2a3,
0x77bdaeda8cc55427, 0xf19e400ababa0e12, 0xc368a34939eb5c7f, 0x9ef1cd612c03bc5e,
0xe89cd8553b94bbd8, 0x5cd377dcb4550713, 0xa7b0fb78cd4c5665, 0x7684403ef76c7128,
0x5fa3f06f79c4f483, 0x8df57ac159dbade6, 0x2db01efa321b2625, 0x54846de4cfd58cb6,
0xba674538aa20f5cd, 0x541d4963699f9777, 0xe9096784dadaa548, 0xdfe8992458bf85ff,
0xece5a71e74a35593, 0x5ff98fd5ff1d14fd, 0x83e89419524c06e1, 0x5922040b6ef03286,
0xf97d750eab002858, 0x5080d4c2dba7b3ec, 0xa7de115ba038b508, 0x6a9242acb5f37ec0,
0xf7856ef865619ed0, 0x2265fc930dbd7a89, 0x17dfc8e5022c723b, 0x9001a64248f2d676,
0x90004c13b0b8b50e, 0xb932b7cfc63485b0, 0xa0b1df81fd4c2bc5, 0x8ef1dd26b594c383,
0x0541a4f9d20ba562, 0x9e611061be0a3c5b, 0xb3767e80e1e1624a, 0x0098d57820a88c6b,
0x31d191cd71e01691, 0x410fefafbf90a57a, 0xbdf8f2433633aea8, 0x9e8cd55b9cc11c28,
0xde122bec4acb869f, 0x4d001fd5b0b03314, 0xca66370067416209, 0x2f2339d6399888c6,
0x6d1a7918f7c98a13, 0xdf9a493995f688f3, 0xebc2151f4ded22ca, 0x03cc2ba8a2bab82f,
0xd341d03844ad9a9b, 0x387cb5d273ab3f58, 0xbba2515f74a7a221, 0x7248fe7737f37d9c,
0x4d61e56a7437f6b9, 0x262e963c9e54bef8, 0x59e89b097477d296, 0x055d5b52b9e47452,
0x82b27eb36e430708, 0xd30094caf3080f94, 0xcf5cb38227c2a3be, 0xfeed4db701262c7c,
0x41703f5391dd0154, 0x5eeea9412666f57b, 0x4cd1f1b196abdbc4, 0x4a20358594b3662b,
0x1478d361e4b47c26, 0x6f02dc0801d2c79f, 0x296a202eeb03c4b6, 0x2afd6799aec20c38,
0x7acfd96f3050383d, 0x6798ba0c380dfdd3, 0x34c6f57b3de02c88, 0x5736e1baf82eb8a0,
0x20057d2a0e58b8de, 0x3dea5bd5eb6e1404, 0x16e50d89874a6a98, 0x29bff3eccbfba19a,
0x475cd3207974793c, 0x18a42105cde34cfa, 0x023e7414b0618331, 0x151471081b52594b,
0xe4a3dff23bdeb0f3, 0x01a8d1a588c232ef, 0x11b4c74ee221d621, 0xe587cc0dce129c8c,
0x1ff7327025a65080, 0x594e29c44b8602b1, 0xf6f31db1f5a56fd3, 0xc02ac5e4c7258a5e,
0xe70201e9c5dc598f, 0x6f90ff3b9b3560b2, 0x42747a7262faf016, 0xd1f507e496927d26,
0x1c86d265fdd24cd9, 0x3996ce73f6b5266e, 0x8e7fba02d68a061e, 0xba0dec71548b7546,
0x9e9cbd785b8d8f40, 0xdae86459f6b3828c, 0xdebe08541314f71d, 0xa49229d29501358f,
0x7be5ba0010c4df7c, 0xa3c95eaf09ecc39c, 0x0230bca8f5d457cd, 0x4135c2bedc68cdf9,
0x166fc0cc4d5b20cc, 0x3762b59aa3236e6e, 0xe8928a4ceed163d2, 0x2a440b51b71223d9,
0x80cefd2bb5f48e46, 0xbb9879c738328b71, 0x6e7c8f1ab47cced0, 0x164bb2de257ffc0a,
0xf3c12fe5b800ea30, 0x40b9e92309e8c7e1, 0x551f5b0fe3b8d017, 0x25032aa7d4fc7aba,
0xaaed340795de0a0a, 0x8ffd96bc38c8ba0f, 0x70fc91eb8aa58833, 0x7f795e2a97566d73,
0x4543d9df72c4831d, 0xf172d73e69f20739, 0xdfd1c4ff1eb3d868, 0xbc8dfb62d26376f7,
0xb585f766f2144405, 0x7746a55f43921ad7, 0xb2fb0d31cee799b4, 0x0f6760a4803427d7,
0xe10d666650f4e012, 0x8cae14cb07d09bf1, 0xd438539c95f63e9f, 0xef781c7ce35b4c3d,
0xcdc4a239b0c44426, 0x277fa208bf337bff, 0xe17653a29da578a1, 0xc54302f225db2c76,
0x86287821f722c881, 0x59cd1a8a41c18e55, 0xc3b919ad495dc574, 0xa484c4c5ef6a0781,
0x308bbd23dc5416cc, 0x6e4a40c18f30c09c, 0x9a2eedb70d8f8cfa, 0xe360c6e0ae486f38,
0xd5c7718fbfc647fb, 0xc35eae071903ff0b, 0x849c2656969c4be7, 0xc0572c8c08cbbbad,
0xe9fa634a21de0082, 0xf56f6d48959a600d, 0xf7d713e806391165, 0x8297132b32825daf,
0xad6805e0e30b2c8a, 0xac51d9f5fcf8535e, 0x502ad7dc18c2ad87, 0x57a1550c110b3041,
0x66bbd30e6ce0e583, 0x0da2abef589d644e, 0xf061274fdb150d61, 0x28b8ec3ae9c29633,
0x92a756e67e2b9413, 0x70e741ebfee96586, 0x019d5ee2af82ec1c, 0x6f6f2ed772466352,
0x7cf416cfe7e14ca1, 0x61df517b86a46439, 0x85dc499b11d77b75, 0x4b959b48b9c10733,
0xe8be3e5da8043e57, 0xf5c0bc1de6da8699, 0x40b12cbf09ef74bf, 0xa637093ecb2ad631,
0x3cc3f892184df408, 0x2e479dc157bf31bb, 0x6f49de07a6234346, 0x213ce7bede378d7b,
0x5b0431345d4dea83, 0xa2de45780344d6a1, 0x7103aaf94a7bf308, 0x5326fc0d97279301,
0xa9ceb74fec024747, 0x27f8ec88bb21b1a3, 0xfceb4fda1ded0893, 0xfac6ff1346a41675,
0x7131aa45268d7d8c, 0x9351036095630f9f, 0xad535b24afc26bfb, 0x4627f5c6993e44be,
0x645cf794b8f1cc58, 0x241c70ed0af61617, 0xacb8e076647905f1, 0x3737e9db4c4f474d,
0xe7ea5e33e75fffb6, 0x90dee49fc9bfc23a, 0xd1b1edf76bc09c92, 0x0b65481ba645c602,
0x99ad1aab0814283b, 0x438a7c91d416ca4d, 0xb60de3bcc5ea751c, 0xc99cab6aef6f58bc,
0x69a5ed92a72ee4ff, 0x5e7b329c1ed4ad71, 0x5fc0ac0800144885, 0x32db829239774eca,
0x0ade699c5830f310, 0x7cc5583b10415f21, 0x85df9ed2e166d64f, 0x6604df4fee32bcb1,
0xeb84f608da56ef48, 0xda608834c40e603d, 0x8f97fe408061f183, 0xa93f485c96f37b89,
0x6704e8ee8f18d563, 0xcee3e9ac1e072119, 0x510d0e65e2b470c1, 0xf6323f486b9038f0,
0x0b508cdeffa5ceef, 0xf2417089e4fb3cbd, 0x60e75c2890d15730, 0xa6217d8bf660f29c,
0x7159cd30c3ac118e, 0x839b4e8fafead540, 0x0d3f3e5e82920adc, 0x8f7d83bddee7bba8,
0x780f2243ea071d06, 0xeb915845f3de1634, 0xd19e120d26b6f386, 0x016ee53a7e5fecc6,
0xcb5fd54e7933e477, 0xacb8417879fd449f, 0x9c22190be7f74732, 0x5d693c1ba3ba3621,
0xdcef0797c2b69ec7, 0x3d639263da827b13, 0xe273fd971bc8d0e7, 0x418f02702d227ed5,
0x8c25fda3b503038c, 0x2cbaed4daec8c07c, 0x5f58e6afcdd6ddc2, 0x284650ac5e1b0eba,
0x635b337ee819dab5, 0x9f9a036ed4f2d49f, 0xb93e260cae5c170e, 0xb0a7eae879ddb76d,
0xd0762cbc8ca6570c, 0x34c6efb812b04bf5, 0x40bf0ab5fa14c112, 0xb6b570fc7c5740d3,
0x5a27b9002de33454, 0xb1a5b165b6d2b2d2, 0x8722e0ace9d1be22, 0x788ee3b37e5680fb,
0x14a726661551e284, 0x98b7672f9ef3b419, 0xbb93ae776bb30e3a, 0x28fd3b046380f850,
0x30a4680593258387, 0x337dc00c61bd9ce1, 0xd5eca244c7a4ff1d, 0x7762638264d279bd,
0xc1e434bedeefd767, 0x0299351a53b8ec22, 0xb2d456e4ad251b80, 0x3e9ed1fda49cea0b,
0x2972a92ba450bed8, 0x20216dd77be493de, 0xadffe8cf28449ec6, 0x1c4dbb1c4c27d243,
0x15a16a8a8322d458, 0x388a128b7fd9a609, 0x2300e5d6baedf0fb, 0x2f63aa8647e15104,
0xf1c36ce86ecec269, 0x27181125183970c9, 0xe584029370dca96d, 0x4d9bbc3e02f1cfb2,
0xea35bc29692af6f8, 0x18e21b4beabb4137, 0x1e3b9fc625b554f4, 0x25d64362697828fd,
0x5a3f1bb1c53a9645, 0xdb7f023869fb8d38, 0xb462065911d4e1fc, 0x49c24ae4437d8030,
0xd793862c112b0566, 0xaadd1106730d8feb, 0xc43b6e0e97b0d568, 0xe29024c18ee6fca2,
0x5e50c27535b88c66, 0x10383f20a4ff9a87, 0x38e8ee9d71a45af8, 0xdd5118375bf1a9b9,
0x775005982d74d7f7, 0x86ab99b4dde6c8b0, 0xb1204f603f51c080, 0xef61ac8470250ecf,
0x1bbcd90f132c603f, 0x0cd1dabd964db557, 0x11a3ae5beb9d1ec9, 0xf755bfeea585d11d,
0xa3b83250268ea4d7, 0x516306f4927c93af, 0xddb4ac49c9efa1da, 0x64bb6dec369d4418,
0xf9cc95c22b4c1fcc, 0x08d37f755f4ae9f6, 0xeec49b613478675b, 0xf143933aed25e0b0,
0xe4c5dd8255dfc622, 0xe7ad7756f193198e, 0x92c2318b87fff9cb, 0x739c25f8fd73596d,
0x5636cac9f16dfed0, 0xdd8f909a938e0172, 0xc6401fe115063f5b, 0x8ad97b33f1ac1455,
0x0c49366bb25e8513, 0x0784d3d2f1698309, 0x530fb67ea1809a81, 0x410492299bb01f49,
0x139542347424b9ac, 0x9cb0bd5ea1a1115e, 0x02e3f615c38f49a1, 0x985d4f4a9c5291ef,
0x775b9feafdcd26e7, 0x304265a6384f0f2d, 0x593664c39773012c, 0x4f0a2e5fb028f2ce,
0xdd611f1000c17442, 0xd8185f9adfea4fd0, 0xef87139ca9a3ab1e, 0x3ba71336c34ee133,
0x7d3a455d56b70238, 0x660d32e130182684, 0x297a863f48cd1f43, 0x90e0a736a751ebb7,
0x549f80ce550c4fd3, 0x0f73b2922f38bd64, 0x16bf1f73fb7a9c3f, 0x6d1f5a59005bec17,
0x02ff876fa5ef97c4, 0xc5cb72a2a51159b0, 0x8470f39d2d5c900e, 0x25abb3f1d39fcb76,
0x23eb8cc9b372442f, 0xd687ba55c64f6364, 0xda8d9e90fd8ff158, 0xe3cbdc7d2fe45ea7,
0xb9a8c9b3aee52297, 0xc0d28a5c10960bd3, 0x45d7ac9b68f71a34, 0xeeb76e397069e804,
0x3d06c8bd1514e2d9, 0x9c9c98207cb10767, 0x65700b51aedfb5ef, 0x911f451539869408,
0x7ae6849fbc3a0ec6, 0x3bb340eba06afe7e, 0xb46e9d8b682ea65e, 0x8dcf22f9a3b34356,
0x77bdaeda586257a7, 0xf19e400a5104d20d, 0xc368a348e46d950f, 0x9ef1cd60e679f284,
0xe89cd854d5d01d33, 0x5cd377dc8bb882a2, 0xa7b0fb7883eee860, 0x7684403ec392950d,
0x5fa3f06f4fed3b52, 0x8df57ac11bc04831, 0x2db01efa1e1e1897, 0x54846de4aadb9ca2,
0xba6745385893c784, 0x541d496344d2c75b, 0xe909678474e687fe, 0xdfe89923f6c9c2ff,
0xece5a71e0cfedc75, 0x5ff98fd5d51fe610, 0x83e8941918964615, 0x5922040b47f150c1,
0xf97d750e3dd94521, 0x5080d4c2b86f56d7, 0xa7de115b56c78d70, 0x6a9242ac87538194,
0xf7856ef7f9173e44, 0x2265fc92feb0dc09, 0x17dfc8e4f7ba8a57, 0x9001a64209f21db8,
0x90004c1371b893c5, 0xb932b7cf752e5545, 0xa0b1df81b6fe59fc, 0x8ef1dd26770af2c2,
0x0541a4f9cfbeed35, 0x9e61106178bfc530, 0xb3767e80935d8af2, 0x0098d5782065af06,
0x31d191cd5c1466c7, 0x410fefafa319ac9d, 0xbdf8f242e316c4ab, 0x9e8cd55b57637ed0,
0xde122bebe9a39368, 0x4d001fd58f002526, 0xca6637000eb4a9f8, 0x2f2339d624f91f78,
0x6d1a7918c80df518, 0xdf9a4939342308e9, 0xebc2151ee6c8398c, 0x03cc2ba8a1116515,
0xd341d037e840cf83, 0x387cb5d25af4afcc, 0xbba2515f22909e87, 0x7248fe7705f38e47,
0x4d61e56a525d225a, 0x262e963c8da05d3d, 0x59e89b094d220ec2, 0x055d5b52b78b9c5e,
0x82b27eb33514ef99, 0xd30094ca96b7ce7b, 0xcf5cb381cd0a1535, 0xfeed4db6919e5a7c,
0x41703f53753be59f, 0x5eeea940fcde8b6f, 0x4cd1f1b175100206, 0x4a20358574454ec0,
0x1478d361dbbf9fac, 0x6f02dc07d141875c, 0x296a202ed8e556a2, 0x2afd67999bf32ee5,
0x7acfd96efa95491d, 0x6798ba0c0abb2c6d, 0x34c6f57b26c92122, 0x5736e1bad206b5de,
0x20057d2a0056521b, 0x3dea5bd5d0578bd7, 0x16e50d897d4634ac, 0x29bff3ecb9b7a6e3,
0x475cd3205a3bdcde, 0x18a42105c31b7e88, 0x023e7414af663068, 0x15147108121967d7,
0xe4a3dff1d7d6fef9, 0x01a8d1a588085737, 0x11b4c74eda62beef, 0xe587cc0d69a73346,
0x1ff7327017aa2a6e, 0x594e29c42473d06b, 0xf6f31db1899b12d5, 0xc02ac5e47312d3ca,
0xe70201e960cb78b8, 0x6f90ff3b6a65f108, 0x42747a7245e7fa84, 0xd1f507e43ab749b2,
0x1c86d265f15750cd, 0x3996ce73dd832c1c, 0x8e7fba02983224bd, 0xba0dec7103255dd4,
0x9e9cbd781628fc5b, 0xdae8645996edd6a5, 0xdebe0853b1a1d378, 0xa49229d24d014343,
0x7be5b9ffda905e1c, 0xa3c95eaec244aa30, 0x0230bca8f4df0544, 0x4135c2bebfe148c6,
0x166fc0cc438a3c72, 0x3762b59a8ae83efa, 0xe8928a4c89114750, 0x2a440b51a4945ee5,
0x80cefd2b7d99ff83, 0xbb9879c6e61fd62a, 0x6e7c8f1a84265034, 0x164bb2de1bbeddc8,
0xf3c12fe54d5c653b, 0x40b9e922ed9771e2, 0x551f5b0fbe7b1840, 0x25032aa7c4cb1811,
0xaaed34074b164346, 0x8ffd96bbf9c9c81d, 0x70fc91eb5937085c, 0x7f795e2a5f915440,
0x4543d9df5476d3cb, 0xf172d73e004fc90d, 0xdfd1c4febcc81238, 0xbc8dfb627fe558fc,
];
const WIDTH: usize = SPONGE_WIDTH;
@@ -153,9 +153,10 @@ pub trait Poseidon: PrimeField64 {
// times number of rounds.
const N_ROUND_CONSTANTS: usize = WIDTH * N_ROUNDS;
// Use the MDS matrix which is circulant with entries 2^x for each
// x in MDS_MATRIX_EXPS.
const MDS_MATRIX_EXPS: [u64; WIDTH];
// The MDS matrix we use is C + D, where C is the circulant matrix whose first row is given by
// `MDS_MATRIX_CIRC`, and D is the diagonal matrix whose diagonal is given by `MDS_MATRIX_DIAG`.
const MDS_MATRIX_CIRC: [u64; WIDTH];
const MDS_MATRIX_DIAG: [u64; WIDTH];
// Precomputed constants for the fast Poseidon calculation. See
// the paper.
@@ -169,9 +170,10 @@ pub trait Poseidon: PrimeField64 {
#[unroll_for_loops]
fn mds_row_shf(r: usize, v: &[u64; WIDTH]) -> u128 {
debug_assert!(r < WIDTH);
// The values of MDS_MATRIX_EXPS are known to be small, so we can
// accumulate all the products for each row and reduce just once
// at the end (done by the caller).
// The values of `MDS_MATRIX_CIRC` and `MDS_MATRIX_DIAG` are
// known to be small, so we can accumulate all the products for
// each row and reduce just once at the end (done by the
// caller).
// NB: Unrolling this, calculating each term independently, and
// summing at the end, didn't improve performance for me.
@@ -180,9 +182,10 @@ pub trait Poseidon: PrimeField64 {
// This is a hacky way of fully unrolling the loop.
for i in 0..12 {
if i < WIDTH {
res += (v[(i + r) % WIDTH] as u128) << Self::MDS_MATRIX_EXPS[i];
res += (v[(i + r) % WIDTH] as u128) * (Self::MDS_MATRIX_CIRC[i] as u128);
}
}
res += (v[r] as u128) * (Self::MDS_MATRIX_DIAG[r] as u128);
res
}
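A quick bound behind the "reduce just once at the end" comment above, using the Goldilocks constants that appear later in this diff: every state element is below 2^64, the circulant entries are at most 41 < 2^6, and the only nonzero diagonal entry is 8, so one row accumulates at most 12 * 41 * 2^64 + 8 * 2^64 < 2^73, which fits comfortably in the `u128` accumulator before the caller's single reduction.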
@@ -196,8 +199,9 @@ pub trait Poseidon: PrimeField64 {
let mut res = F::ZERO;
for i in 0..WIDTH {
res += v[(i + r) % WIDTH] * F::from_canonical_u64(1 << Self::MDS_MATRIX_EXPS[i]);
res += v[(i + r) % WIDTH] * F::from_canonical_u64(Self::MDS_MATRIX_CIRC[i]);
}
res += v[r] * F::from_canonical_u64(Self::MDS_MATRIX_DIAG[r]);
res
}
@@ -215,9 +219,13 @@ pub trait Poseidon: PrimeField64 {
let mut res = builder.zero_extension();
for i in 0..WIDTH {
let c = Self::from_canonical_u64(1 << <Self as Poseidon>::MDS_MATRIX_EXPS[i]);
let c = Self::from_canonical_u64(<Self as Poseidon>::MDS_MATRIX_CIRC[i]);
res = builder.mul_const_add_extension(c, v[(i + r) % WIDTH], res);
}
{
let c = Self::from_canonical_u64(<Self as Poseidon>::MDS_MATRIX_DIAG[r]);
res = builder.mul_const_add_extension(c, v[r], res);
}
res
}
@@ -395,7 +403,8 @@ pub trait Poseidon: PrimeField64 {
}
}
let s0 = state[0].to_noncanonical_u64() as u128;
d_sum = add_u160_u128(d_sum, s0 << Self::MDS_MATRIX_EXPS[0]);
let mds0to0 = (Self::MDS_MATRIX_CIRC[0] + Self::MDS_MATRIX_DIAG[0]) as u128;
d_sum = add_u160_u128(d_sum, s0 * mds0to0);
let d = reduce_u160::<Self>(d_sum);
// result = [d] concat [state[0] * v + state[shift up by 1]]
@@ -416,7 +425,8 @@ pub trait Poseidon: PrimeField64 {
r: usize,
) -> [F; WIDTH] {
let s0 = state[0];
let mut d = s0 * F::from_canonical_u64(1 << Self::MDS_MATRIX_EXPS[0]);
let mds0to0 = Self::MDS_MATRIX_CIRC[0] + Self::MDS_MATRIX_DIAG[0];
let mut d = s0 * F::from_canonical_u64(mds0to0);
for i in 1..WIDTH {
let t = F::from_canonical_u64(Self::FAST_PARTIAL_ROUND_W_HATS[r][i - 1]);
d += state[i] * t;
@@ -442,10 +452,8 @@ pub trait Poseidon: PrimeField64 {
Self: RichField + Extendable<D>,
{
let s0 = state[0];
let mut d = builder.mul_const_extension(
Self::from_canonical_u64(1 << <Self as Poseidon>::MDS_MATRIX_EXPS[0]),
s0,
);
let mds0to0 = Self::MDS_MATRIX_CIRC[0] + Self::MDS_MATRIX_DIAG[0];
let mut d = builder.mul_const_extension(Self::from_canonical_u64(mds0to0), s0);
for i in 1..WIDTH {
let t = <Self as Poseidon>::FAST_PARTIAL_ROUND_W_HATS[r][i - 1];
let t = Self::Extension::from_canonical_u64(t);

View File

@@ -10,8 +10,8 @@ use crate::hash::poseidon::{Poseidon, N_PARTIAL_ROUNDS};
#[rustfmt::skip]
impl Poseidon for GoldilocksField {
// The MDS matrix we use is the circulant matrix with first row given by the vector
// [ 2^x for x in MDS_MATRIX_EXPS] = [1, 1, 2, 1, 8, 32, 2, 256, 4096, 8, 65536, 1024]
// The MDS matrix we use is C + D, where C is the circulant matrix whose first row is given by
// `MDS_MATRIX_CIRC`, and D is the diagonal matrix whose diagonal is given by `MDS_MATRIX_DIAG`.
//
// WARNING: If the MDS matrix is changed, then the following
// constants need to be updated accordingly:
@@ -19,237 +19,238 @@ impl Poseidon for GoldilocksField {
// - FAST_PARTIAL_ROUND_VS
// - FAST_PARTIAL_ROUND_W_HATS
// - FAST_PARTIAL_ROUND_INITIAL_MATRIX
const MDS_MATRIX_EXPS: [u64; 12] = [0, 0, 1, 0, 3, 5, 1, 8, 12, 3, 16, 10];
const MDS_MATRIX_CIRC: [u64; 12] = [17, 15, 41, 16, 2, 28, 13, 13, 39, 18, 34, 20];
const MDS_MATRIX_DIAG: [u64; 12] = [8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
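With these constants, the `MDS_MATRIX_CIRC[0] + MDS_MATRIX_DIAG[0]` term used by the partial-round hunks in the previous file is 17 + 8 = 25, i.e. the (0,0) entry of C + D; it replaces the old `1 << MDS_MATRIX_EXPS[0]`, which was 1.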
const FAST_PARTIAL_FIRST_ROUND_CONSTANT: [u64; 12] = [
0x3cc3f89232e3b0c8, 0x3a8304bc56985013, 0x2a9f75c2280d2a8e, 0x53b9e0fac07c9b2b,
0x276ef5190ab36dd6, 0xdccc95c1f434ce8d, 0x28d717d689301db6, 0x2662f1723650b872,
0xc6b0375cf47850da, 0xbdfcca7661d81f17, 0x911992a4f6d9591f, 0xb718e4720c9f542f,
0x3cc3f892184df408, 0xe993fd841e7e97f1, 0xf2831d3575f0f3af, 0xd2500e0a350994ca,
0xc5571f35d7288633, 0x91d89c5184109a02, 0xf37f925d04e5667b, 0x2d6e448371955a69,
0x740ef19ce01398a1, 0x694d24c0752fdf45, 0x60936af96ee2f148, 0xc33448feadc78f0c,
];
const FAST_PARTIAL_ROUND_CONSTANTS: [u64; N_PARTIAL_ROUNDS] = [
0x1c92804be083d129, 0x81d932f4620fcfc6, 0x29f58a72045f76a0, 0x434472d6c6e34f30,
0xc82c90fad781bb5c, 0xe6dfefae3135c450, 0xd0a0c9c9fff4798f, 0x97517f4034e7c8e6,
0xae8b5030952e5949, 0xf77251b77cc297e2, 0x879c3a97606f1160, 0xed4e1e98780bdc19,
0x5a9120e0c05b1660, 0xc4b244ea04b27221, 0x7fe9d55a335d7b82, 0xd69ff91c66ec999a,
0x4c389b1b8180f1f5, 0x1b289f8c7fdeea1e, 0x3d464c75140b20e7, 0x74d158e1be40eb73,
0xfc787193d2a84ea4, 0x0,
0x74cb2e819ae421ab, 0xd2559d2370e7f663, 0x62bf78acf843d17c, 0xd5ab7b67e14d1fb4,
0xb9fe2ae6e0969bdc, 0xe33fdf79f92a10e8, 0x0ea2bb4c2b25989b, 0xca9121fbf9d38f06,
0xbdd9b0aa81f58fa4, 0x83079fa4ecf20d7e, 0x650b838edfcc4ad3, 0x77180c88583c76ac,
0xaf8c20753143a180, 0xb8ccfe9989a39175, 0x954a1729f60cc9c5, 0xdeb5b550c4dca53b,
0xf01bb0b00f77011e, 0xa1ebb404b676afd9, 0x860b6e1597a0173e, 0x308bb65a036acbce,
0x1aca78f31c97c876, 0x0,
];
const FAST_PARTIAL_ROUND_VS: [[u64; 12 - 1]; N_PARTIAL_ROUNDS] = [
[0x9a5dd25dc32e6569, 0xd4b82de00e7510fa, 0x165bdcd7b344404a, 0xa85b4c126b8edfd4,
0xcd2735bf92ab4f96, 0xdc07742c7da8ac41, 0x953fc266fc5ae49f, 0x0a151c20bfc847bf,
0x0c550caef5afedb5, 0x74d28901888c5fa8, 0xdc51b68c30cc1741, ],
[0x4f765e0a4246c828, 0xbbdc8cbadd477a84, 0x052a5abd7de2344c, 0xab88daa04d9c7fab,
0xbc8fd7acbee798ef, 0xe55d796c0d8a7a09, 0x40824732ed2c556c, 0x298a94d56eabeaa4,
0x719fcd5e11312b6c, 0x1ec9a560131d1ac7, 0xabc54a42497f7fd1, ],
[0xb51f81e6eeeeb0d6, 0xc6f3c34e7161d1ef, 0x1e93b9e2255eed5b, 0xa78338e63ec48cc2,
0xea6e89d1c7220a56, 0xaa52f6a1c2814bc5, 0x5896b6395e09fba0, 0xf7fc97a18d5f1eee,
0xf2712e64111823e8, 0x4f84821bf1f857f4, 0x02041415d72da206, ],
[0x39286a4a4a391e77, 0x4ac16c7bebc97214, 0x7427cbbcb895a01f, 0x2ef8491d0b14759b,
0xbec7625ee20fa616, 0x7c64393faf749b6f, 0x0f61c751c9826dc5, 0x700e6f3ee8ccb8a7,
0x5bdea3b447ef8667, 0xa0f569a5a6e97588, 0xcc9e78115d7cae2d, ],
[0x0933079ab678e5ee, 0xed6861bf33c54a28, 0x62503e6e1749a497, 0x745a9c65dea83ac6,
0x20ce351f6e700cf0, 0x2ec0b18d30fafb8a, 0x0312f54c22b5f299, 0x5222977218fd6cd5,
0x82662e8445868eec, 0xc4cab6335040265d, 0x12e5790e9efb9217, ],
[0x0d829aec63871f55, 0x384d8a425086dd8c, 0x13e78b54657bfd3e, 0x2a45a17a03093566,
0x7b6872656233b9be, 0xddc0281bb12bbb4c, 0xa224ebff0652d7c8, 0xc5ca97207780ea5c,
0x484236194d3586ba, 0x432a56d44a44f3f7, 0xc41f926f862fc532, ],
[0x9366cd7ed9ef5e06, 0xd7f941098175f223, 0x9af7dda3e1c9f2b1, 0x9a0ec6d0a03525f5,
0x3ab244f4fb0fb387, 0xd8c4e357eb1d5778, 0xe62157e2e25edbbb, 0xafcd6630f841f1f8,
0xc3969199738708fb, 0xa8224d311e6a551f, 0xc2c0a01fc655fd9f, ],
[0xd78498f2013cd9b6, 0x675d21a200b2908c, 0x70bfd23b9e88c707, 0x85472dcbcfd078e3,
0x5658c961cfffd574, 0x89e05a2cda3ca315, 0x1b51ae1ff8186a9f, 0xca648f8c6c7822cb,
0x7233c92647957f4d, 0x520bf21c62d37ffa, 0x897496c7407a2ca7, ],
[0x8e80cf5bca4eee19, 0x754779126bc1afcf, 0x07e887764b379cb0, 0x7dc7c14e12f91d5e,
0xc8f5dab5fb6b0264, 0x1c842cf8021f9176, 0x69b56a7e2e2db2c0, 0xf30253f77fef3445,
0x14bb3a62919efb99, 0xff9976d424a5d89c, 0x59dde7be0331a202, ],
[0xdbe04b62126330a2, 0x0409b2138da1eaec, 0x7bd4558eb2262691, 0xafa86cfa8d52b05b,
0xb83f570197d8c584, 0xb3ded6cc13990ac1, 0xfd33937cb072c9e1, 0xe3b3989341d92952,
0xd26e76d6ca949ad9, 0x35c89a8548f88e86, 0x8af785bd940c3b43, ],
[0xcbf3b86701c790da, 0x63634f67e29f4005, 0x008f903982363b81, 0xc2b07f99d6eb0229,
0xa8344b83d15e2558, 0x880f4e5fd103b7b0, 0xd40eddb0a5929072, 0x476e27ccee571f49,
0xe71439b4b989f9eb, 0x97e55074f852b2fe, 0xdd258c2137e1a2c5, ],
[0x982b90366d23259b, 0xb2667eacaa76b306, 0xecf233e82020ede1, 0x3cee7ac07d4a88c7,
0x31428be2fe5a5854, 0xf1beea1d55c4c4db, 0x584fd6b580f1ffd2, 0x6e2381c3c8ba0d0b,
0x21ab749cbafc0611, 0x8ed389f39aba3001, 0xa24ba694f2b42f13, ],
[0xdb30cd9db02606f9, 0x1b0d6736682ba257, 0x0d3bcdecf5808443, 0x31c330001dbd3dbd,
0x9684d22370447946, 0xde0e24e6426c6935, 0xf487270dd081ef69, 0xd943f4ef48f2b252,
0x4c52a7fdd1c52d24, 0xc293082029ea139d, 0xc2ba73ab3da0468a, ],
[0xd093bd0dcc74e0d1, 0xe91428f9ce6a98e5, 0x673dee716909dc21, 0xf22e3223548219d7,
0x3297978d881a1300, 0x51157b1e8218d77c, 0x0e3b0a5c07843889, 0x273b48dfa36752b6,
0x5dbf2c6323576866, 0x1c032b70763df9a7, 0x1a8d7ed4159ecbf4, ],
[0x8e40b29fa6c4f3ad, 0x43bc06dba91daa9b, 0x445df1620dd6d846, 0xae1e72ed68c45c46,
0x496ee4e593ade46d, 0x1d3642eddce9118f, 0x71a88114bd8fd755, 0x4a10d6b22514943d,
0x56dca305d4d72fee, 0xe2e4d9ce95fa62bf, 0xfb6bfffd47b50b0a, ],
[0x4c6c14946cc557ee, 0x9b1bcbaac7ba3226, 0xdd7410361fa0dd20, 0x9c8a098cbaf95b26,
0x3da4f26593503adf, 0xffb07b45cd3bf859, 0xaf034373af54a559, 0xd6b9bace407146bb,
0x7b92c04c972f4ec6, 0xfe71df71165b9845, 0xad0134b9dc9ebe51, ],
[0xfdaa64ceec88aa7c, 0x565342e2d815525c, 0xe382458f259429a8, 0x0f6ba5afd5d1d1ca,
0xcba85de412439a41, 0x212d3c62049ccb1a, 0x930c0bf5950267e3, 0x60f87fe43fc560d8,
0x8f1fbdbcd878a33b, 0xd28b789abf9af16f, 0xd921f0434fa0eb07, ],
[0xd69c2c80635e7c18, 0x5a3d78c8772f293f, 0x844fe5e72ad1ceb5, 0x81b217e5910dc916,
0x2951409fb7c8ba85, 0x5c135dd95693e367, 0xc2e8a723f9f7ebd2, 0x10bb79bf5d63f38d,
0x34625b1550385a89, 0xdc6235328d791163, 0x1eb12b7aed4d5133, ],
[0x01426faca89577d0, 0x003ca90136ac4fd0, 0x00289223dc45a17f, 0x0009921704320612,
0x0007efae3669e451, 0x006499f206b3349d, 0x1001120d9b5dcfe1, 0x000e3aa47db4da94,
0x0320dc8339d35692, 0x4030a0a16247ecbd, 0x04368a659c160a6b, ],
[0x0000001237b408f0, 0x00000004c8f1b79c, 0x0000000446de5309, 0x00000032a3e2d4ac,
0x00000c007600eeb7, 0x000100040ee771b0, 0x00000198394d0817, 0x0000301810a981ba,
0x0000030f37d86f5a, 0x0000030ab1cc04d4, 0x000000c0e7c0b7e9, ],
[0x00000000000234a0, 0x0000000000114630, 0x000000000800260c, 0x0000000100005288,
0x0000000000900194, 0x00000000200800a3, 0x0000000002011034, 0x000000000105100e,
0x0000000000604025, 0x0000000000114a03, 0x0000000000061481, ],
[0x0000000000000400, 0x0000000000010000, 0x0000000000000008, 0x0000000000001000,
0x0000000000000100, 0x0000000000000002, 0x0000000000000020, 0x0000000000000008,
0x0000000000000001, 0x0000000000000002, 0x0000000000000001, ],
[0x94877900674181c3, 0xc6c67cc37a2a2bbd, 0xd667c2055387940f, 0x0ba63a63e94b5ff0,
0x99460cc41b8f079f, 0x7ff02375ed524bb3, 0xea0870b47a8caf0e, 0xabcad82633b7bc9d,
0x3b8d135261052241, 0xfb4515f5e5b0d539, 0x3ee8011c2b37f77c, ],
[0x0adef3740e71c726, 0xa37bf67c6f986559, 0xc6b16f7ed4fa1b00, 0x6a065da88d8bfc3c,
0x4cabc0916844b46f, 0x407faac0f02e78d1, 0x07a786d9cf0852cf, 0x42433fb6949a629a,
0x891682a147ce43b0, 0x26cfd58e7b003b55, 0x2bbf0ed7b657acb3, ],
[0x481ac7746b159c67, 0xe367de32f108e278, 0x73f260087ad28bec, 0x5cfc82216bc1bdca,
0xcaccc870a2663a0e, 0xdb69cd7b4298c45d, 0x7bc9e0c57243e62d, 0x3cc51c5d368693ae,
0x366b4e8cc068895b, 0x2bd18715cdabbca4, 0xa752061c4f33b8cf, ],
[0xb22d2432b72d5098, 0x9e18a487f44d2fe4, 0x4b39e14ce22abd3c, 0x9e77fde2eb315e0d,
0xca5e0385fe67014d, 0x0c2cb99bf1b6bddb, 0x99ec1cd2a4460bfe, 0x8577a815a2ff843f,
0x7d80a6b4fd6518a5, 0xeb6c67123eab62cb, 0x8f7851650eca21a5, ],
[0x11ba9a1b81718c2a, 0x9f7d798a3323410c, 0xa821855c8c1cf5e5, 0x535e8d6fac0031b2,
0x404e7c751b634320, 0xa729353f6e55d354, 0x4db97d92e58bb831, 0xb53926c27897bf7d,
0x965040d52fe115c5, 0x9565fa41ebd31fd7, 0xaae4438c877ea8f4, ],
[0x37f4e36af6073c6e, 0x4edc0918210800e9, 0xc44998e99eae4188, 0x9f4310d05d068338,
0x9ec7fe4350680f29, 0xc5b2c1fdc0b50874, 0xa01920c5ef8b2ebe, 0x59fa6f8bd91d58ba,
0x8bfc9eb89b515a82, 0xbe86a7a2555ae775, 0xcbb8bbaa3810babf, ],
[0x577f9a9e7ee3f9c2, 0x88c522b949ace7b1, 0x82f07007c8b72106, 0x8283d37c6675b50e,
0x98b074d9bbac1123, 0x75c56fb7758317c1, 0xfed24e206052bc72, 0x26d7c3d1bc07dae5,
0xf88c5e441e28dbb4, 0x4fe27f9f96615270, 0x514d4ba49c2b14fe, ],
[0xf02a3ac068ee110b, 0x0a3630dafb8ae2d7, 0xce0dc874eaf9b55c, 0x9a95f6cff5b55c7e,
0x626d76abfed00c7b, 0xa0c1cf1251c204ad, 0xdaebd3006321052c, 0x3d4bd48b625a8065,
0x7f1e584e071f6ed2, 0x720574f0501caed3, 0xe3260ba93d23540a, ],
[0xab1cbd41d8c1e335, 0x9322ed4c0bc2df01, 0x51c3c0983d4284e5, 0x94178e291145c231,
0xfd0f1a973d6b2085, 0xd427ad96e2b39719, 0x8a52437fecaac06b, 0xdc20ee4b8c4c9a80,
0xa2c98e9549da2100, 0x1603fe12613db5b6, 0x0e174929433c5505, ],
[0x3d4eab2b8ef5f796, 0xcfff421583896e22, 0x4143cb32d39ac3d9, 0x22365051b78a5b65,
0x6f7fd010d027c9b6, 0xd9dd36fba77522ab, 0xa44cf1cb33e37165, 0x3fc83d3038c86417,
0xc4588d418e88d270, 0xce1320f10ab80fe2, 0xdb5eadbbec18de5d, ],
[0x1183dfce7c454afd, 0x21cea4aa3d3ed949, 0x0fce6f70303f2304, 0x19557d34b55551be,
0x4c56f689afc5bbc9, 0xa1e920844334f944, 0xbad66d423d2ec861, 0xf318c785dc9e0479,
0x99e2032e765ddd81, 0x400ccc9906d66f45, 0xe1197454db2e0dd9, ],
[0x84d1ecc4d53d2ff1, 0xd8af8b9ceb4e11b6, 0x335856bb527b52f4, 0xc756f17fb59be595,
0xc0654e4ea5553a78, 0x9e9a46b61f2ea942, 0x14fc8b5b3b809127, 0xd7009f0f103be413,
0x3e0ee7b7a9fb4601, 0xa74e888922085ed7, 0xe80a7cde3d4ac526, ],
[0x238aa6daa612186d, 0x9137a5c630bad4b4, 0xc7db3817870c5eda, 0x217e4f04e5718dc9,
0xcae814e2817bd99d, 0xe3292e7ab770a8ba, 0x7bb36ef70b6b9482, 0x3c7835fb85bca2d3,
0xfe2cdf8ee3c25e86, 0x61b3915ad7274b20, 0xeab75ca7c918e4ef, ],
[0xd6e15ffc055e154e, 0xec67881f381a32bf, 0xfbb1196092bf409c, 0xdc9d2e07830ba226,
0x0698ef3245ff7988, 0x194fae2974f8b576, 0x7a5d9bea6ca4910e, 0x7aebfea95ccdd1c9,
0xf9bd38a67d5f0e86, 0xfa65539de65492d8, 0xf0dfcbe7653ff787, ],
[0x0bd87ad390420258, 0x0ad8617bca9e33c8, 0x0c00ad377a1e2666, 0x0ac6fc58b3f0518f,
0x0c0cc8a892cc4173, 0x0c210accb117bc21, 0x0b73630dbb46ca18, 0x0c8be4920cbd4a54,
0x0bfe877a21be1690, 0x0ae790559b0ded81, 0x0bf50db2f8d6ce31, ],
[0x000cf29427ff7c58, 0x000bd9b3cf49eec8, 0x000d1dc8aa81fb26, 0x000bc792d5c394ef,
0x000d2ae0b2266453, 0x000d413f12c496c1, 0x000c84128cfed618, 0x000db5ebd48fc0d4,
0x000d1b77326dcb90, 0x000beb0ccc145421, 0x000d10e5b22b11d1, ],
[0x00000e24c99adad8, 0x00000cf389ed4bc8, 0x00000e580cbf6966, 0x00000cde5fd7e04f,
0x00000e63628041b3, 0x00000e7e81a87361, 0x00000dabe78f6d98, 0x00000efb14cac554,
0x00000e5574743b10, 0x00000d05709f42c1, 0x00000e4690c96af1, ],
[0x0000000f7157bc98, 0x0000000e3006d948, 0x0000000fa65811e6, 0x0000000e0d127e2f,
0x0000000fc18bfe53, 0x0000000fd002d901, 0x0000000eed6461d8, 0x0000001068562754,
0x0000000fa0236f50, 0x0000000e3af13ee1, 0x0000000fa460f6d1, ],
[0x0000000011131738, 0x000000000f56d588, 0x0000000011050f86, 0x000000000f848f4f,
0x00000000111527d3, 0x00000000114369a1, 0x00000000106f2f38, 0x0000000011e2ca94,
0x00000000110a29f0, 0x000000000fa9f5c1, 0x0000000010f625d1, ],
[0x000000000011f718, 0x000000000010b6c8, 0x0000000000134a96, 0x000000000010cf7f,
0x0000000000124d03, 0x000000000013f8a1, 0x0000000000117c58, 0x0000000000132c94,
0x0000000000134fc0, 0x000000000010a091, 0x0000000000128961, ],
[0x0000000000001300, 0x0000000000001750, 0x000000000000114e, 0x000000000000131f,
0x000000000000167b, 0x0000000000001371, 0x0000000000001230, 0x000000000000182c,
0x0000000000001368, 0x0000000000000f31, 0x00000000000015c9, ],
[0x0000000000000014, 0x0000000000000022, 0x0000000000000012, 0x0000000000000027,
0x000000000000000d, 0x000000000000000d, 0x000000000000001c, 0x0000000000000002,
0x0000000000000010, 0x0000000000000029, 0x000000000000000f, ],
];
const FAST_PARTIAL_ROUND_W_HATS: [[u64; 12 - 1]; N_PARTIAL_ROUNDS] = [
[0x54accab273d3aeca, 0x12fecae33b1f1da9, 0x573bb85449ea9a27, 0x6b5ddc139f172aad,
0xd2b6d0ca34465d4c, 0x51cf0aafbddfc269, 0x6075e64679e7a403, 0x678316c041900ac9,
0x10019c84b343fc57, 0xde5b81280922f644, 0x42490a86b2f2f305, ],
[0x337c5930f7bacc46, 0x334792a4f1afb921, 0xc97ea5f1426e540e, 0x5fc74568337bd780,
0xfd5718cc391d80ef, 0xef90b77a337d923c, 0xb28561998f153fea, 0xed5f65b8894345aa,
0x7e2aacb5985893a7, 0xcbde536cb644fcf0, 0x07338300a07fc43b, ],
[0xd4c9ad02fcc8b4c1, 0x2890dac7a1caa815, 0x7d62bc45c45f5db2, 0x0a902300db5deac2,
0x663f3726307f62a4, 0x050bda7dc7d8eb3b, 0xd9db68f3f051c5b6, 0xc5110194a38210aa,
0x403862136533be0e, 0x20039e053d9b227d, 0xe2c90d16262c5f3c, ],
[0x6578da963396c755, 0xea6b546e6bc1e86f, 0x4e562ef0c66c2be3, 0x35b839dae0f9d22e,
0x4aab3d88857b058c, 0x4f7443e07ac462d3, 0x93c2c5bbc385e50f, 0xc0c0c5c8ea023ce2,
0x8409c53d4b62965d, 0x0489f2258135dcd1, 0x32958358c736aec9, ],
[0xe13b50ca15b0a455, 0x9878071e2b5d4547, 0xb8e50d27b4172b30, 0xbf312f828d3ea142,
0x5b8510573020e6e8, 0x7c3091c29d8d6afa, 0x7e2d900a50f194fa, 0xb236d5080d0b0409,
0x08f148b6c3b99320, 0x679c6b9cadbe604c, 0x6b0313be2ad9b9f2, ],
[0x12038ac320459b0e, 0x7abd36c6b25cd8e0, 0x37cc3583930e5a13, 0xafe725c4446a691d,
0x99d89ccadeb38d80, 0x96c820be5528ec36, 0x9b63969fdc84ede6, 0x8f8f21cf5ad78c48,
0x1a4d3573bc3c2d8b, 0x9f5a7bd9e771866e, 0x5bcef938b72497fc, ],
[0x5f969817be6add7a, 0x572b04c1ae5a4c6d, 0x8d219b8fac9a287b, 0x4566b3c56372f434,
0xdd3f46f108bf4441, 0xd7e1469baa3912c4, 0xac36377b68e071fc, 0xf348c609201d771a,
0x0bb926a5e2ebdd96, 0x30efa780aee4705a, 0xb24ff2673691146a, ],
[0x5d0324b3a1dab6e2, 0xbd1491a0cc9e564b, 0xb8699e13b528ef99, 0x7743d9a8753ee023,
0xce577363cdb5bcbc, 0xc056688d4f006774, 0x61f9363c10d7fdf2, 0x5f730e5530f6e06d,
0x25efb9ef3adf0072, 0xcf971d58e21a8aa7, 0xd830d7e8d0d70680, ],
[0x36e69157ac42f39d, 0x3e7aca69ddf62d3e, 0xbbbef86cac42bb30, 0xa2e793ae56c27043,
0x2a315dc4bc40c8a0, 0x84022758f3b3af55, 0x668809e74e7a470d, 0xf2d91eaafdee1820,
0x50f19afd16d03294, 0x30c087d3223bcd4b, 0xf5739d95458cc633, ],
[0x15266b5a75028317, 0x8059f198c9f88799, 0x437a070386c65244, 0xc70e0bb73942929d,
0xa8b32cb37ae137ea, 0xc2e556278323a459, 0xbc486da754091692, 0x7815a23467d6b541,
0x3e6dba4e930e8be6, 0x6b4277b0915d56ba, 0x20212bfac7922ea0, ],
[0xeeba270c067b0c8b, 0xa4d576458941f29a, 0xecdf04a28c8c83be, 0xc808f0af215d7dda,
0x424f4bfbecced0fb, 0xe4cbf6c0c10e58b3, 0x66a87bebfa09c031, 0x614ffc9443d5f0a4,
0x96c96636f7b7975a, 0x58d4222a6f860cc5, 0x2d4f51c75bf50169, ],
[0xab43452aec55310f, 0x0a719e77ec2b398c, 0x8f946888a3f5f74f, 0x7b447e0d9f7ad4fb,
0x7a2887ceb40ef226, 0x8840b904c1c49e50, 0xd91ea2510b0eaddc, 0x6617fa40a1a220fb,
0xb1c41a72a845cb45, 0x02c2715281868092, 0xaf5b1b6c46ca37bd, ],
[0xe27649b9dbcbe631, 0x4afdf11d1d5e73b2, 0x05285a0e99160910, 0x23bfd6197ed8d3ba,
0xb1e6292028792aab, 0xc997f6cc14e05cae, 0x34793ec255a555bd, 0xeb4f2da35a76dd03,
0x767a5552c9910f3a, 0x4c4cc6987c30a447, 0x64da2b6920578f8d, ],
[0xe97ce2fecc0720ac, 0x99fc5741fcdeae8a, 0x0ac47be58b345692, 0x75a446121f2cccda,
0xf38e40a102691c8e, 0xdbe5d707594714ef, 0x6ab183bdab92e450, 0x0aed83850dc10451,
0x66e16941a4373c93, 0x22af15bb3e1034a1, 0xab2136f22ed23ccc, ],
[0xb0d3214d3c4c46c1, 0x3983bffd4053346c, 0xab1239b72a6a9e64, 0x669bcbda2406c089,
0xf3118af8e563feda, 0x58323dbdd43a9c95, 0x5438aa910b51fd8c, 0xcbf071f9573f7e4f,
0x476c8fde40075e51, 0xa10f54d3c77d8bed, 0xfecafe7ec7346beb, ],
[0x79e00c6916f68fa8, 0x80e39c20c11400d6, 0x242e2b46a7c116b7, 0xea660990074fcff6,
0x18e3369da4c9272b, 0xfa6471be8be33b80, 0xede2ed2a83a4574a, 0x9e595d610deaaed6,
0xc7d2cf35fcacdc58, 0xc65cf113a9af2302, 0x35a74c3d0cac5fde, ],
[0x35d6cf1a9aeabd4b, 0x4dc004b0b64954c3, 0xcb67ab54210b4c8f, 0xa2359b770621d28e,
0x027a0a0a5e315bf6, 0xed6aad0492a86ef6, 0x127074e28969232c, 0x3e3d68e6354d396f,
0x3cf204ab96edf7c6, 0x513a9050b70c18bf, 0x73b3b7399a3f5281, ],
[0x0af9319d5b7cd620, 0x0514fbcecd8a897d, 0x542dd32e46738f8d, 0x49248ae425e9bd45,
0x8bb9ef7ac36e53ea, 0x97981020c414a723, 0xe587f186c024e0c8, 0x14f01dd28e990ad2,
0x4d3fca72e19ea756, 0x01a3824f1ee8e7f1, 0xb048d25b575f250e, ],
[0xe78a4cfe6c6aa236, 0x4840deffdefd3b04, 0x6e0952d028e63e47, 0x249d49fb1d93304d,
0xd41ce9ed49f7fbb3, 0xba255e808ea77466, 0x5ce52e6dc2005436, 0x8b5bf13acd881a04,
0xf80f439f3ac011d1, 0x1d3618fb2cc3f916, 0xf41489c837e14938, ],
[0x41e065665af15054, 0x71752ac86d1bba64, 0x9bfddd30f8ceadeb, 0x4f59dd5e6c985767,
0x8aa3e0718ecaa657, 0x355f734ed4199ca2, 0x110f361baec4d693, 0x283a46e9e134b5b1,
0x4fda33376f5c6514, 0xcca192f9565e7d13, 0x2251835db1c24c39, ],
[0xc583f62f5970a849, 0xb6cc325741cd89dd, 0xf83288467f07ac1f, 0xfd82624964b845e7,
0x11967e4e00a49fdd, 0x2fb200fae9f72577, 0xd6fb31913c7d5da7, 0xfad9ae578dd090cc,
0xcd13b2be741ea5d8, 0xc1c54f9cf54b0c27, 0x29520a761b657cce, ],
[0x0ac0e496a2b39f4a, 0x20571abb59e27953, 0xe9971143579a1d30, 0x980359c3dba518cb,
0x05ecee5a85b427c4, 0x4620dd90ad0b5366, 0x95c98f9c5b859365, 0x0fbb1806fbc56995,
0xfe4526fd802afae2, 0x70e3786431084092, 0xa8d78a0494939111, ],
[0x3d999c961b7c63b0, 0x814e82efcd172529, 0x2421e5d236704588, 0x887af7d4dd482328,
0xa5e9c291f6119b27, 0xbdc52b2676a4b4aa, 0x64832009d29bcf57, 0x09c4155174a552cc,
0x463f9ee03d290810, 0xc810936e64982542, 0x043b1c289f7bc3ac, ],
[0x673655aae8be5a8b, 0xd510fe714f39fa10, 0x2c68a099b51c9e73, 0xa667bfa9aa96999d,
0x4d67e72f063e2108, 0xf84dde3e6acda179, 0x40f9cc8c08f80981, 0x5ead032050097142,
0x6591b02092d671bb, 0x00e18c71963dd1b7, 0x8a21bcd24a14218a, ],
[0x202800f4addbdc87, 0xe4b5bdb1cc3504ff, 0xbe32b32a825596e7, 0x8e0f68c5dc223b9a,
0x58022d9e1c256ce3, 0x584d29227aa073ac, 0x8b9352ad04bef9e7, 0xaead42a3f445ecbf,
0x3c667a1d833a3cca, 0xda6f61838efa1ffe, 0xe8f749470bd7c446, ],
[0xc5b85bab9e5b3869, 0x45245258aec51cf7, 0x16e6b8e68b931830, 0xe2ae0f051418112c,
0x0470e26a0093a65b, 0x6bef71973a8146ed, 0x119265be51812daf, 0xb0be7356254bea2e,
0x8584defff7589bd7, 0x3c5fe4aeb1fb52ba, 0x9e7cd88acf543a5e, ],
[0x179be4bba87f0a8c, 0xacf63d95d8887355, 0x6696670196b0074f, 0xd99ddf1fe75085f9,
0xc2597881fef0283b, 0xcf48395ee6c54f14, 0x15226a8e4cd8d3b6, 0xc053297389af5d3b,
0x2c08893f0d1580e2, 0x0ed3cbcff6fcc5ba, 0xc82f510ecf81f6d0, ],
[0x94b06183acb715cc, 0x500392ed0d431137, 0x861cc95ad5c86323, 0x05830a443f86c4ac,
0x3b68225874a20a7c, 0x10b3309838e236fb, 0x9b77fc8bcd559e2c, 0xbdecf5e0cb9cb213,
0x30276f1221ace5fa, 0x7935dd342764a144, 0xeac6db520bb03708, ],
[0x7186a80551025f8f, 0x622247557e9b5371, 0xc4cbe326d1ad9742, 0x55f1523ac6a23ea2,
0xa13dfe77a3d52f53, 0xe30750b6301c0452, 0x08bd488070a3a32b, 0xcd800caef5b72ae3,
0x83329c90f04233ce, 0xb5b99e6664a0a3ee, 0x6b0731849e200a7f, ],
[0xec3fabc192b01799, 0x382b38cee8ee5375, 0x3bfb6c3f0e616572, 0x514abd0cf6c7bc86,
0x47521b1361dcc546, 0x178093843f863d14, 0xad1003c5d28918e7, 0x738450e42495bc81,
0xaf947c59af5e4047, 0x4653fb0685084ef2, 0x057fde2062ae35bf, ],
[0xe376678d843ce55e, 0x66f3860d7514e7fc, 0x7817f3dfff8b4ffa, 0x3929624a9def725b,
0x0126ca37f215a80a, 0xfce2f5d02762a303, 0x1bc927375febbad7, 0x85b481e5243f60bf,
0x2d3c5f42a39c91a0, 0x0811719919351ae8, 0xf669de0add993131, ],
[0x7de38bae084da92d, 0x5b848442237e8a9b, 0xf6c705da84d57310, 0x31e6a4bdb6a49017,
0x889489706e5c5c0f, 0x0e4a205459692a1b, 0xbac3fa75ee26f299, 0x5f5894f4057d755e,
0xb0dc3ecd724bb076, 0x5e34d8554a6452ba, 0x04f78fd8c1fdcc5f, ],
[0x4dd19c38779512ea, 0xdb79ba02704620e9, 0x92a29a3675a5d2be, 0xd5177029fe495166,
0xd32b3298a13330c1, 0x251c4a3eb2c5f8fd, 0xe1c48b26e0d98825, 0x3301d3362a4ffccb,
0x09bb6c88de8cd178, 0xdc05b676564f538a, 0x60192d883e473fee, ],
[0x16b9774801ac44a0, 0x3cb8411e786d3c8e, 0xa86e9cf505072491, 0x0178928152e109ae,
0x5317b905a6e1ab7b, 0xda20b3be7f53d59f, 0xcb97dedecebee9ad, 0x4bd545218c59f58d,
0x77dc8d856c05a44a, 0x87948589e4f243fd, 0x7e5217af969952c2, ],
[0xbc58987d06a84e4d, 0x0b5d420244c9cae3, 0xa3c4711b938c02c0, 0x3aace640a3e03990,
0x865a0f3249aacd8a, 0x8d00b2a7dbed06c7, 0x6eacb905beb7e2f8, 0x045322b216ec3ec7,
0xeb9de00d594828e6, 0x088c5f20df9e5c26, 0xf555f4112b19781f, ],
[0xa8cedbff1813d3a7, 0x50dcaee0fd27d164, 0xf1cb02417e23bd82, 0xfaf322786e2abe8b,
0x937a4315beb5d9b6, 0x1b18992921a11d85, 0x7d66c4368b3c497b, 0x0e7946317a6b4e99,
0xbe4430134182978b, 0x3771e82493ab262d, 0xa671690d8095ce82, ],
[0xb035585f6e929d9d, 0xba1579c7e219b954, 0xcb201cf846db4ba3, 0x287bf9177372cf45,
0xa350e4f61147d0a6, 0xd5d0ecfb50bcff99, 0x2e166aa6c776ed21, 0xe1e66c991990e282,
0x662b329b01e7bb38, 0x8aa674b36144d9a9, 0xcbabf78f97f95e65, ],
[0xeec24b15a06b53fe, 0xc8a7aa07c5633533, 0xefe9c6fa4311ad51, 0xb9173f13977109a1,
0x69ce43c9cc94aedc, 0xecf623c9cd118815, 0x28625def198c33c7, 0xccfc5f7de5c3636a,
0xf5e6c40f1621c299, 0xcec0e58c34cb64b1, 0xa868ea113387939f, ],
[0xd8dddbdc5ce4ef45, 0xacfc51de8131458c, 0x146bb3c0fe499ac0, 0x9e65309f15943903,
0x80d0ad980773aa70, 0xf97817d4ddbf0607, 0xe4626620a75ba276, 0x0dfdc7fd6fc74f66,
0xf464864ad6f2bb93, 0x02d55e52a5d44414, 0xdd8de62487c40925, ],
[0xc15acf44759545a3, 0xcbfdcf39869719d4, 0x33f62042e2f80225, 0x2599c5ead81d8fa3,
0x0b306cb6c1d7c8d0, 0x658c80d3df3729b1, 0xe8d1b2b21b41429c, 0xa1b67f09d4b3ccb8,
0x0e1adf8b84437180, 0x0d593a5e584af47b, 0xa023d94c56e151c7, ],
[0x49026cc3a4afc5a6, 0xe06dff00ab25b91b, 0x0ab38c561e8850ff, 0x92c3c8275e105eeb,
0xb65256e546889bd0, 0x3c0468236ea142f6, 0xee61766b889e18f2, 0xa206f41b12c30415,
0x02fe9d756c9f12d1, 0xe9633210630cbf12, 0x1ffea9fe85a0b0b1, ],
[0x81d1ae8cc50240f3, 0xf4c77a079a4607d7, 0xed446b2315e3efc1, 0x0b0a6b70915178c3,
0xb11ff3e089f15d9a, 0x1d4dba0b7ae9cc18, 0x65d74e2f43b48d05, 0xa2df8c6b8ae0804a,
0xa4e6f0a8c33348a6, 0xc0a26efc7be5669b, 0xa6b6582c547d0d60, ],
[0x84afc741f1c13213, 0x2f8f43734fc906f3, 0xde682d72da0a02d9, 0x0bb005236adb9ef2,
0x5bdf35c10a8b5624, 0x0739a8a343950010, 0x52f515f44785cfbc, 0xcbaf4e5d82856c60,
0xac9ea09074e3e150, 0x8f0fa011a2035fb0, 0x1a37905d8450904a, ],
[0x3abeb80def61cc85, 0x9d19c9dd4eac4133, 0x075a652d9641a985, 0x9daf69ae1b67e667,
0x364f71da77920a18, 0x50bd769f745c95b1, 0xf223d1180dbbf3fc, 0x2f885e584e04aa99,
0xb69a0fa70aea684a, 0x09584acaa6e062a0, 0x0bc051640145b19b, ],
];
// NB: This is in ROW-major order to support cache-friendly pre-multiplication.
const FAST_PARTIAL_ROUND_INITIAL_MATRIX: [[u64; 12 - 1]; 12 - 1] = [
[0xb8dee12bf8e622dc, 0x2a0bcfdad25a7a77, 0x35f873e941f6055d, 0x99b7b85b6028982e,
0x86d6993880e836f7, 0x1ef8de305b9c354d, 0x8b0a80ef933c37dc, 0x715c7164aacaf4a8,
0x43845bd4f75ac7f5, 0x3e71bb7b0ec57a1a, 0xffc5b2f8946575c3, ],
[0x863ca0992eae09b0, 0x68901dfa3ecc7696, 0x6ba9546fc13ba8be, 0x555b7567255c9650,
0x4570c6ac5e80551b, 0x8e440c6cc2d0ed18, 0xbad8ae4dbfba0799, 0x8b71ed9e65a6ed7a,
0xaade0f9eb69ee576, 0xdebe1855920c6e64, 0x3e71bb7b0ec57a1a, ],
[0x2c3887c29246a985, 0x5aeb127ffeece78f, 0xa86e940514be2461, 0x2cb276ddf6094068,
0x81e59e8f82a28b3c, 0x27bc037b1569fb52, 0x706ee8b692c2ebc7, 0xeba6949241aedb71,
0xc416ad39f1f908f8, 0xaade0f9eb69ee576, 0x43845bd4f75ac7f5, ],
[0x03df3a62e1ea48d2, 0xbb484c2d408e9b12, 0x0fbf2169623ec24c, 0x50955930c2f9eb19,
0x3dfc3cc6123745cc, 0xa2a8d3774d197b2c, 0xd16417e43d20feab, 0xd998a362dba538ba,
0xeba6949241aedb71, 0x8b71ed9e65a6ed7a, 0x715c7164aacaf4a8, ],
[0xbbf73d77fc6c411c, 0xad7f124615d240ee, 0x4e413fcebe9020ee, 0x540bd8044c672f2b,
0x6db739f6d2e9f37d, 0x9aa1b0a8f56ad33d, 0x53c179d92714378f, 0xd16417e43d20feab,
0x706ee8b692c2ebc7, 0xbad8ae4dbfba0799, 0x8b0a80ef933c37dc, ],
[0xab92e860ecde7bdc, 0xa58fc91c605c26d5, 0xfbe68b79a8d5e0b9, 0x3e7edc1407cbd848,
0xf69c76d11eaf57bf, 0x941ef2c6beace374, 0x9aa1b0a8f56ad33d, 0xa2a8d3774d197b2c,
0x27bc037b1569fb52, 0x8e440c6cc2d0ed18, 0x1ef8de305b9c354d, ],
[0xb522132046b25eaf, 0x2b7b18e882c3e2c6, 0xe3322ad433ba15c8, 0x87355794faf87b1b,
0x14f6e5ac86065fce, 0xf69c76d11eaf57bf, 0x6db739f6d2e9f37d, 0x3dfc3cc6123745cc,
0x81e59e8f82a28b3c, 0x4570c6ac5e80551b, 0x86d6993880e836f7, ],
[0x0084dd11f5c0d55c, 0x9d664d307df18036, 0x1d80d847dca52945, 0xee3eecb9b2df1658,
0x87355794faf87b1b, 0x3e7edc1407cbd848, 0x540bd8044c672f2b, 0x50955930c2f9eb19,
0x2cb276ddf6094068, 0x555b7567255c9650, 0x99b7b85b6028982e, ],
[0xeb7c39655546eba5, 0xf07245b62d94cf71, 0x17db9b690f0031a3, 0x1d80d847dca52945,
0xe3322ad433ba15c8, 0xfbe68b79a8d5e0b9, 0x4e413fcebe9020ee, 0x0fbf2169623ec24c,
0xa86e940514be2461, 0x6ba9546fc13ba8be, 0x35f873e941f6055d, ],
[0xcb7fc57923717f84, 0x795a850bf5f9e397, 0xf07245b62d94cf71, 0x9d664d307df18036,
0x2b7b18e882c3e2c6, 0xa58fc91c605c26d5, 0xad7f124615d240ee, 0xbb484c2d408e9b12,
0x5aeb127ffeece78f, 0x68901dfa3ecc7696, 0x2a0bcfdad25a7a77, ],
[0x3107f5edca2f02b8, 0xcb7fc57923717f84, 0xeb7c39655546eba5, 0x0084dd11f5c0d55c,
0xb522132046b25eaf, 0xab92e860ecde7bdc, 0xbbf73d77fc6c411c, 0x03df3a62e1ea48d2,
0x2c3887c29246a985, 0x863ca0992eae09b0, 0xb8dee12bf8e622dc, ],
[0x80772dc2645b280b, 0xdc927721da922cf8, 0xc1978156516879ad, 0x90e80c591f48b603,
0x3a2432625475e3ae, 0x00a2d4321cca94fe, 0x77736f524010c932, 0x904d3f2804a36c54,
0xbf9b39e28a16f354, 0x3a1ded54a6cd058b, 0x42392870da5737cf, ],
[0xe796d293a47a64cb, 0xb124c33152a2421a, 0x0ee5dc0ce131268a, 0xa9032a52f930fae6,
0x7e33ca8c814280de, 0xad11180f69a8c29e, 0xc75ac6d5b5a10ff3, 0xf0674a8dc5a387ec,
0xb36d43120eaa5e2b, 0x6f232aab4b533a25, 0x3a1ded54a6cd058b, ],
[0xdcedab70f40718ba, 0x14a4a64da0b2668f, 0x4715b8e5ab34653b, 0x1e8916a99c93a88e,
0xbba4b5d86b9a3b2c, 0xe76649f9bd5d5c2e, 0xaf8e2518a1ece54d, 0xdcda1344cdca873f,
0xcd080204256088e5, 0xb36d43120eaa5e2b, 0xbf9b39e28a16f354, ],
[0xf4a437f2888ae909, 0xc537d44dc2875403, 0x7f68007619fd8ba9, 0xa4911db6a32612da,
0x2f7e9aade3fdaec1, 0xe7ffd578da4ea43d, 0x43a608e7afa6b5c2, 0xca46546aa99e1575,
0xdcda1344cdca873f, 0xf0674a8dc5a387ec, 0x904d3f2804a36c54, ],
[0xf97abba0dffb6c50, 0x5e40f0c9bb82aab5, 0x5996a80497e24a6b, 0x07084430a7307c9a,
0xad2f570a5b8545aa, 0xab7f81fef4274770, 0xcb81f535cf98c9e9, 0x43a608e7afa6b5c2,
0xaf8e2518a1ece54d, 0xc75ac6d5b5a10ff3, 0x77736f524010c932, ],
[0x7f8e41e0b0a6cdff, 0x4b1ba8d40afca97d, 0x623708f28fca70e8, 0xbf150dc4914d380f,
0xc26a083554767106, 0x753b8b1126665c22, 0xab7f81fef4274770, 0xe7ffd578da4ea43d,
0xe76649f9bd5d5c2e, 0xad11180f69a8c29e, 0x00a2d4321cca94fe, ],
[0x726af914971c1374, 0x1d7f8a2cce1a9d00, 0x18737784700c75cd, 0x7fb45d605dd82838,
0x862361aeab0f9b6e, 0xc26a083554767106, 0xad2f570a5b8545aa, 0x2f7e9aade3fdaec1,
0xbba4b5d86b9a3b2c, 0x7e33ca8c814280de, 0x3a2432625475e3ae, ],
[0x64dd936da878404d, 0x4db9a2ead2bd7262, 0xbe2e19f6d07f1a83, 0x02290fe23c20351a,
0x7fb45d605dd82838, 0xbf150dc4914d380f, 0x07084430a7307c9a, 0xa4911db6a32612da,
0x1e8916a99c93a88e, 0xa9032a52f930fae6, 0x90e80c591f48b603, ],
[0x85418a9fef8a9890, 0xd8a2eb7ef5e707ad, 0xbfe85ababed2d882, 0xbe2e19f6d07f1a83,
0x18737784700c75cd, 0x623708f28fca70e8, 0x5996a80497e24a6b, 0x7f68007619fd8ba9,
0x4715b8e5ab34653b, 0x0ee5dc0ce131268a, 0xc1978156516879ad, ],
[0x156048ee7a738154, 0x91f7562377e81df5, 0xd8a2eb7ef5e707ad, 0x4db9a2ead2bd7262,
0x1d7f8a2cce1a9d00, 0x4b1ba8d40afca97d, 0x5e40f0c9bb82aab5, 0xc537d44dc2875403,
0x14a4a64da0b2668f, 0xb124c33152a2421a, 0xdc927721da922cf8, ],
[0xd841e8ef9dde8ba0, 0x156048ee7a738154, 0x85418a9fef8a9890, 0x64dd936da878404d,
0x726af914971c1374, 0x7f8e41e0b0a6cdff, 0xf97abba0dffb6c50, 0xf4a437f2888ae909,
0xdcedab70f40718ba, 0xe796d293a47a64cb, 0x80772dc2645b280b, ],
];
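The row-major note above refers to the access pattern sketched below. This is a schematic only, with invented names and a generic stand-in for the field type, and the commit's own loop may order its indices differently; the point is that when the state is treated as a row vector, each input element scans one contiguous row of the table, so the inner loop streams through memory sequentially.

use std::ops::{AddAssign, Mul};

// Schematic v^T * M over a row-major table; `F` stands in for the field type, so the
// modular arithmetic lives in F's Mul/AddAssign impls rather than in this sketch.
fn premultiply_sketch<F: Copy + Default + AddAssign + Mul<Output = F>>(
    state: &[F; 11],
    m: &[[F; 11]; 11],
) -> [F; 11] {
    let mut result = [F::default(); 11];
    for (r, row) in m.iter().enumerate() {
        // `row` is contiguous because the table is stored row-major.
        for (d, &coeff) in row.iter().enumerate() {
            result[d] += state[r] * coeff;
        }
    }
    result
}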
#[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))]
#[inline]
fn poseidon(input: [Self; 12]) -> [Self; 12] {
unsafe {
crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::poseidon(&input)
}
}
// #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))]
// #[inline]
// fn poseidon(input: [Self; 12]) -> [Self; 12] {
// unsafe {
// crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::poseidon(&input)
// }
// }
#[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))]
#[inline(always)]
fn constant_layer(state: &mut [Self; 12], round_ctr: usize) {
unsafe {
crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::constant_layer(state, round_ctr);
}
}
// #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))]
// #[inline(always)]
// fn constant_layer(state: &mut [Self; 12], round_ctr: usize) {
// unsafe {
// crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::constant_layer(state, round_ctr);
// }
// }
#[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))]
#[inline(always)]
fn sbox_layer(state: &mut [Self; 12]) {
unsafe {
crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::sbox_layer(state);
}
}
// #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))]
// #[inline(always)]
// fn sbox_layer(state: &mut [Self; 12]) {
// unsafe {
// crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::sbox_layer(state);
// }
// }
#[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))]
#[inline(always)]
fn mds_layer(state: &[Self; 12]) -> [Self; 12] {
unsafe {
crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::mds_layer(state)
}
}
// #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))]
// #[inline(always)]
// fn mds_layer(state: &[Self; 12]) -> [Self; 12] {
// unsafe {
// crate::hash::arch::x86_64::poseidon_goldilocks_avx2_bmi2::mds_layer(state)
// }
// }
#[cfg(all(target_arch="aarch64", target_feature="neon"))]
#[inline]
fn poseidon(input: [Self; 12]) -> [Self; 12] {
unsafe {
crate::hash::arch::aarch64::poseidon_goldilocks_neon::poseidon(input)
}
}
// #[cfg(all(target_arch="aarch64", target_feature="neon"))]
// #[inline]
// fn poseidon(input: [Self; 12]) -> [Self; 12] {
// unsafe {
// crate::hash::arch::aarch64::poseidon_goldilocks_neon::poseidon(input)
// }
// }
#[cfg(all(target_arch="aarch64", target_feature="neon"))]
#[inline(always)]
@@ -287,46 +288,28 @@ mod tests {
let neg_one: u64 = F::NEG_ONE.to_canonical_u64();
#[rustfmt::skip]
let _test_vectors8: Vec<([u64; 8], [u64; 8])> = vec![
([0, 0, 0, 0, 0, 0, 0, 0, ],
[0x649eec3229475d06, 0x72afe85b8b600222, 0x816d0a50ddd39228, 0x5083133a721a187c,
0xbb69bd7d90c490a6, 0xea1d33a65d0a3287, 0xb4d27542d2fba3bc, 0xf9756d565d90c20a, ]),
([0, 1, 2, 3, 4, 5, 6, 7, ],
[0xdfda4e2a7ec338f4, 0x3ac8d668054b1873, 0xeaaef2f72528e7ff, 0xee7bcc836ae165bc,
0x95561d9377c3e696, 0x2e7d39c369dfccaa, 0x992178c050936f8f, 0x34e38ec33f572850, ]),
([neg_one, neg_one, neg_one, neg_one,
neg_one, neg_one, neg_one, neg_one, ],
[0x9d8553546c658f67, 0xd5f6422aea26962b, 0xffb40b4db302da75, 0x34f43bbd7882c16c,
0xccb375313fa146b0, 0x87574c332e89201a, 0x60e9e6c0c0be3a16, 0xf0e2a741e90756ba, ]),
([0x016f2dde9ccdaf6f, 0x77e29cda821fece4, 0x2f6686f781255f78, 0xd2c4c9a53070b44f,
0x4d7035c9fd01fc40, 0xc8d460945c91d509, 0x14855cd8a36a097f, 0x49f640d6a30f9cf0, ],
[0x4c3c58a3fac4ba05, 0x3f26fc2bcb33a3d4, 0xe13fcddcd7a136bb, 0x27b05be73a91e2f2,
0x37804ed8ca07fcd5, 0xe78ec2f213e28456, 0xecf67d2aacb4dbe3, 0xad14575187c496ca, ]),
];
#[rustfmt::skip]
let test_vectors12: Vec<([u64; 12], [u64; 12])> = vec![
([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ],
[0x3901858a44be6b3a, 0xb3470607c5f0ba0e, 0xb3b3ac3d89b37e8e, 0xd389513a7f6fe6e9,
0x1eceb92f5da1c96b, 0x55d0bdfc6a842adf, 0x0112c568afb8819c, 0x6ac21107619569ee,
0x3de33babbb421a85, 0x83688eb15ffe4ca3, 0x47e285b477551fa9, 0x1dd3dda781901271, ]),
[0x3c18a9786cb0b359, 0xc4055e3364a246c3, 0x7953db0ab48808f4, 0xc71603f33a1144ca,
0xd7709673896996dc, 0x46a84e87642f44ed, 0xd032648251ee0b3c, 0x1c687363b207df62,
0xdf8565563e8045fe, 0x40f5b37ff4254dae, 0xd070f637b431067c, 0x1792b1c4342109d7, ]),
([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ],
[0x641772a94a77c7e5, 0x38d2cec9c47e7314, 0x3577218e825058c9, 0x1cdb3b4d22c54bcc,
0x803234d4b16eb152, 0xbbb6c8438627c0f0, 0x1b219561c95a41fa, 0x9bdc97531bacc401,
0x4251f4fac8271d9d, 0x0279ffa7ba5ce9aa, 0x63baf77c533b5874, 0xb7ada3e1f98b25e7, ]),
[0xd64e1e3efc5b8e9e, 0x53666633020aaa47, 0xd40285597c6a8825, 0x613a4f81e81231d2,
0x414754bfebd051f0, 0xcb1f8980294a023f, 0x6eb2a9e4d54a9d0f, 0x1902bc3af467e056,
0xf045d5eafdc6021f, 0xe4150f77caaa3be5, 0xc9bfd01d39b50cce, 0x5c0a27fcb0e1459b, ]),
([neg_one, neg_one, neg_one, neg_one,
neg_one, neg_one, neg_one, neg_one,
neg_one, neg_one, neg_one, neg_one, ],
[0xd2e4605ed1eb9613, 0x62510e8cbaf8a3b5, 0x64dc1e941dbaf46c, 0x1d6c5a5fd43cc4c5,
0xac4b4f6bf503a6b4, 0x19e17983f5e52404, 0x927b08e033b29b6f, 0xa41bc2cb5ddb9bc0,
0x270d528b1accc148, 0x022169acf46c71ae, 0xbbd4566e7b49ad7d, 0x0ed1ea54401533ef, ]),
([0xa48728856b047229, 0xc43ab5e4aa986608, 0x715f470f075c057f, 0x36e955a095478013,
0x7c036db7200ba52d, 0x20377cd3410dc7dc, 0x058c0956659b05b2, 0xa66c880ee57e8399,
0xb06521c88afbd610, 0xdfa4d72ba95c8895, 0x25b403dac3622acc, 0xda607d79268a8fce, ],
[0xe85b56b0764df429, 0x7c0796201b43fe68, 0x231673b8300a6a16, 0x25db4745a952a677,
0x01431a6817415a4d, 0xfdfbbe63602076eb, 0x82c643dabf1154c1, 0x896e7e87b3f3417d,
0x27eca78818ef9c27, 0xf08c93583c24dc47, 0x1c9e1552c07a9f73, 0x7659179192cfdc88, ]),
[0xbe0085cfc57a8357, 0xd95af71847d05c09, 0xcf55a13d33c1c953, 0x95803a74f4530e82,
0xfcd99eb30a135df1, 0xe095905e913a3029, 0xde0392461b42919b, 0x7d3260e24e81d031,
0x10d3d0465d9deaa0, 0xa87571083dfc2a47, 0xe18263681e9958f8, 0xe28e96f1ae5e60d3, ]),
([0x8ccbbbea4fe5d2b7, 0xc2af59ee9ec49970, 0x90f7e1a9e658446a, 0xdcc0630a3ab8b1b8,
0x7ff8256bca20588c, 0x5d99a7ca0c44ecfb, 0x48452b17a70fbee3, 0xeb09d654690b6c88,
0x4a55d3a39c676a88, 0xc0407a38d2285139, 0xa234bac9356386d1, 0xe1633f2bad98a52f, ],
[0xa89280105650c4ec, 0xab542d53860d12ed, 0x5704148e9ccab94f, 0xd3a826d4b62da9f5,
0x8a7a6ca87892574f, 0xc7017e1cad1a674e, 0x1f06668922318e34, 0xa3b203bc8102676f,
0xfcc781b0ce382bf2, 0x934c69ff3ed14ba5, 0x504688a5996e8f13, 0x401f3f2ed524a2ba, ]),
];
check_test_vectors::<F>(test_vectors12);

View File

@@ -208,7 +208,7 @@ impl<F: RichField + Extendable<D>, H: AlgebraicHasher<F>, const D: usize>
}
}
pub(crate) fn get_challenge(&mut self, builder: &mut CircuitBuilder<F, D>) -> Target {
pub fn get_challenge(&mut self, builder: &mut CircuitBuilder<F, D>) -> Target {
self.absorb_buffered_inputs(builder);
if self.output_buffer.is_empty() {

View File

@@ -640,6 +640,7 @@ impl<F: RichField + Extendable<D>, const D: usize> CircuitBuilder<F, D> {
let mut timing = TimingTree::new("preprocess", Level::Trace);
let start = Instant::now();
let rate_bits = self.config.fri_config.rate_bits;
let cap_height = self.config.fri_config.cap_height;
// Hash the public inputs, and route them to a `PublicInputGate` which will enforce that
// those hash wires match the claimed public inputs.
@@ -665,7 +666,7 @@ impl<F: RichField + Extendable<D>, const D: usize> CircuitBuilder<F, D> {
let degree_bits = log2_strict(degree);
let fri_params = self.fri_params(degree_bits);
assert!(
fri_params.total_arities() <= degree_bits,
fri_params.total_arities() <= degree_bits + rate_bits - cap_height,
"FRI total reduction arity is too large.",
);
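A plausible reading of the corrected bound (not spelled out in the commit): the initial FRI codeword has 2^(degree_bits + rate_bits) points and each reduction-arity bit halves it, so after `total_arities()` bits the smallest committed layer has 2^(degree_bits + rate_bits - total_arities()) points; for its Merkle tree to still support a cap of height `cap_height`, that count must be at least 2^cap_height, giving total_arities() <= degree_bits + rate_bits - cap_height. For example, degree_bits = 12, rate_bits = 3, cap_height = 4 allows at most 11 bits of total reduction.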
@@ -706,7 +707,7 @@ impl<F: RichField + Extendable<D>, const D: usize> CircuitBuilder<F, D> {
constants_sigmas_vecs,
rate_bits,
PlonkOracle::CONSTANTS_SIGMAS.blinding,
self.config.fri_config.cap_height,
cap_height,
&mut timing,
Some(&fft_root_table),
);

View File

@@ -4,6 +4,7 @@ use crate::hash::hash_types::{HashOutTarget, RichField};
use crate::plonk::circuit_builder::CircuitBuilder;
use crate::plonk::circuit_data::{CommonCircuitData, VerifierCircuitTarget};
use crate::plonk::config::{AlgebraicHasher, GenericConfig};
use crate::plonk::plonk_common::salt_size;
use crate::plonk::proof::{
OpeningSetTarget, ProofChallengesTarget, ProofTarget, ProofWithPublicInputsTarget,
};
@@ -141,11 +142,12 @@ impl<F: RichField + Extendable<D>, const D: usize> CircuitBuilder<F, D> {
let fri_params = &common_data.fri_params;
let cap_height = fri_params.config.cap_height;
let salt = salt_size(common_data.fri_params.hiding);
let num_leaves_per_oracle = &[
common_data.num_preprocessed_polys(),
config.num_wires,
common_data.num_zs_partial_products_polys(),
common_data.num_quotient_polys(),
config.num_wires + salt,
common_data.num_zs_partial_products_polys() + salt,
common_data.num_quotient_polys() + salt,
];
ProofTarget {
@@ -200,7 +202,7 @@ mod tests {
const D: usize = 2;
type C = PoseidonGoldilocksConfig;
type F = <C as GenericConfig<D>>::F;
let config = CircuitConfig::standard_recursion_config();
let config = CircuitConfig::standard_recursion_zk_config();
let (proof, vd, cd) = dummy_proof::<F, C, D>(&config, 4_000)?;
let (proof, _vd, cd) =

View File

@@ -1,6 +1,6 @@
use std::borrow::Borrow;
use plonky2_field::extension_field::Extendable;
use plonky2_field::extension_field::{Extendable, FieldExtension};
use plonky2_field::field_types::Field;
use plonky2_field::polynomial::PolynomialCoeffs;
@@ -35,6 +35,11 @@ impl<F: Field> ReducingFactor<F> {
self.base * x
}
fn mul_ext<FE: FieldExtension<D, BaseField = F>, const D: usize>(&mut self, x: FE) -> FE {
self.count += 1;
x.scalar_mul(self.base)
}
fn mul_poly(&mut self, p: &mut PolynomialCoeffs<F>) {
self.count += 1;
*p *= self.base;
@@ -45,6 +50,14 @@ impl<F: Field> ReducingFactor<F> {
.fold(F::ZERO, |acc, x| self.mul(acc) + *x.borrow())
}
pub fn reduce_ext<FE: FieldExtension<D, BaseField = F>, const D: usize>(
&mut self,
iter: impl DoubleEndedIterator<Item = impl Borrow<FE>>,
) -> FE {
iter.rev()
.fold(FE::ZERO, |acc, x| self.mul_ext(acc) + *x.borrow())
}
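The new `reduce_ext` mirrors `reduce`: folding from the back and multiplying the accumulator by `base` at each step is Horner's rule, so for values x0, x1, ..., x_{n-1} it returns x0 + base*x1 + base^2*x2 + ... + base^(n-1)*x_{n-1}, with `base` kept in the base field and applied through `scalar_mul`. As a plain-integer illustration of the same fold, reducing [2, 3, 5] with base 10 gives ((5*10 + 3)*10 + 2) = 532 = 2 + 3*10 + 5*100.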
pub fn reduce_polys(
&mut self,
polys: impl DoubleEndedIterator<Item = impl Borrow<PolynomialCoeffs<F>>>,

View File

@@ -15,6 +15,7 @@ use crate::hash::merkle_proofs::MerkleProof;
use crate::hash::merkle_tree::MerkleCap;
use crate::plonk::circuit_data::CommonCircuitData;
use crate::plonk::config::{GenericConfig, GenericHashOut, Hasher};
use crate::plonk::plonk_common::salt_size;
use crate::plonk::proof::{
CompressedProof, CompressedProofWithPublicInputs, OpeningSet, Proof, ProofWithPublicInputs,
};
@@ -235,6 +236,7 @@ impl Buffer {
common_data: &CommonCircuitData<F, C, D>,
) -> Result<FriInitialTreeProof<F, C::Hasher>> {
let config = &common_data.config;
let salt = salt_size(common_data.fri_params.hiding);
let mut evals_proofs = Vec::with_capacity(4);
let constants_sigmas_v =
@@ -242,17 +244,18 @@ impl Buffer {
let constants_sigmas_p = self.read_merkle_proof()?;
evals_proofs.push((constants_sigmas_v, constants_sigmas_p));
let wires_v = self.read_field_vec(config.num_wires)?;
let wires_v = self.read_field_vec(config.num_wires + salt)?;
let wires_p = self.read_merkle_proof()?;
evals_proofs.push((wires_v, wires_p));
let zs_partial_v =
self.read_field_vec(config.num_challenges * (1 + common_data.num_partial_products))?;
let zs_partial_v = self.read_field_vec(
config.num_challenges * (1 + common_data.num_partial_products) + salt,
)?;
let zs_partial_p = self.read_merkle_proof()?;
evals_proofs.push((zs_partial_v, zs_partial_p));
let quotient_v =
self.read_field_vec(config.num_challenges * common_data.quotient_degree_factor)?;
self.read_field_vec(config.num_challenges * common_data.quotient_degree_factor + salt)?;
let quotient_p = self.read_merkle_proof()?;
evals_proofs.push((quotient_v, quotient_p));
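
The common thread in these deserialization changes (and in the matching leaf-width changes in the recursive verifier above) is that enabling the zero-knowledge `hiding` flag adds `salt` blinding columns to each committed oracle, so the expected leaf sizes must be widened accordingly. A rough sketch of that bookkeeping; the helper name and constant below are illustrative, and the real width comes from `salt_size(hiding)`:

// Hypothetical leaf widths mirroring the reads above; SALT_WIDTH stands in for whatever
// `salt_size` returns when hiding is enabled (illustrative, not the library's constant).
const SALT_WIDTH: usize = 4;

fn leaf_widths(
    num_wires: usize,
    num_challenges: usize,
    num_partial_products: usize,
    quotient_degree_factor: usize,
    hiding: bool,
) -> [usize; 3] {
    let salt = if hiding { SALT_WIDTH } else { 0 };
    [
        num_wires + salt,                                   // wires oracle
        num_challenges * (1 + num_partial_products) + salt, // Z / partial-products oracle
        num_challenges * quotient_degree_factor + salt,     // quotient oracle
    ]
}

fn main() {
    assert_eq!(leaf_widths(80, 2, 9, 8, false), [80, 20, 16]);
    assert_eq!(leaf_widths(80, 2, 9, 8, true), [84, 24, 20]);
}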

View File

@ -53,12 +53,12 @@ impl<P: PackedField> ConstraintConsumer<P> {
}
/// Add one constraint valid on all rows except the last.
pub fn constraint(&mut self, constraint: P) {
self.constraint_wrapping(constraint * self.z_last);
pub fn constraint_transition(&mut self, constraint: P) {
self.constraint(constraint * self.z_last);
}
/// Add one constraint on all rows.
pub fn constraint_wrapping(&mut self, constraint: P) {
pub fn constraint(&mut self, constraint: P) {
for (&alpha, acc) in self.alphas.iter().zip(&mut self.constraint_accs) {
*acc *= alpha;
*acc += constraint;
@ -68,13 +68,13 @@ impl<P: PackedField> ConstraintConsumer<P> {
/// Add one constraint, but first multiply it by a filter such that it will only apply to the
/// first row of the trace.
pub fn constraint_first_row(&mut self, constraint: P) {
self.constraint_wrapping(constraint * self.lagrange_basis_first);
self.constraint(constraint * self.lagrange_basis_first);
}
/// Add one constraint, but first multiply it by a filter such that it will only apply to the
/// last row of the trace.
pub fn constraint_last_row(&mut self, constraint: P) {
self.constraint_wrapping(constraint * self.lagrange_basis_last);
self.constraint(constraint * self.lagrange_basis_last);
}
}
@ -122,17 +122,17 @@ impl<F: RichField + Extendable<D>, const D: usize> RecursiveConstraintConsumer<F
}
/// Add one constraint valid on all rows except the last.
pub fn constraint(
pub fn constraint_transition(
&mut self,
builder: &mut CircuitBuilder<F, D>,
constraint: ExtensionTarget<D>,
) {
let filtered_constraint = builder.mul_extension(constraint, self.z_last);
self.constraint_wrapping(builder, filtered_constraint);
self.constraint(builder, filtered_constraint);
}
/// Add one constraint valid on all rows.
pub fn constraint_wrapping(
pub fn constraint(
&mut self,
builder: &mut CircuitBuilder<F, D>,
constraint: ExtensionTarget<D>,
@ -150,7 +150,7 @@ impl<F: RichField + Extendable<D>, const D: usize> RecursiveConstraintConsumer<F
constraint: ExtensionTarget<D>,
) {
let filtered_constraint = builder.mul_extension(constraint, self.lagrange_basis_first);
self.constraint_wrapping(builder, filtered_constraint);
self.constraint(builder, filtered_constraint);
}
/// Add one constraint, but first multiply it by a filter such that it will only apply to the
@ -161,6 +161,6 @@ impl<F: RichField + Extendable<D>, const D: usize> RecursiveConstraintConsumer<F
constraint: ExtensionTarget<D>,
) {
let filtered_constraint = builder.mul_extension(constraint, self.lagrange_basis_last);
self.constraint_wrapping(builder, filtered_constraint);
self.constraint(builder, filtered_constraint);
}
}
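
The rename is easier to follow with the filtering spelled out: `constraint` folds its argument into every accumulator via a random `alpha`, while `constraint_transition` first multiplies by `z_last`, a polynomial that vanishes on the last row, so the constraint is waived exactly there (the first-/last-row variants filter with Lagrange basis polynomials in the same way). A toy accumulator over a small prime field, not plonky2 types:

// Toy accumulator over u64 arithmetic modulo a small prime; in the real code the values
// are field (or packed-field) elements and there is one accumulator per alpha.
const P: u64 = 101;

struct ToyConsumer {
    alpha: u64,
    acc: u64,
    z_last: u64, // evaluation of a polynomial that vanishes on the last row
}

impl ToyConsumer {
    /// Constraint that must hold on every row.
    fn constraint(&mut self, c: u64) {
        self.acc = (self.acc * self.alpha + c) % P;
    }
    /// Constraint that must hold on every row except the last.
    fn constraint_transition(&mut self, c: u64) {
        self.constraint(c * self.z_last % P);
    }
}

fn main() {
    let mut consumer = ToyConsumer { alpha: 7, acc: 0, z_last: 0 };
    // On the last row z_last = 0, so even a "violated" transition constraint is filtered out.
    consumer.constraint_transition(42);
    assert_eq!(consumer.acc, 0);
}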

View File

@ -2,16 +2,20 @@ use std::marker::PhantomData;
use plonky2::field::extension_field::{Extendable, FieldExtension};
use plonky2::field::packed_field::PackedField;
use plonky2::field::polynomial::PolynomialValues;
use plonky2::hash::hash_types::RichField;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::permutation::PermutationPair;
use crate::stark::Stark;
use crate::util::trace_rows_to_poly_values;
use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars};
/// Toy STARK system used for testing.
/// Computes a Fibonacci sequence with state `[x0, x1]` using the state transition
/// `x0 <- x1, x1 <- x0 + x1`.
/// Computes a Fibonacci sequence with state `[x0, x1, i, j]` using the state transition
/// `x0' <- x1, x1' <- x0 + x1, i' <- i+1, j' <- j+1`.
/// Note: The `i, j` columns are only used to test the permutation argument.
#[derive(Copy, Clone)]
struct FibonacciStark<F: RichField + Extendable<D>, const D: usize> {
num_rows: usize,
@ -34,21 +38,25 @@ impl<F: RichField + Extendable<D>, const D: usize> FibonacciStark<F, D> {
}
}
/// Generate the trace using `x0, x1` as inital state values.
fn generate_trace(&self, x0: F, x1: F) -> Vec<[F; Self::COLUMNS]> {
(0..self.num_rows)
.scan([x0, x1], |acc, _| {
/// Generate the trace using `x0, x1, 0, 1` as initial state values.
fn generate_trace(&self, x0: F, x1: F) -> Vec<PolynomialValues<F>> {
let mut trace_rows = (0..self.num_rows)
.scan([x0, x1, F::ZERO, F::ONE], |acc, _| {
let tmp = *acc;
acc[0] = tmp[1];
acc[1] = tmp[0] + tmp[1];
acc[2] = tmp[2] + F::ONE;
acc[3] = tmp[3] + F::ONE;
Some(tmp)
})
.collect()
.collect::<Vec<_>>();
trace_rows[self.num_rows - 1][3] = F::ZERO; // So that columns 2 and 3 are permutations of one another.
trace_rows_to_poly_values(trace_rows)
}
}
impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for FibonacciStark<F, D> {
const COLUMNS: usize = 2;
const COLUMNS: usize = 4;
const PUBLIC_INPUTS: usize = 3;
fn eval_packed_generic<FE, P, const D2: usize>(
@ -68,9 +76,11 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for FibonacciStar
.constraint_last_row(vars.local_values[1] - vars.public_inputs[Self::PI_INDEX_RES]);
// x0' <- x1
yield_constr.constraint(vars.next_values[0] - vars.local_values[1]);
yield_constr.constraint_transition(vars.next_values[0] - vars.local_values[1]);
// x1' <- x0 + x1
yield_constr.constraint(vars.next_values[1] - vars.local_values[0] - vars.local_values[1]);
yield_constr.constraint_transition(
vars.next_values[1] - vars.local_values[0] - vars.local_values[1],
);
}
fn eval_ext_recursively(
@ -91,18 +101,22 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for FibonacciStar
// x0' <- x1
let first_col_constraint = builder.sub_extension(vars.next_values[0], vars.local_values[1]);
yield_constr.constraint(builder, first_col_constraint);
yield_constr.constraint_transition(builder, first_col_constraint);
// x1' <- x0 + x1
let second_col_constraint = {
let tmp = builder.sub_extension(vars.next_values[1], vars.local_values[0]);
builder.sub_extension(tmp, vars.local_values[1])
};
yield_constr.constraint(builder, second_col_constraint);
yield_constr.constraint_transition(builder, second_col_constraint);
}
fn constraint_degree(&self) -> usize {
2
}
fn permutation_pairs(&self) -> Vec<PermutationPair> {
vec![PermutationPair::singletons(2, 3)]
}
}
#[cfg(test)]
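
The new four-column trace can be reproduced outside the STARK machinery; the sketch below uses plain `u64` instead of field elements and mirrors the transition and the last-row tweak described in the hunk above.

// Plain-Rust sketch of the trace layout: rows are [x0, x1, i, j] with the transition
// x0' = x1, x1' = x0 + x1, i' = i + 1, j' = j + 1; the final j is zeroed so that
// columns 2 and 3 are permutations of one another (exercising the permutation argument).
fn fibonacci_trace(num_rows: usize, x0: u64, x1: u64) -> Vec<[u64; 4]> {
    let mut rows = Vec::with_capacity(num_rows);
    let mut state = [x0, x1, 0, 1];
    for _ in 0..num_rows {
        rows.push(state);
        state = [state[1], state[0] + state[1], state[2] + 1, state[3] + 1];
    }
    if let Some(last) = rows.last_mut() {
        last[3] = 0;
    }
    rows
}

fn main() {
    let trace = fibonacci_trace(5, 0, 1);
    assert_eq!(trace[4][1], 5); // Fibonacci: 1, 1, 2, 3, 5
    let mut col_i: Vec<u64> = trace.iter().map(|r| r[2]).collect();
    let mut col_j: Vec<u64> = trace.iter().map(|r| r[3]).collect();
    col_i.sort();
    col_j.sort();
    assert_eq!(col_i, col_j); // the two auxiliary columns are permutations of each other
}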

View File

@ -1,4 +1,3 @@
use anyhow::Result;
use plonky2::field::extension_field::Extendable;
use plonky2::field::polynomial::PolynomialCoeffs;
use plonky2::fri::proof::{FriProof, FriProofTarget};
@ -11,7 +10,9 @@ use plonky2::plonk::circuit_builder::CircuitBuilder;
use plonky2::plonk::config::{AlgebraicHasher, GenericConfig};
use crate::config::StarkConfig;
use crate::permutation::get_n_permutation_challenge_sets;
use crate::permutation::{
get_n_permutation_challenge_sets, get_n_permutation_challenge_sets_target,
};
use crate::proof::*;
use crate::stark::Stark;
@ -26,7 +27,7 @@ fn get_challenges<F, C, S, const D: usize>(
pow_witness: F,
config: &StarkConfig,
degree_bits: usize,
) -> Result<StarkProofChallenges<F, D>>
) -> StarkProofChallenges<F, D>
where
F: RichField + Extendable<D>,
C: GenericConfig<D, F = F>,
@ -38,20 +39,15 @@ where
challenger.observe_cap(trace_cap);
let permutation_challenge_sets = if stark.uses_permutation_args() {
get_n_permutation_challenge_sets(
let permutation_challenge_sets = permutation_zs_cap.map(|permutation_zs_cap| {
let tmp = get_n_permutation_challenge_sets(
&mut challenger,
num_challenges,
stark.permutation_batch_size(),
)
} else {
vec![]
};
if stark.uses_permutation_args() {
let cap =
permutation_zs_cap.ok_or_else(|| anyhow::Error::msg("expected permutation_zs_cap"));
challenger.observe_cap(cap?);
}
);
challenger.observe_cap(permutation_zs_cap);
tmp
});
let stark_alphas = challenger.get_n_challenges(num_challenges);
@ -60,7 +56,7 @@ where
challenger.observe_openings(&openings.to_fri_openings());
Ok(StarkProofChallenges {
StarkProofChallenges {
permutation_challenge_sets,
stark_alphas,
stark_zeta,
@ -71,7 +67,7 @@ where
degree_bits,
&config.fri_config,
),
})
}
}
impl<F, C, const D: usize> StarkProofWithPublicInputs<F, C, D>
@ -84,11 +80,10 @@ where
stark: &S,
config: &StarkConfig,
degree_bits: usize,
) -> anyhow::Result<Vec<usize>> {
Ok(self
.get_challenges(stark, config, degree_bits)?
) -> Vec<usize> {
self.get_challenges(stark, config, degree_bits)
.fri_challenges
.fri_query_indices)
.fri_query_indices
}
/// Computes all Fiat-Shamir challenges used in the STARK proof.
@ -97,7 +92,7 @@ where
stark: &S,
config: &StarkConfig,
degree_bits: usize,
) -> Result<StarkProofChallenges<F, D>> {
) -> StarkProofChallenges<F, D> {
let StarkProof {
trace_cap,
permutation_zs_cap,
@ -131,9 +126,11 @@ where
pub(crate) fn get_challenges_target<
F: RichField + Extendable<D>,
C: GenericConfig<D, F = F>,
S: Stark<F, D>,
const D: usize,
>(
builder: &mut CircuitBuilder<F, D>,
stark: &S,
trace_cap: &MerkleCapTarget,
permutation_zs_cap: Option<&MerkleCapTarget>,
quotient_polys_cap: &MerkleCapTarget,
@ -151,6 +148,18 @@ where
let mut challenger = RecursiveChallenger::<F, C::Hasher, D>::new(builder);
challenger.observe_cap(trace_cap);
let permutation_challenge_sets = permutation_zs_cap.map(|permutation_zs_cap| {
let tmp = get_n_permutation_challenge_sets_target(
builder,
&mut challenger,
num_challenges,
stark.permutation_batch_size(),
);
challenger.observe_cap(permutation_zs_cap);
tmp
});
let stark_alphas = challenger.get_n_challenges(builder, num_challenges);
challenger.observe_cap(quotient_polys_cap);
@ -159,6 +168,7 @@ where
challenger.observe_openings(&openings.to_fri_openings());
StarkProofChallengesTarget {
permutation_challenge_sets,
stark_alphas,
stark_zeta,
fri_challenges: challenger.fri_challenges::<C>(
@ -172,9 +182,14 @@ where
}
impl<const D: usize> StarkProofWithPublicInputsTarget<D> {
pub(crate) fn get_challenges<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>>(
pub(crate) fn get_challenges<
F: RichField + Extendable<D>,
C: GenericConfig<D, F = F>,
S: Stark<F, D>,
>(
&self,
builder: &mut CircuitBuilder<F, D>,
stark: &S,
config: &StarkConfig,
) -> StarkProofChallengesTarget<D>
where
@ -194,8 +209,9 @@ impl<const D: usize> StarkProofWithPublicInputsTarget<D> {
},
} = &self.proof;
get_challenges_target::<F, C, D>(
get_challenges_target::<F, C, S, D>(
builder,
stark,
trace_cap,
permutation_zs_cap.as_ref(),
quotient_polys_cap,

View File

@ -3,6 +3,7 @@
#![allow(unused_variables)]
#![allow(incomplete_features)]
#![allow(clippy::too_many_arguments)]
#![allow(clippy::type_complexity)]
#![feature(generic_const_exprs)]
pub mod config;
@ -14,6 +15,8 @@ pub mod prover;
pub mod recursive_verifier;
pub mod stark;
pub mod stark_testing;
pub mod util;
pub mod vanishing_poly;
pub mod vars;
pub mod verifier;

View File

@ -2,16 +2,23 @@
use itertools::Itertools;
use plonky2::field::batch_util::batch_multiply_inplace;
use plonky2::field::extension_field::Extendable;
use plonky2::field::extension_field::{Extendable, FieldExtension};
use plonky2::field::field_types::Field;
use plonky2::field::packed_field::PackedField;
use plonky2::field::polynomial::PolynomialValues;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::challenger::Challenger;
use plonky2::plonk::config::{GenericConfig, Hasher};
use plonky2::iop::challenger::{Challenger, RecursiveChallenger};
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::iop::target::Target;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use plonky2::plonk::config::{AlgebraicHasher, GenericConfig, Hasher};
use plonky2::util::reducing::{ReducingFactor, ReducingFactorTarget};
use rayon::prelude::*;
use crate::config::StarkConfig;
use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::stark::Stark;
use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars};
/// A pair of lists of columns, `lhs` and `rhs`, that should be permutations of one another.
/// In particular, there should exist some permutation `pi` such that for any `i`,
@ -23,32 +30,41 @@ pub struct PermutationPair {
pub column_pairs: Vec<(usize, usize)>,
}
impl PermutationPair {
pub fn singletons(lhs: usize, rhs: usize) -> Self {
Self {
column_pairs: vec![(lhs, rhs)],
}
}
}
/// A single instance of a permutation check protocol.
pub(crate) struct PermutationInstance<'a, F: Field> {
pub(crate) struct PermutationInstance<'a, T: Copy> {
pub(crate) pair: &'a PermutationPair,
pub(crate) challenge: PermutationChallenge<F>,
pub(crate) challenge: PermutationChallenge<T>,
}
/// Randomness for a single instance of a permutation check protocol.
#[derive(Copy, Clone)]
pub(crate) struct PermutationChallenge<F: Field> {
pub(crate) struct PermutationChallenge<T: Copy> {
/// Randomness used to combine multiple columns into one.
pub(crate) beta: F,
pub(crate) beta: T,
/// Random offset that's added to the beta-reduced column values.
pub(crate) gamma: F,
pub(crate) gamma: T,
}
/// Like `PermutationChallenge`, but with `num_challenges` copies to boost soundness.
pub(crate) struct PermutationChallengeSet<F: Field> {
pub(crate) challenges: Vec<PermutationChallenge<F>>,
#[derive(Clone)]
pub(crate) struct PermutationChallengeSet<T: Copy> {
pub(crate) challenges: Vec<PermutationChallenge<T>>,
}
/// Compute all Z polynomials (for permutation arguments).
pub(crate) fn compute_permutation_z_polys<F, C, S, const D: usize>(
stark: &S,
config: &StarkConfig,
challenger: &mut Challenger<F, C::Hasher>,
trace_poly_values: &[PolynomialValues<F>],
permutation_challenge_sets: &[PermutationChallengeSet<F>],
) -> Vec<PolynomialValues<F>>
where
F: RichField + Extendable<D>,
@ -56,59 +72,37 @@ where
S: Stark<F, D>,
{
let permutation_pairs = stark.permutation_pairs();
let permutation_challenge_sets = get_n_permutation_challenge_sets(
challenger,
let permutation_batches = get_permutation_batches(
&permutation_pairs,
permutation_challenge_sets,
config.num_challenges,
stark.permutation_batch_size(),
);
// Get a list of instances of our batch-permutation argument. These are permutation arguments
// where the same `Z(x)` polynomial is used to check more than one permutation.
// Before batching, each permutation pair leads to `num_challenges` permutation arguments, so we
// start with the cartesian product of `permutation_pairs` and `0..num_challenges`. Then we
// chunk these arguments based on our batch size.
let permutation_instances = permutation_pairs
.iter()
.cartesian_product(0..config.num_challenges)
.chunks(stark.permutation_batch_size())
.into_iter()
.flat_map(|batch| {
batch.enumerate().map(|(i, (pair, chal))| {
let challenge = permutation_challenge_sets[i].challenges[chal];
PermutationInstance { pair, challenge }
})
})
.collect_vec();
permutation_instances
permutation_batches
.into_par_iter()
.map(|instance| compute_permutation_z_poly(instance, trace_poly_values))
.map(|instances| compute_permutation_z_poly(&instances, trace_poly_values))
.collect()
}
/// Compute a single Z polynomial.
// TODO: Change this to handle a batch of `PermutationInstance`s.
fn compute_permutation_z_poly<F: Field>(
instance: PermutationInstance<F>,
instances: &[PermutationInstance<F>],
trace_poly_values: &[PolynomialValues<F>],
) -> PolynomialValues<F> {
let PermutationInstance { pair, challenge } = instance;
let PermutationPair { column_pairs } = pair;
let PermutationChallenge { beta, gamma } = challenge;
let degree = trace_poly_values[0].len();
let mut reduced_lhs = PolynomialValues::constant(gamma, degree);
let mut reduced_rhs = PolynomialValues::constant(gamma, degree);
let (reduced_lhs_polys, reduced_rhs_polys): (Vec<_>, Vec<_>) = instances
.iter()
.map(|instance| permutation_reduced_polys(instance, trace_poly_values, degree))
.unzip();
for ((lhs, rhs), weight) in column_pairs.iter().zip(beta.powers()) {
reduced_lhs.add_assign_scaled(&trace_poly_values[*lhs], weight);
reduced_rhs.add_assign_scaled(&trace_poly_values[*rhs], weight);
}
let numerator = poly_product_elementwise(reduced_lhs_polys.into_iter());
let denominator = poly_product_elementwise(reduced_rhs_polys.into_iter());
// Compute the quotients.
let reduced_rhs_inverses = F::batch_multiplicative_inverse(&reduced_rhs.values);
let mut quotients = reduced_lhs.values;
batch_multiply_inplace(&mut quotients, &reduced_rhs_inverses);
let denominator_inverses = F::batch_multiplicative_inverse(&denominator.values);
let mut quotients = numerator.values;
batch_multiply_inplace(&mut quotients, &denominator_inverses);
// Compute Z, which contains partial products of the quotients.
let mut partial_products = Vec::with_capacity(degree);
@ -120,6 +114,39 @@ fn compute_permutation_z_poly<F: Field>(
PolynomialValues::new(partial_products)
}
/// Computes the reduced polynomial, `\sum beta^i f_i(x) + gamma`, for both the "left" and "right"
/// sides of a given `PermutationPair`.
fn permutation_reduced_polys<F: Field>(
instance: &PermutationInstance<F>,
trace_poly_values: &[PolynomialValues<F>],
degree: usize,
) -> (PolynomialValues<F>, PolynomialValues<F>) {
let PermutationInstance {
pair: PermutationPair { column_pairs },
challenge: PermutationChallenge { beta, gamma },
} = instance;
let mut reduced_lhs = PolynomialValues::constant(*gamma, degree);
let mut reduced_rhs = PolynomialValues::constant(*gamma, degree);
for ((lhs, rhs), weight) in column_pairs.iter().zip(beta.powers()) {
reduced_lhs.add_assign_scaled(&trace_poly_values[*lhs], weight);
reduced_rhs.add_assign_scaled(&trace_poly_values[*rhs], weight);
}
(reduced_lhs, reduced_rhs)
}
/// Computes the elementwise product of a set of polynomials. Assumes that the set is non-empty and
/// that each polynomial has the same length.
fn poly_product_elementwise<F: Field>(
mut polys: impl Iterator<Item = PolynomialValues<F>>,
) -> PolynomialValues<F> {
let mut product = polys.next().expect("Expected at least one polynomial");
for poly in polys {
batch_multiply_inplace(&mut product.values, &poly.values)
}
product
}
fn get_permutation_challenge<F: RichField, H: Hasher<F>>(
challenger: &mut Challenger<F, H>,
) -> PermutationChallenge<F> {
@ -147,3 +174,221 @@ pub(crate) fn get_n_permutation_challenge_sets<F: RichField, H: Hasher<F>>(
.map(|_| get_permutation_challenge_set(challenger, num_challenges))
.collect()
}
fn get_permutation_challenge_target<
F: RichField + Extendable<D>,
H: AlgebraicHasher<F>,
const D: usize,
>(
builder: &mut CircuitBuilder<F, D>,
challenger: &mut RecursiveChallenger<F, H, D>,
) -> PermutationChallenge<Target> {
let beta = challenger.get_challenge(builder);
let gamma = challenger.get_challenge(builder);
PermutationChallenge { beta, gamma }
}
fn get_permutation_challenge_set_target<
F: RichField + Extendable<D>,
H: AlgebraicHasher<F>,
const D: usize,
>(
builder: &mut CircuitBuilder<F, D>,
challenger: &mut RecursiveChallenger<F, H, D>,
num_challenges: usize,
) -> PermutationChallengeSet<Target> {
let challenges = (0..num_challenges)
.map(|_| get_permutation_challenge_target(builder, challenger))
.collect();
PermutationChallengeSet { challenges }
}
pub(crate) fn get_n_permutation_challenge_sets_target<
F: RichField + Extendable<D>,
H: AlgebraicHasher<F>,
const D: usize,
>(
builder: &mut CircuitBuilder<F, D>,
challenger: &mut RecursiveChallenger<F, H, D>,
num_challenges: usize,
num_sets: usize,
) -> Vec<PermutationChallengeSet<Target>> {
(0..num_sets)
.map(|_| get_permutation_challenge_set_target(builder, challenger, num_challenges))
.collect()
}
/// Get a list of instances of our batch-permutation argument. These are permutation arguments
/// where the same `Z(x)` polynomial is used to check more than one permutation.
/// Before batching, each permutation pair leads to `num_challenges` permutation arguments, so we
/// start with the cartesian product of `permutation_pairs` and `0..num_challenges`. Then we
/// chunk these arguments based on our batch size.
pub(crate) fn get_permutation_batches<'a, T: Copy>(
permutation_pairs: &'a [PermutationPair],
permutation_challenge_sets: &[PermutationChallengeSet<T>],
num_challenges: usize,
batch_size: usize,
) -> Vec<Vec<PermutationInstance<'a, T>>> {
permutation_pairs
.iter()
.cartesian_product(0..num_challenges)
.chunks(batch_size)
.into_iter()
.map(|batch| {
batch
.enumerate()
.map(|(i, (pair, chal))| {
let challenge = permutation_challenge_sets[i].challenges[chal];
PermutationInstance { pair, challenge }
})
.collect_vec()
})
.collect()
}
// TODO: Use slices.
pub struct PermutationCheckVars<F: Field, FE: FieldExtension<D2, BaseField = F>, const D2: usize> {
pub(crate) local_zs: Vec<FE>,
pub(crate) next_zs: Vec<FE>,
pub(crate) permutation_challenge_sets: Vec<PermutationChallengeSet<F>>,
}
pub(crate) fn eval_permutation_checks<F, FE, P, C, S, const D: usize, const D2: usize>(
stark: &S,
config: &StarkConfig,
vars: StarkEvaluationVars<FE, FE, { S::COLUMNS }, { S::PUBLIC_INPUTS }>,
permutation_data: PermutationCheckVars<F, FE, D2>,
consumer: &mut ConstraintConsumer<FE>,
) where
F: RichField + Extendable<D>,
FE: FieldExtension<D2, BaseField = F>,
P: PackedField<Scalar = FE>,
C: GenericConfig<D, F = F>,
S: Stark<F, D>,
[(); S::COLUMNS]:,
[(); S::PUBLIC_INPUTS]:,
{
let PermutationCheckVars {
local_zs,
next_zs,
permutation_challenge_sets,
} = permutation_data;
// Check that Z(1) = 1;
for &z in &local_zs {
consumer.constraint_first_row(z - FE::ONE);
}
let permutation_pairs = stark.permutation_pairs();
let permutation_batches = get_permutation_batches(
&permutation_pairs,
&permutation_challenge_sets,
config.num_challenges,
stark.permutation_batch_size(),
);
// Each zs value corresponds to a permutation batch.
for (i, instances) in permutation_batches.iter().enumerate() {
// Check the recurrence Z(gx) * reduced_rhs = Z(x) * reduced_lhs.
let (reduced_lhs, reduced_rhs): (Vec<FE>, Vec<FE>) = instances
.iter()
.map(|instance| {
let PermutationInstance {
pair: PermutationPair { column_pairs },
challenge: PermutationChallenge { beta, gamma },
} = instance;
let mut factor = ReducingFactor::new(*beta);
let (lhs, rhs): (Vec<_>, Vec<_>) = column_pairs
.iter()
.map(|&(i, j)| (vars.local_values[i], vars.local_values[j]))
.unzip();
(
factor.reduce_ext(lhs.into_iter()) + FE::from_basefield(*gamma),
factor.reduce_ext(rhs.into_iter()) + FE::from_basefield(*gamma),
)
})
.unzip();
let constraint = next_zs[i] * reduced_rhs.into_iter().product()
- local_zs[i] * reduced_lhs.into_iter().product();
consumer.constraint(constraint);
}
}
// TODO: Use slices.
pub struct PermutationCheckDataTarget<const D: usize> {
pub(crate) local_zs: Vec<ExtensionTarget<D>>,
pub(crate) next_zs: Vec<ExtensionTarget<D>>,
pub(crate) permutation_challenge_sets: Vec<PermutationChallengeSet<Target>>,
}
pub(crate) fn eval_permutation_checks_recursively<F, S, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
stark: &S,
config: &StarkConfig,
vars: StarkEvaluationTargets<D, { S::COLUMNS }, { S::PUBLIC_INPUTS }>,
permutation_data: PermutationCheckDataTarget<D>,
consumer: &mut RecursiveConstraintConsumer<F, D>,
) where
F: RichField + Extendable<D>,
S: Stark<F, D>,
[(); S::COLUMNS]:,
[(); S::PUBLIC_INPUTS]:,
{
let PermutationCheckDataTarget {
local_zs,
next_zs,
permutation_challenge_sets,
} = permutation_data;
let one = builder.one_extension();
// Check that Z(1) = 1;
for &z in &local_zs {
let z_1 = builder.sub_extension(z, one);
consumer.constraint_first_row(builder, z_1);
}
let permutation_pairs = stark.permutation_pairs();
let permutation_batches = get_permutation_batches(
&permutation_pairs,
&permutation_challenge_sets,
config.num_challenges,
stark.permutation_batch_size(),
);
// Each zs value corresponds to a permutation batch.
for (i, instances) in permutation_batches.iter().enumerate() {
let (reduced_lhs, reduced_rhs): (Vec<ExtensionTarget<D>>, Vec<ExtensionTarget<D>>) =
instances
.iter()
.map(|instance| {
let PermutationInstance {
pair: PermutationPair { column_pairs },
challenge: PermutationChallenge { beta, gamma },
} = instance;
let beta_ext = builder.convert_to_ext(*beta);
let gamma_ext = builder.convert_to_ext(*gamma);
let mut factor = ReducingFactorTarget::new(beta_ext);
let (lhs, rhs): (Vec<_>, Vec<_>) = column_pairs
.iter()
.map(|&(i, j)| (vars.local_values[i], vars.local_values[j]))
.unzip();
let reduced_lhs = factor.reduce(&lhs, builder);
let reduced_rhs = factor.reduce(&rhs, builder);
(
builder.add_extension(reduced_lhs, gamma_ext),
builder.add_extension(reduced_rhs, gamma_ext),
)
})
.unzip();
let reduced_lhs_product = builder.mul_many_extension(&reduced_lhs);
let reduced_rhs_product = builder.mul_many_extension(&reduced_rhs);
// constraint = next_zs[i] * reduced_rhs_product - local_zs[i] * reduced_lhs_product
let constraint = {
let tmp = builder.mul_extension(local_zs[i], reduced_lhs_product);
builder.mul_sub_extension(next_zs[i], reduced_rhs_product, tmp)
};
consumer.constraint(builder, constraint)
}
}
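
Numerically, each Z column above is a running product of quotients of the gamma-shifted, beta-reduced column values; if the left and right column sets really are permutations of one another, the product wraps back to 1, which is exactly what the `Z(1) = 1` and `Z(gx) * reduced_rhs = Z(x) * reduced_lhs` constraints enforce. A toy check over F_101 with a single column pair (so the beta reduction is trivial), independent of plonky2's types:

const P: u64 = 101;

/// Fermat inverse: x^(P-2) mod P (P is prime, x != 0).
fn inv(x: u64) -> u64 {
    let (mut acc, mut base, mut e) = (1u64, x % P, P - 2);
    while e > 0 {
        if e & 1 == 1 {
            acc = acc * base % P;
        }
        base = base * base % P;
        e >>= 1;
    }
    acc
}

fn main() {
    let gamma = 7u64;
    let lhs: [u64; 4] = [3, 1, 4, 2];
    let rhs: [u64; 4] = [1, 2, 3, 4]; // a permutation of lhs
    // Z(1) = 1, and Z(g^{i+1}) = Z(g^i) * (lhs[i] + gamma) / (rhs[i] + gamma).
    let mut z = vec![1u64];
    for i in 0..lhs.len() {
        let quot = (lhs[i] + gamma) % P * inv((rhs[i] + gamma) % P) % P;
        z.push(z[i] * quot % P);
    }
    assert_eq!(z[0], 1); // the `constraint_first_row(z - 1)` check
    assert_eq!(z[lhs.len()], 1); // permutation => numerator and denominator products agree
}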

View File

@ -32,6 +32,7 @@ pub struct StarkProof<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>,
}
impl<F: RichField + Extendable<D>, C: GenericConfig<D, F = F>, const D: usize> StarkProof<F, C, D> {
/// Recover the length of the trace from a STARK proof and a STARK config.
pub(crate) fn recover_degree_bits(&self, config: &StarkConfig) -> usize {
let initial_merkle_proof = &self.opening_proof.query_round_proofs[0]
.initial_trees_proof
@ -51,6 +52,7 @@ pub struct StarkProofTarget<const D: usize> {
}
impl<const D: usize> StarkProofTarget<D> {
/// Recover the length of the trace from a STARK proof and a STARK config.
pub(crate) fn recover_degree_bits(&self, config: &StarkConfig) -> usize {
let initial_merkle_proof = &self.opening_proof.query_round_proofs[0]
.initial_trees_proof
@ -101,7 +103,7 @@ pub struct CompressedStarkProofWithPublicInputs<
pub(crate) struct StarkProofChallenges<F: RichField + Extendable<D>, const D: usize> {
/// Randomness used in any permutation arguments.
pub permutation_challenge_sets: Vec<PermutationChallengeSet<F>>,
pub permutation_challenge_sets: Option<Vec<PermutationChallengeSet<F>>>,
/// Random values used to combine STARK constraints.
pub stark_alphas: Vec<F>,
@ -113,6 +115,7 @@ pub(crate) struct StarkProofChallenges<F: RichField + Extendable<D>, const D: us
}
pub(crate) struct StarkProofChallengesTarget<const D: usize> {
pub permutation_challenge_sets: Option<Vec<PermutationChallengeSet<Target>>>,
pub stark_alphas: Vec<Target>,
pub stark_zeta: ExtensionTarget<D>,
pub fri_challenges: FriChallengesTarget<D>,
@ -179,27 +182,29 @@ impl<F: RichField + Extendable<D>, const D: usize> StarkOpeningSet<F, D> {
pub struct StarkOpeningSetTarget<const D: usize> {
pub local_values: Vec<ExtensionTarget<D>>,
pub next_values: Vec<ExtensionTarget<D>>,
pub permutation_zs: Vec<ExtensionTarget<D>>,
pub permutation_zs_right: Vec<ExtensionTarget<D>>,
pub permutation_zs: Option<Vec<ExtensionTarget<D>>>,
pub permutation_zs_right: Option<Vec<ExtensionTarget<D>>>,
pub quotient_polys: Vec<ExtensionTarget<D>>,
}
impl<const D: usize> StarkOpeningSetTarget<D> {
pub(crate) fn to_fri_openings(&self) -> FriOpeningsTarget<D> {
let zeta_batch = FriOpeningBatchTarget {
values: [
self.local_values.as_slice(),
self.quotient_polys.as_slice(),
self.permutation_zs.as_slice(),
]
.concat(),
values: self
.local_values
.iter()
.chain(self.permutation_zs.iter().flatten())
.chain(&self.quotient_polys)
.copied()
.collect_vec(),
};
let zeta_right_batch = FriOpeningBatchTarget {
values: [
self.next_values.as_slice(),
self.permutation_zs_right.as_slice(),
]
.concat(),
values: self
.next_values
.iter()
.chain(self.permutation_zs_right.iter().flatten())
.copied()
.collect_vec(),
};
FriOpeningsTarget {
batches: vec![zeta_batch, zeta_right_batch],
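
Making the openings `Option<Vec<_>>` composes cleanly here because `Option::iter().flatten()` yields nothing for `None` and the inner elements for `Some`, so permutation openings simply vanish from the FRI batches when the STARK has no permutation argument. A tiny standalone illustration of the idiom:

fn batch(values: &[u64], permutation_zs: &Option<Vec<u64>>, quotient: &[u64]) -> Vec<u64> {
    values
        .iter()
        .chain(permutation_zs.iter().flatten())
        .chain(quotient)
        .copied()
        .collect()
}

fn main() {
    assert_eq!(batch(&[1, 2], &None, &[5]), vec![1, 2, 5]);
    assert_eq!(batch(&[1, 2], &Some(vec![3, 4]), &[5]), vec![1, 2, 3, 4, 5]);
}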

View File

@ -18,15 +18,19 @@ use rayon::prelude::*;
use crate::config::StarkConfig;
use crate::constraint_consumer::ConstraintConsumer;
use crate::permutation::compute_permutation_z_polys;
use crate::permutation::PermutationCheckVars;
use crate::permutation::{
compute_permutation_z_polys, get_n_permutation_challenge_sets, PermutationChallengeSet,
};
use crate::proof::{StarkOpeningSet, StarkProof, StarkProofWithPublicInputs};
use crate::stark::Stark;
use crate::vanishing_poly::eval_vanishing_poly;
use crate::vars::StarkEvaluationVars;
pub fn prove<F, C, S, const D: usize>(
stark: S,
config: &StarkConfig,
trace: Vec<[F; S::COLUMNS]>,
trace_poly_values: Vec<PolynomialValues<F>>,
public_inputs: [F; S::PUBLIC_INPUTS],
timing: &mut TimingTree,
) -> Result<StarkProofWithPublicInputs<F, C, D>>
@ -38,23 +42,16 @@ where
[(); S::PUBLIC_INPUTS]:,
[(); C::Hasher::HASH_SIZE]:,
{
let degree = trace.len();
let degree = trace_poly_values[0].len();
let degree_bits = log2_strict(degree);
let trace_vecs = trace.iter().map(|row| row.to_vec()).collect_vec();
let trace_col_major: Vec<Vec<F>> = transpose(&trace_vecs);
let trace_poly_values: Vec<PolynomialValues<F>> = timed!(
timing,
"compute trace polynomials",
trace_col_major
.par_iter()
.map(|column| PolynomialValues::new(column.clone()))
.collect()
);
let fri_params = config.fri_params(degree_bits);
let rate_bits = config.fri_config.rate_bits;
let cap_height = config.fri_config.cap_height;
assert!(
fri_params.total_arities() <= degree_bits + rate_bits - cap_height,
"FRI total reduction arity is too large.",
);
let trace_commitment = timed!(
timing,
"compute trace commitment",
@ -75,28 +72,36 @@ where
challenger.observe_cap(&trace_cap);
// Permutation arguments.
let permutation_zs_commitment = if stark.uses_permutation_args() {
let permutation_zs_commitment_challenges = stark.uses_permutation_args().then(|| {
let permutation_challenge_sets = get_n_permutation_challenge_sets(
&mut challenger,
config.num_challenges,
stark.permutation_batch_size(),
);
let permutation_z_polys = compute_permutation_z_polys::<F, C, S, D>(
&stark,
config,
&mut challenger,
&trace_poly_values,
&permutation_challenge_sets,
);
timed!(
let permutation_zs_commitment = timed!(
timing,
"compute permutation Z commitments",
Some(PolynomialBatch::from_values(
PolynomialBatch::from_values(
permutation_z_polys,
rate_bits,
false,
config.fri_config.cap_height,
timing,
None,
))
)
} else {
None
};
)
);
(permutation_zs_commitment, permutation_challenge_sets)
});
let permutation_zs_commitment = permutation_zs_commitment_challenges
.as_ref()
.map(|(comm, _)| comm);
let permutation_zs_cap = permutation_zs_commitment
.as_ref()
.map(|commit| commit.merkle_tree.cap.clone());
@ -108,10 +113,11 @@ where
let quotient_polys = compute_quotient_polys::<F, C, S, D>(
&stark,
&trace_commitment,
&permutation_zs_commitment_challenges,
public_inputs,
alphas,
degree_bits,
rate_bits,
config,
);
let all_quotient_chunks = quotient_polys
.into_par_iter()
@ -151,16 +157,15 @@ where
zeta,
g,
&trace_commitment,
permutation_zs_commitment.as_ref(),
permutation_zs_commitment,
&quotient_commitment,
);
challenger.observe_openings(&openings.to_fri_openings());
let initial_merkle_trees = once(&trace_commitment)
.chain(permutation_zs_commitment.as_ref())
.chain(permutation_zs_commitment)
.chain(once(&quotient_commitment))
.collect_vec();
let fri_params = config.fri_params(degree_bits);
let opening_proof = timed!(
timing,
@ -189,13 +194,17 @@ where
/// Computes the quotient polynomials `(sum alpha^i C_i(x)) / Z_H(x)` for `alpha` in `alphas`,
/// where the `C_i`s are the Stark constraints.
fn compute_quotient_polys<F, C, S, const D: usize>(
fn compute_quotient_polys<'a, F, C, S, const D: usize>(
stark: &S,
trace_commitment: &PolynomialBatch<F, C, D>,
trace_commitment: &'a PolynomialBatch<F, C, D>,
permutation_zs_commitment_challenges: &'a Option<(
PolynomialBatch<F, C, D>,
Vec<PermutationChallengeSet<F>>,
)>,
public_inputs: [F; S::PUBLIC_INPUTS],
alphas: Vec<F>,
degree_bits: usize,
rate_bits: usize,
config: &StarkConfig,
) -> Vec<PolynomialCoeffs<F>>
where
F: RichField + Extendable<D>,
@ -205,6 +214,7 @@ where
[(); S::PUBLIC_INPUTS]:,
{
let degree = 1 << degree_bits;
let rate_bits = config.fri_config.rate_bits;
let quotient_degree_bits = log2_ceil(stark.quotient_degree_factor());
assert!(
@ -224,9 +234,10 @@ where
let z_h_on_coset = ZeroPolyOnCoset::<F>::new(degree_bits, quotient_degree_bits);
// Retrieve the LDE values at index `i`.
let get_at_index = |comm: &PolynomialBatch<F, C, D>, i: usize| -> [F; S::COLUMNS] {
comm.get_lde_values(i * step).try_into().unwrap()
};
let get_at_index =
|comm: &'a PolynomialBatch<F, C, D>, i: usize| -> &'a [F] { comm.get_lde_values(i * step) };
let get_trace_at_index = |i| get_at_index(trace_commitment, i).try_into().unwrap();
// Last element of the subgroup.
let last = F::primitive_root_of_unity(degree_bits).inverse();
let size = degree << quotient_degree_bits;
@ -247,12 +258,26 @@ where
lagrange_last.values[i],
);
let vars = StarkEvaluationVars::<F, F, { S::COLUMNS }, { S::PUBLIC_INPUTS }> {
local_values: &get_at_index(trace_commitment, i),
next_values: &get_at_index(trace_commitment, (i + next_step) % size),
local_values: &get_trace_at_index(i),
next_values: &get_trace_at_index((i + next_step) % size),
public_inputs: &public_inputs,
};
stark.eval_packed_base(vars, &mut consumer);
// TODO: Add in constraints for permutation arguments.
let permutation_check_data = permutation_zs_commitment_challenges.as_ref().map(
|(permutation_zs_commitment, permutation_challenge_sets)| PermutationCheckVars {
local_zs: get_at_index(permutation_zs_commitment, i).to_vec(),
next_zs: get_at_index(permutation_zs_commitment, (i + next_step) % size)
.to_vec(),
permutation_challenge_sets: permutation_challenge_sets.to_vec(),
},
);
// TODO: Use packed field for F.
eval_vanishing_poly::<F, F, F, C, S, D, 1>(
stark,
config,
vars,
permutation_check_data,
&mut consumer,
);
// TODO: Fix this once we use a genuine `PackedField`.
let mut constraints_evals = consumer.accumulators();
// We divide the constraints evaluations by `Z_H(x)`.

View File

@ -1,5 +1,6 @@
use std::iter::once;
use anyhow::{ensure, Result};
use itertools::Itertools;
use plonky2::field::extension_field::Extendable;
use plonky2::field::field_types::Field;
@ -13,11 +14,13 @@ use plonky2::util::reducing::ReducingFactorTarget;
use crate::config::StarkConfig;
use crate::constraint_consumer::RecursiveConstraintConsumer;
use crate::permutation::PermutationCheckDataTarget;
use crate::proof::{
StarkOpeningSetTarget, StarkProof, StarkProofChallengesTarget, StarkProofTarget,
StarkProofWithPublicInputs, StarkProofWithPublicInputsTarget,
};
use crate::stark::Stark;
use crate::vanishing_poly::eval_vanishing_poly_recursively;
use crate::vars::StarkEvaluationTargets;
pub fn recursively_verify_stark_proof<
@ -37,7 +40,7 @@ pub fn recursively_verify_stark_proof<
{
assert_eq!(proof_with_pis.public_inputs.len(), S::PUBLIC_INPUTS);
let degree_bits = proof_with_pis.proof.recover_degree_bits(inner_config);
let challenges = proof_with_pis.get_challenges::<F, C>(builder, inner_config);
let challenges = proof_with_pis.get_challenges::<F, C, S>(builder, &stark, inner_config);
recursively_verify_stark_proof_with_challenges::<F, C, S, D>(
builder,
@ -67,6 +70,7 @@ fn recursively_verify_stark_proof_with_challenges<
[(); S::COLUMNS]:,
[(); S::PUBLIC_INPUTS]:,
{
check_permutation_options(&stark, &proof_with_pis, &challenges).unwrap();
let one = builder.one_extension();
let StarkProofWithPublicInputsTarget {
@ -104,8 +108,21 @@ fn recursively_verify_stark_proof_with_challenges<
l_1,
l_last,
);
stark.eval_ext_recursively(builder, vars, &mut consumer);
// TODO: Add in constraints for permutation arguments.
let permutation_data = stark
.uses_permutation_args()
.then(|| PermutationCheckDataTarget {
local_zs: permutation_zs.as_ref().unwrap().clone(),
next_zs: permutation_zs_right.as_ref().unwrap().clone(),
permutation_challenge_sets: challenges.permutation_challenge_sets.unwrap(),
});
eval_vanishing_poly_recursively::<F, C, S, D>(
builder,
&stark,
inner_config,
vars,
permutation_data,
&mut consumer,
);
let vanishing_polys_zeta = consumer.accumulators();
// Check each polynomial identity, of the form `vanishing(x) = Z_H(x) quotient(x)`, at zeta.
@ -187,24 +204,25 @@ pub fn add_virtual_stark_proof<F: RichField + Extendable<D>, S: Stark<F, D>, con
let fri_params = config.fri_params(degree_bits);
let cap_height = fri_params.config.cap_height;
let num_leaves_per_oracle = &[
S::COLUMNS,
// TODO: permutation polys
stark.quotient_degree_factor() * config.num_challenges,
];
let num_leaves_per_oracle = once(S::COLUMNS)
.chain(
stark
.uses_permutation_args()
.then(|| stark.num_permutation_batches(config)),
)
.chain(once(stark.quotient_degree_factor() * config.num_challenges))
.collect_vec();
let permutation_zs_cap = if stark.uses_permutation_args() {
Some(builder.add_virtual_cap(cap_height))
} else {
None
};
let permutation_zs_cap = stark
.uses_permutation_args()
.then(|| builder.add_virtual_cap(cap_height));
StarkProofTarget {
trace_cap: builder.add_virtual_cap(cap_height),
permutation_zs_cap,
quotient_polys_cap: builder.add_virtual_cap(cap_height),
openings: add_stark_opening_set::<F, S, D>(builder, stark, config),
opening_proof: builder.add_virtual_fri_proof(num_leaves_per_oracle, &fri_params),
opening_proof: builder.add_virtual_fri_proof(&num_leaves_per_oracle, &fri_params),
}
}
@ -217,8 +235,12 @@ fn add_stark_opening_set<F: RichField + Extendable<D>, S: Stark<F, D>, const D:
StarkOpeningSetTarget {
local_values: builder.add_virtual_extension_targets(S::COLUMNS),
next_values: builder.add_virtual_extension_targets(S::COLUMNS),
permutation_zs: vec![/*TODO*/],
permutation_zs_right: vec![/*TODO*/],
permutation_zs: stark
.uses_permutation_args()
.then(|| builder.add_virtual_extension_targets(stark.num_permutation_batches(config))),
permutation_zs_right: stark
.uses_permutation_args()
.then(|| builder.add_virtual_extension_targets(stark.num_permutation_batches(config))),
quotient_polys: builder
.add_virtual_extension_targets(stark.quotient_degree_factor() * num_challenges),
}
@ -267,5 +289,33 @@ pub fn set_stark_proof_target<F, C: GenericConfig<D, F = F>, W, const D: usize>(
&proof.openings.to_fri_openings(),
);
if let (Some(permutation_zs_cap_target), Some(permutation_zs_cap)) =
(&proof_target.permutation_zs_cap, &proof.permutation_zs_cap)
{
witness.set_cap_target(permutation_zs_cap_target, permutation_zs_cap);
}
set_fri_proof_target(witness, &proof_target.opening_proof, &proof.opening_proof);
}
/// Utility function to check that all permutation data wrapped in `Option`s are `Some` iff
/// the Stark uses a permutation argument.
fn check_permutation_options<F: RichField + Extendable<D>, S: Stark<F, D>, const D: usize>(
stark: &S,
proof_with_pis: &StarkProofWithPublicInputsTarget<D>,
challenges: &StarkProofChallengesTarget<D>,
) -> Result<()> {
let options_is_some = [
proof_with_pis.proof.permutation_zs_cap.is_some(),
proof_with_pis.proof.openings.permutation_zs.is_some(),
proof_with_pis.proof.openings.permutation_zs_right.is_some(),
challenges.permutation_challenge_sets.is_some(),
];
ensure!(
options_is_some
.into_iter()
.all(|b| b == stark.uses_permutation_args()),
"Permutation data doesn't match with Stark configuration."
);
Ok(())
}
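
The same pattern runs through this file: optional permutation data is created with `bool::then`, threaded around as `Option`s, and `check_permutation_options` asserts that their presence is consistent with `uses_permutation_args()`. A minimal standalone sketch of that plumbing:

fn main() {
    for uses_permutation_args in [false, true] {
        // `bool::then` replaces the earlier `if flag { Some(..) } else { None }` blocks.
        let permutation_zs_cap: Option<Vec<u64>> = uses_permutation_args.then(|| vec![0; 4]);
        let permutation_zs: Option<Vec<u64>> = uses_permutation_args.then(|| vec![0; 2]);

        // The consistency check: every optional piece must be Some iff the flag is set.
        let options_is_some = [permutation_zs_cap.is_some(), permutation_zs.is_some()];
        assert!(options_is_some.into_iter().all(|b| b == uses_permutation_args));
    }
}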

View File

@ -16,7 +16,6 @@ use crate::vars::StarkEvaluationTargets;
use crate::vars::StarkEvaluationVars;
/// Represents a STARK system.
// TODO: Add a `constraint_degree` fn that returns the maximum constraint degree.
pub trait Stark<F: RichField + Extendable<D>, const D: usize>: Sync {
/// The total number of columns in the trace.
const COLUMNS: usize;

starky/src/util.rs (new file)
View File

@ -0,0 +1,16 @@
use itertools::Itertools;
use plonky2::field::field_types::Field;
use plonky2::field::polynomial::PolynomialValues;
use plonky2::util::transpose;
/// A helper function to transpose a row-wise trace and put it in the format that `prove` expects.
pub fn trace_rows_to_poly_values<F: Field, const COLUMNS: usize>(
trace_rows: Vec<[F; COLUMNS]>,
) -> Vec<PolynomialValues<F>> {
let trace_row_vecs = trace_rows.into_iter().map(|row| row.to_vec()).collect_vec();
let trace_col_vecs: Vec<Vec<F>> = transpose(&trace_row_vecs);
trace_col_vecs
.into_iter()
.map(|column| PolynomialValues::new(column))
.collect()
}
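
A small usage sketch for the new helper (paths and field types as they appear elsewhere in this diff; the values are arbitrary and the snippet is illustrative rather than canonical): a row-major trace becomes one `PolynomialValues` per column, which is the shape `prove` now expects.

use plonky2::field::field_types::Field;
use plonky2::field::goldilocks_field::GoldilocksField;
use starky::util::trace_rows_to_poly_values;

fn main() {
    type F = GoldilocksField;
    let rows = vec![
        [F::ZERO, F::ONE],
        [F::ONE, F::ONE],
        [F::ONE, F::from_canonical_u64(2)],
    ];
    let columns = trace_rows_to_poly_values(rows);
    assert_eq!(columns.len(), 2); // one PolynomialValues<F> per column
    assert_eq!(columns[0].len(), 3); // each of length num_rows
}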

View File

@ -0,0 +1,68 @@
use plonky2::field::extension_field::{Extendable, FieldExtension};
use plonky2::field::packed_field::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use plonky2::plonk::config::GenericConfig;
use crate::config::StarkConfig;
use crate::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::permutation::{
eval_permutation_checks, eval_permutation_checks_recursively, PermutationCheckDataTarget,
PermutationCheckVars,
};
use crate::stark::Stark;
use crate::vars::{StarkEvaluationTargets, StarkEvaluationVars};
pub(crate) fn eval_vanishing_poly<F, FE, P, C, S, const D: usize, const D2: usize>(
stark: &S,
config: &StarkConfig,
vars: StarkEvaluationVars<FE, FE, { S::COLUMNS }, { S::PUBLIC_INPUTS }>,
permutation_data: Option<PermutationCheckVars<F, FE, D2>>,
consumer: &mut ConstraintConsumer<FE>,
) where
F: RichField + Extendable<D>,
FE: FieldExtension<D2, BaseField = F>,
P: PackedField<Scalar = FE>,
C: GenericConfig<D, F = F>,
S: Stark<F, D>,
[(); S::COLUMNS]:,
[(); S::PUBLIC_INPUTS]:,
{
stark.eval_packed_generic(vars, consumer);
if let Some(permutation_data) = permutation_data {
eval_permutation_checks::<F, FE, P, C, S, D, D2>(
stark,
config,
vars,
permutation_data,
consumer,
);
}
}
pub(crate) fn eval_vanishing_poly_recursively<F, C, S, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
stark: &S,
config: &StarkConfig,
vars: StarkEvaluationTargets<D, { S::COLUMNS }, { S::PUBLIC_INPUTS }>,
permutation_data: Option<PermutationCheckDataTarget<D>>,
consumer: &mut RecursiveConstraintConsumer<F, D>,
) where
F: RichField + Extendable<D>,
C: GenericConfig<D, F = F>,
S: Stark<F, D>,
[(); S::COLUMNS]:,
[(); S::PUBLIC_INPUTS]:,
{
stark.eval_ext_recursively(builder, vars, consumer);
if let Some(permutation_data) = permutation_data {
eval_permutation_checks_recursively::<F, S, D>(
builder,
stark,
config,
vars,
permutation_data,
consumer,
);
}
}

View File

@ -11,8 +11,10 @@ use plonky2::plonk::plonk_common::reduce_with_powers;
use crate::config::StarkConfig;
use crate::constraint_consumer::ConstraintConsumer;
use crate::permutation::PermutationCheckVars;
use crate::proof::{StarkOpeningSet, StarkProofChallenges, StarkProofWithPublicInputs};
use crate::stark::Stark;
use crate::vanishing_poly::eval_vanishing_poly;
use crate::vars::StarkEvaluationVars;
pub fn verify_stark_proof<
@ -32,7 +34,7 @@ where
{
ensure!(proof_with_pis.public_inputs.len() == S::PUBLIC_INPUTS);
let degree_bits = proof_with_pis.proof.recover_degree_bits(config);
let challenges = proof_with_pis.get_challenges(&stark, config, degree_bits)?;
let challenges = proof_with_pis.get_challenges(&stark, config, degree_bits);
verify_stark_proof_with_challenges(stark, proof_with_pis, challenges, degree_bits, config)
}
@ -53,6 +55,7 @@ where
[(); S::PUBLIC_INPUTS]:,
[(); C::Hasher::HASH_SIZE]:,
{
check_permutation_options(&stark, &proof_with_pis, &challenges)?;
let StarkProofWithPublicInputs {
proof,
public_inputs,
@ -88,8 +91,18 @@ where
l_1,
l_last,
);
stark.eval_ext(vars, &mut consumer);
// TODO: Add in constraints for permutation arguments.
let permutation_data = stark.uses_permutation_args().then(|| PermutationCheckVars {
local_zs: permutation_zs.as_ref().unwrap().clone(),
next_zs: permutation_zs_right.as_ref().unwrap().clone(),
permutation_challenge_sets: challenges.permutation_challenge_sets.unwrap(),
});
eval_vanishing_poly::<F, F::Extension, F::Extension, C, S, D, D>(
&stark,
config,
vars,
permutation_data,
&mut consumer,
);
let vanishing_polys_zeta = consumer.accumulators();
// Check each polynomial identity, of the form `vanishing(x) = Z_H(x) quotient(x)`, at zeta.
@ -105,7 +118,10 @@ where
.chunks(stark.quotient_degree_factor())
.enumerate()
{
ensure!(vanishing_polys_zeta[i] == z_h_zeta * reduce_with_powers(chunk, zeta_pow_deg));
ensure!(
vanishing_polys_zeta[i] == z_h_zeta * reduce_with_powers(chunk, zeta_pow_deg),
"Mismatch between evaluation and opening of quotient polynomial"
);
}
let merkle_caps = once(proof.trace_cap)
@ -141,7 +157,32 @@ fn eval_l_1_and_l_last<F: Field>(log_n: usize, x: F) -> (F, F) {
(z_x * invs[0], z_x * invs[1])
}
/// Recover the length of the trace from a STARK proof and a STARK config.
/// Utility function to check that all permutation data wrapped in `Option`s are `Some` iff
/// the Stark uses a permutation argument.
fn check_permutation_options<
F: RichField + Extendable<D>,
C: GenericConfig<D, F = F>,
S: Stark<F, D>,
const D: usize,
>(
stark: &S,
proof_with_pis: &StarkProofWithPublicInputs<F, C, D>,
challenges: &StarkProofChallenges<F, D>,
) -> Result<()> {
let options_is_some = [
proof_with_pis.proof.permutation_zs_cap.is_some(),
proof_with_pis.proof.openings.permutation_zs.is_some(),
proof_with_pis.proof.openings.permutation_zs_right.is_some(),
challenges.permutation_challenge_sets.is_some(),
];
ensure!(
options_is_some
.into_iter()
.all(|b| b == stark.uses_permutation_args()),
"Permutation data doesn't match with Stark configuration."
);
Ok(())
}
#[cfg(test)]
mod tests {

View File

@ -6,9 +6,18 @@ edition = "2021"
[dependencies]
plonky2 = { path = "../plonky2" }
plonky2_util = { path = "../util" }
starky = { path = "../starky" }
anyhow = "1.0.40"
env_logger = "0.9.0"
itertools = "0.10.0"
log = "0.4.14"
rand = "0.8.4"
rand_chacha = "0.3.1"
[dev-dependencies]
criterion = "0.3.5"
[[bench]]
name = "lookup_permuted_cols"
harness = false

View File

@ -0,0 +1,30 @@
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use itertools::Itertools;
use plonky2::field::field_types::Field;
use plonky2::field::goldilocks_field::GoldilocksField;
use rand::{thread_rng, Rng};
use system_zero::lookup::permuted_cols;
type F = GoldilocksField;
fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("lookup-permuted-cols");
for size_log in [16, 17, 18] {
let size = 1 << size_log;
group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, _| {
// We could benchmark a table of random values with
// let table = F::rand_vec(size);
// But in practice we currently use tables that are pre-sorted, which makes
// permuted_cols cheaper, since the sort it performs on the table then has little work to do.
let table = (0..size).map(F::from_canonical_usize).collect_vec();
let input = (0..size)
.map(|_| table[thread_rng().gen_range(0..size)])
.collect_vec();
b.iter(|| permuted_cols(&input, &table));
});
}
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@ -7,18 +7,18 @@ use plonky2::plonk::circuit_builder::CircuitBuilder;
use plonky2::plonk::plonk_common::reduce_with_powers_ext_recursive;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::registers::arithmetic::*;
use crate::registers::alu::*;
use crate::registers::NUM_COLUMNS;
pub(crate) fn generate_addition<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
let in_1 = values[COL_ADD_INPUT_1].to_canonical_u64();
let in_2 = values[COL_ADD_INPUT_2].to_canonical_u64();
let in_3 = values[COL_ADD_INPUT_3].to_canonical_u64();
let in_1 = values[COL_ADD_INPUT_0].to_canonical_u64();
let in_2 = values[COL_ADD_INPUT_1].to_canonical_u64();
let in_3 = values[COL_ADD_INPUT_2].to_canonical_u64();
let output = in_1 + in_2 + in_3;
values[COL_ADD_OUTPUT_1] = F::from_canonical_u16(output as u16);
values[COL_ADD_OUTPUT_2] = F::from_canonical_u16((output >> 16) as u16);
values[COL_ADD_OUTPUT_3] = F::from_canonical_u16((output >> 32) as u16);
values[COL_ADD_OUTPUT_0] = F::from_canonical_u16(output as u16);
values[COL_ADD_OUTPUT_1] = F::from_canonical_u16((output >> 16) as u16);
values[COL_ADD_OUTPUT_2] = F::from_canonical_u16((output >> 32) as u16);
}
pub(crate) fn eval_addition<F: Field, P: PackedField<Scalar = F>>(
@ -26,12 +26,12 @@ pub(crate) fn eval_addition<F: Field, P: PackedField<Scalar = F>>(
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_add = local_values[IS_ADD];
let in_1 = local_values[COL_ADD_INPUT_1];
let in_2 = local_values[COL_ADD_INPUT_2];
let in_3 = local_values[COL_ADD_INPUT_3];
let out_1 = local_values[COL_ADD_OUTPUT_1];
let out_2 = local_values[COL_ADD_OUTPUT_2];
let out_3 = local_values[COL_ADD_OUTPUT_3];
let in_1 = local_values[COL_ADD_INPUT_0];
let in_2 = local_values[COL_ADD_INPUT_1];
let in_3 = local_values[COL_ADD_INPUT_2];
let out_1 = local_values[COL_ADD_OUTPUT_0];
let out_2 = local_values[COL_ADD_OUTPUT_1];
let out_3 = local_values[COL_ADD_OUTPUT_2];
let weight_2 = F::from_canonical_u64(1 << 16);
let weight_3 = F::from_canonical_u64(1 << 32);
@ -41,7 +41,7 @@ pub(crate) fn eval_addition<F: Field, P: PackedField<Scalar = F>>(
let computed_out = in_1 + in_2 + in_3;
yield_constr.constraint_wrapping(is_add * (out - computed_out));
yield_constr.constraint(is_add * (out - computed_out));
}
pub(crate) fn eval_addition_recursively<F: RichField + Extendable<D>, const D: usize>(
@ -50,12 +50,12 @@ pub(crate) fn eval_addition_recursively<F: RichField + Extendable<D>, const D: u
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_add = local_values[IS_ADD];
let in_1 = local_values[COL_ADD_INPUT_1];
let in_2 = local_values[COL_ADD_INPUT_2];
let in_3 = local_values[COL_ADD_INPUT_3];
let out_1 = local_values[COL_ADD_OUTPUT_1];
let out_2 = local_values[COL_ADD_OUTPUT_2];
let out_3 = local_values[COL_ADD_OUTPUT_3];
let in_1 = local_values[COL_ADD_INPUT_0];
let in_2 = local_values[COL_ADD_INPUT_1];
let in_3 = local_values[COL_ADD_INPUT_2];
let out_1 = local_values[COL_ADD_OUTPUT_0];
let out_2 = local_values[COL_ADD_OUTPUT_1];
let out_3 = local_values[COL_ADD_OUTPUT_2];
let limb_base = builder.constant(F::from_canonical_u64(1 << 16));
// Note that this can't overflow. Since each output limb has been range checked as 16-bits,
@ -66,5 +66,5 @@ pub(crate) fn eval_addition_recursively<F: RichField + Extendable<D>, const D: u
let diff = builder.sub_extension(out, computed_out);
let filtered_diff = builder.mul_extension(is_add, diff);
yield_constr.constraint_wrapping(builder, filtered_diff);
yield_constr.constraint(builder, filtered_diff);
}
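
The limb bookkeeping is easier to see outside the constraint system. Assuming, as the three 16-bit output limbs suggest, that the inputs are range-checked to 32 bits elsewhere, the generator splits `in_1 + in_2 + in_3` (at most 34 bits) into 16-bit limbs and the constraint recombines them with weights `2^16` and `2^32`. A plain-u64 sketch:

fn generate_limbs(in_1: u64, in_2: u64, in_3: u64) -> [u64; 3] {
    let output = in_1 + in_2 + in_3; // at most 34 bits for 32-bit inputs
    [
        output & 0xFFFF,
        (output >> 16) & 0xFFFF,
        (output >> 32) & 0xFFFF,
    ]
}

fn recombine(limbs: [u64; 3]) -> u64 {
    // Mirrors the constraint: out_1 + 2^16 * out_2 + 2^32 * out_3 == in_1 + in_2 + in_3.
    limbs[0] + (limbs[1] << 16) + (limbs[2] << 32)
}

fn main() {
    let (a, b, c) = (u32::MAX as u64, u32::MAX as u64, 123);
    assert_eq!(recombine(generate_limbs(a, b, c)), a + b + c);
}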

View File

@ -0,0 +1,109 @@
//! Helper methods for checking that a value is canonical, i.e. is less than `|F|`.
//!
//! See https://hackmd.io/NC-yRmmtRQSvToTHb96e8Q#Checking-element-validity
use plonky2::field::extension_field::Extendable;
use plonky2::field::field_types::Field;
use plonky2::field::packed_field::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
/// Computes the helper value used in the is-canonical check.
pub(crate) fn compute_canonical_inv<F: Field>(value_to_check: u64) -> F {
let value_hi_32 = (value_to_check >> 32) as u32;
if value_hi_32 == u32::MAX {
debug_assert_eq!(value_to_check as u32, 0, "Value was not canonical.");
// In this case it doesn't matter what we put for the purported inverse value. The
// constraint containing this value will get multiplied by the low u32 limb, which will be
// zero, satisfying the constraint regardless of what we put here.
F::ZERO
} else {
F::from_canonical_u32(u32::MAX - value_hi_32).inverse()
}
}
/// Adds constraints to require that a list of four `u16`s, in little-endian order, represent a
/// canonical field element, i.e. that their combined value is less than `|F|`. Returns their
/// combined value.
pub(crate) fn combine_u16s_check_canonical<F: Field, P: PackedField<Scalar = F>>(
limb_0_u16: P,
limb_1_u16: P,
limb_2_u16: P,
limb_3_u16: P,
inverse: P,
yield_constr: &mut ConstraintConsumer<P>,
) -> P {
let base = F::from_canonical_u32(1 << 16);
let limb_0_u32 = limb_0_u16 + limb_1_u16 * base;
let limb_1_u32 = limb_2_u16 + limb_3_u16 * base;
combine_u32s_check_canonical(limb_0_u32, limb_1_u32, inverse, yield_constr)
}
/// Adds constraints to require that a list of four `u16`s, in little-endian order, represent a
/// canonical field element, i.e. that their combined value is less than `|F|`. Returns their
/// combined value.
pub(crate) fn combine_u16s_check_canonical_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
limb_0_u16: ExtensionTarget<D>,
limb_1_u16: ExtensionTarget<D>,
limb_2_u16: ExtensionTarget<D>,
limb_3_u16: ExtensionTarget<D>,
inverse: ExtensionTarget<D>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) -> ExtensionTarget<D> {
let base = F::from_canonical_u32(1 << 16);
let limb_0_u32 = builder.mul_const_add_extension(base, limb_1_u16, limb_0_u16);
let limb_1_u32 = builder.mul_const_add_extension(base, limb_3_u16, limb_2_u16);
combine_u32s_check_canonical_circuit(builder, limb_0_u32, limb_1_u32, inverse, yield_constr)
}
/// Adds constraints to require that a pair of `u32`s, in little-endian order, represent a canonical
/// field element, i.e. that their combined value is less than `|F|`. Returns their combined value.
pub(crate) fn combine_u32s_check_canonical<F: Field, P: PackedField<Scalar = F>>(
limb_0_u32: P,
limb_1_u32: P,
inverse: P,
yield_constr: &mut ConstraintConsumer<P>,
) -> P {
let u32_max = P::from(F::from_canonical_u32(u32::MAX));
// This is zero if and only if the high limb is `u32::MAX`.
let diff = u32_max - limb_1_u32;
// If this is zero, the diff is invertible, so the high limb is not `u32::MAX`.
let hi_not_max = inverse * diff - F::ONE;
// If this is zero, either the high limb is not `u32::MAX`, or the low limb is zero.
let hi_not_max_or_lo_zero = hi_not_max * limb_0_u32;
yield_constr.constraint(hi_not_max_or_lo_zero);
// Return the combined value.
limb_0_u32 + limb_1_u32 * F::from_canonical_u64(1 << 32)
}
/// Adds constraints to require that a pair of `u32`s, in little-endian order, represent a canonical
/// field element, i.e. that their combined value is less than `|F|`. Returns their combined value.
pub(crate) fn combine_u32s_check_canonical_circuit<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
limb_0_u32: ExtensionTarget<D>,
limb_1_u32: ExtensionTarget<D>,
inverse: ExtensionTarget<D>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) -> ExtensionTarget<D> {
let one = builder.one_extension();
let u32_max = builder.constant_extension(F::Extension::from_canonical_u32(u32::MAX));
// This is zero if and only if the high limb is `u32::MAX`.
let diff = builder.sub_extension(u32_max, limb_1_u32);
// If this is zero, the diff is invertible, so the high limb is not `u32::MAX`.
let hi_not_max = builder.mul_sub_extension(inverse, diff, one);
// If this is zero, either the high limb is not `u32::MAX`, or the low limb is zero.
let hi_not_max_or_lo_zero = builder.mul_extension(hi_not_max, limb_0_u32);
yield_constr.constraint(builder, hi_not_max_or_lo_zero);
// Return the combined value.
builder.mul_const_add_extension(F::from_canonical_u64(1 << 32), limb_1_u32, limb_0_u32)
}
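
Stated directly: for a modulus of the Goldilocks shape `p = 2^64 - 2^32 + 1` (the field these limb widths suggest, though this file does not name it), a 64-bit value is canonical exactly when its high 32 bits differ from `u32::MAX` or its low 32 bits are zero; that is the predicate the `inverse`-assisted constraints above enforce without a full range decomposition. A self-contained check of the predicate:

// Canonicity predicate for p = 2^64 - 2^32 + 1: value < p iff the high 32 bits differ
// from u32::MAX, or the low 32 bits are zero.
const P: u128 = (1u128 << 64) - (1u128 << 32) + 1;

fn is_canonical(value: u64) -> bool {
    let hi = (value >> 32) as u32;
    let lo = value as u32;
    hi != u32::MAX || lo == 0
}

fn main() {
    let samples = [
        0u64,
        u32::MAX as u64,               // hi = 0
        (u32::MAX as u64) << 32,       // hi = u32::MAX, lo = 0: exactly p - 1
        ((u32::MAX as u64) << 32) + 1, // equals p: not canonical
        u64::MAX,
    ];
    for v in samples {
        assert_eq!(is_canonical(v), (v as u128) < P);
    }
}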

View File

@ -6,7 +6,7 @@ use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::registers::arithmetic::*;
use crate::registers::alu::*;
use crate::registers::NUM_COLUMNS;
pub(crate) fn generate_division<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {

View File

@ -7,54 +7,53 @@ use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsume
use starky::vars::StarkEvaluationTargets;
use starky::vars::StarkEvaluationVars;
use crate::arithmetic::addition::{eval_addition, eval_addition_recursively, generate_addition};
use crate::arithmetic::division::{eval_division, eval_division_recursively, generate_division};
use crate::arithmetic::multiplication::{
eval_multiplication, eval_multiplication_recursively, generate_multiplication,
};
use crate::arithmetic::subtraction::{
use crate::alu::addition::{eval_addition, eval_addition_recursively, generate_addition};
use crate::alu::division::{eval_division, eval_division_recursively, generate_division};
use crate::alu::mul_add::{eval_mul_add, eval_mul_add_recursively, generate_mul_add};
use crate::alu::subtraction::{
eval_subtraction, eval_subtraction_recursively, generate_subtraction,
};
use crate::public_input_layout::NUM_PUBLIC_INPUTS;
use crate::registers::arithmetic::*;
use crate::registers::alu::*;
use crate::registers::NUM_COLUMNS;
mod addition;
mod canonical;
mod division;
mod multiplication;
mod mul_add;
mod subtraction;
pub(crate) fn generate_arithmetic_unit<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
pub(crate) fn generate_alu<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
if values[IS_ADD].is_one() {
generate_addition(values);
} else if values[IS_SUB].is_one() {
generate_subtraction(values);
} else if values[IS_MUL].is_one() {
generate_multiplication(values);
} else if values[IS_MUL_ADD].is_one() {
generate_mul_add(values);
} else if values[IS_DIV].is_one() {
generate_division(values);
}
}
pub(crate) fn eval_arithmetic_unit<F: Field, P: PackedField<Scalar = F>>(
pub(crate) fn eval_alu<F: Field, P: PackedField<Scalar = F>>(
vars: StarkEvaluationVars<F, P, NUM_COLUMNS, NUM_PUBLIC_INPUTS>,
yield_constr: &mut ConstraintConsumer<P>,
) {
let local_values = &vars.local_values;
// Check that the operation flag values are binary.
for col in [IS_ADD, IS_SUB, IS_MUL, IS_DIV] {
for col in [IS_ADD, IS_SUB, IS_MUL_ADD, IS_DIV] {
let val = local_values[col];
yield_constr.constraint_wrapping(val * val - val);
yield_constr.constraint(val * val - val);
}
eval_addition(local_values, yield_constr);
eval_subtraction(local_values, yield_constr);
eval_multiplication(local_values, yield_constr);
eval_mul_add(local_values, yield_constr);
eval_division(local_values, yield_constr);
}
pub(crate) fn eval_arithmetic_unit_recursively<F: RichField + Extendable<D>, const D: usize>(
pub(crate) fn eval_alu_recursively<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
vars: StarkEvaluationTargets<D, NUM_COLUMNS, NUM_PUBLIC_INPUTS>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
@ -62,14 +61,14 @@ pub(crate) fn eval_arithmetic_unit_recursively<F: RichField + Extendable<D>, con
let local_values = &vars.local_values;
// Check that the operation flag values are binary.
for col in [IS_ADD, IS_SUB, IS_MUL, IS_DIV] {
for col in [IS_ADD, IS_SUB, IS_MUL_ADD, IS_DIV] {
let val = local_values[col];
let constraint = builder.mul_sub_extension(val, val, val);
yield_constr.constraint_wrapping(builder, constraint);
yield_constr.constraint(builder, constraint);
}
eval_addition_recursively(builder, local_values, yield_constr);
eval_subtraction_recursively(builder, local_values, yield_constr);
eval_multiplication_recursively(builder, local_values, yield_constr);
eval_mul_add_recursively(builder, local_values, yield_constr);
eval_division_recursively(builder, local_values, yield_constr);
}

View File

@ -0,0 +1,91 @@
use plonky2::field::extension_field::Extendable;
use plonky2::field::field_types::{Field, PrimeField64};
use plonky2::field::packed_field::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use plonky2_util::assume;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::alu::canonical::*;
use crate::registers::alu::*;
use crate::registers::NUM_COLUMNS;
pub(crate) fn generate_mul_add<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
let factor_0 = values[COL_MUL_ADD_FACTOR_0].to_canonical_u64();
let factor_1 = values[COL_MUL_ADD_FACTOR_1].to_canonical_u64();
let addend = values[COL_MUL_ADD_ADDEND].to_canonical_u64();
// Let the compiler know that each input must fit in 32 bits.
assume(factor_0 <= u32::MAX as u64);
assume(factor_1 <= u32::MAX as u64);
assume(addend <= u32::MAX as u64);
let output = factor_0 * factor_1 + addend;
// An advice value used to help verify that the limbs represent a canonical field element.
values[COL_MUL_ADD_RESULT_CANONICAL_INV] = compute_canonical_inv(output);
values[COL_MUL_ADD_OUTPUT_0] = F::from_canonical_u16(output as u16);
values[COL_MUL_ADD_OUTPUT_1] = F::from_canonical_u16((output >> 16) as u16);
values[COL_MUL_ADD_OUTPUT_2] = F::from_canonical_u16((output >> 32) as u16);
values[COL_MUL_ADD_OUTPUT_3] = F::from_canonical_u16((output >> 48) as u16);
}
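A hedged side note on why the unchecked u64 multiply-add above is safe (helper name hypothetical, not part of the diff): with all three inputs at most 2^32 - 1, the worst case is (2^32 - 1)^2 + (2^32 - 1) = 2^64 - 2^32, which fits in a u64 and, assuming a Goldilocks-style order 2^64 - 2^32 + 1, is also always a canonical field element.
fn mul_add_u32_as_u64(factor_0: u32, factor_1: u32, addend: u32) -> u64 {
    // Checked arithmetic never trips: (2^32 - 1)^2 + (2^32 - 1) = 2^64 - 2^32 < 2^64.
    (factor_0 as u64)
        .checked_mul(factor_1 as u64)
        .and_then(|product| product.checked_add(addend as u64))
        .expect("u32 * u32 + u32 always fits in a u64")
}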
pub(crate) fn eval_mul_add<F: Field, P: PackedField<Scalar = F>>(
local_values: &[P; NUM_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_mul_add = local_values[IS_MUL_ADD];
let factor_0 = local_values[COL_MUL_ADD_FACTOR_0];
let factor_1 = local_values[COL_MUL_ADD_FACTOR_1];
let addend = local_values[COL_MUL_ADD_ADDEND];
let output_0 = local_values[COL_MUL_ADD_OUTPUT_0];
let output_1 = local_values[COL_MUL_ADD_OUTPUT_1];
let output_2 = local_values[COL_MUL_ADD_OUTPUT_2];
let output_3 = local_values[COL_MUL_ADD_OUTPUT_3];
let result_canonical_inv = local_values[COL_MUL_ADD_RESULT_CANONICAL_INV];
let computed_output = factor_0 * factor_1 + addend;
// TODO: Needs to be filtered by IS_MUL_ADD.
let output = combine_u16s_check_canonical(
output_0,
output_1,
output_2,
output_3,
result_canonical_inv,
yield_constr,
);
yield_constr.constraint(is_mul_add * (computed_output - output));
}
pub(crate) fn eval_mul_add_recursively<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
local_values: &[ExtensionTarget<D>; NUM_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_mul_add = local_values[IS_MUL_ADD];
let factor_0 = local_values[COL_MUL_ADD_FACTOR_0];
let factor_1 = local_values[COL_MUL_ADD_FACTOR_1];
let addend = local_values[COL_MUL_ADD_ADDEND];
let output_0 = local_values[COL_MUL_ADD_OUTPUT_0];
let output_1 = local_values[COL_MUL_ADD_OUTPUT_1];
let output_2 = local_values[COL_MUL_ADD_OUTPUT_2];
let output_3 = local_values[COL_MUL_ADD_OUTPUT_3];
let result_canonical_inv = local_values[COL_MUL_ADD_RESULT_CANONICAL_INV];
let computed_output = builder.mul_add_extension(factor_0, factor_1, addend);
// TODO: Needs to be filtered by IS_MUL_ADD.
let output = combine_u16s_check_canonical_circuit(
builder,
output_0,
output_1,
output_2,
output_3,
result_canonical_inv,
yield_constr,
);
let diff = builder.sub_extension(computed_output, output);
let filtered_diff = builder.mul_extension(is_mul_add, diff);
yield_constr.constraint(builder, filtered_diff);
}

View File

@ -0,0 +1,78 @@
use plonky2::field::extension_field::Extendable;
use plonky2::field::field_types::{Field, PrimeField64};
use plonky2::field::packed_field::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::registers::alu::*;
use crate::registers::NUM_COLUMNS;
pub(crate) fn generate_subtraction<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
let in_1 = values[COL_SUB_INPUT_0].to_canonical_u64() as u32;
let in_2 = values[COL_SUB_INPUT_1].to_canonical_u64() as u32;
// in_1 - in_2 == diff - br*2^32
let (diff, br) = in_1.overflowing_sub(in_2);
let diff_1 = F::from_canonical_u16(diff as u16);
let diff_2 = F::from_canonical_u16((diff >> 16) as u16);
values[COL_SUB_OUTPUT_0] = diff_1;
values[COL_SUB_OUTPUT_1] = diff_2;
values[COL_SUB_OUTPUT_BORROW] = F::from_canonical_u16(br as u16);
}
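A small native check of the borrow identity noted above, in_1 - in_2 == diff - br * 2^32; a hedged sketch, not part of the diff (helper name hypothetical):
fn check_borrow_identity(in_1: u32, in_2: u32) {
    let (diff, br) = in_1.overflowing_sub(in_2);
    // Compare in a wide signed type so nothing wraps.
    let lhs = in_1 as i128 - in_2 as i128;
    let rhs = diff as i128 - ((br as i128) << 32);
    assert_eq!(lhs, rhs);
}
For example, check_borrow_identity(3, 5) passes with diff = 0xFFFF_FFFE and br = 1.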
pub(crate) fn eval_subtraction<F: Field, P: PackedField<Scalar = F>>(
local_values: &[P; NUM_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_sub = local_values[IS_SUB];
let in_1 = local_values[COL_SUB_INPUT_0];
let in_2 = local_values[COL_SUB_INPUT_1];
let out_1 = local_values[COL_SUB_OUTPUT_0];
let out_2 = local_values[COL_SUB_OUTPUT_1];
let out_br = local_values[COL_SUB_OUTPUT_BORROW];
let base = F::from_canonical_u64(1 << 16);
let base_sqr = F::from_canonical_u64(1 << 32);
let out_br = out_br * base_sqr;
let lhs = (out_br + in_1) - in_2;
let rhs = out_1 + out_2 * base;
yield_constr.constraint(is_sub * (lhs - rhs));
// We don't need to check that out_br is in {0, 1} because it's
// checked by boolean::col_bit(0) in the ALU.
}
pub(crate) fn eval_subtraction_recursively<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
local_values: &[ExtensionTarget<D>; NUM_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_sub = local_values[IS_SUB];
let in_1 = local_values[COL_SUB_INPUT_0];
let in_2 = local_values[COL_SUB_INPUT_1];
let out_1 = local_values[COL_SUB_OUTPUT_0];
let out_2 = local_values[COL_SUB_OUTPUT_1];
let out_br = local_values[COL_SUB_OUTPUT_BORROW];
let base = builder.constant_extension(F::Extension::from_canonical_u64(1 << 16));
let base_sqr = builder.constant_extension(F::Extension::from_canonical_u64(1 << 32));
// lhs = (out_br * 2^32 + in_1) - in_2
let scaled_br = builder.mul_extension(out_br, base_sqr);
let lhs = builder.add_extension(scaled_br, in_1);
let lhs = builder.sub_extension(lhs, in_2);
// rhs = out_1 + base * out_2
let rhs = builder.mul_add_extension(out_2, base, out_1);
// filtered_diff = is_sub * (lhs - rhs)
let diff = builder.sub_extension(lhs, rhs);
let filtered_diff = builder.mul_extension(is_sub, diff);
yield_constr.constraint(builder, filtered_diff);
}

View File

@ -1,31 +0,0 @@
use plonky2::field::extension_field::Extendable;
use plonky2::field::field_types::{Field, PrimeField64};
use plonky2::field::packed_field::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::registers::arithmetic::*;
use crate::registers::NUM_COLUMNS;
pub(crate) fn generate_multiplication<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
// TODO
}
pub(crate) fn eval_multiplication<F: Field, P: PackedField<Scalar = F>>(
local_values: &[P; NUM_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_mul = local_values[IS_MUL];
// TODO
}
pub(crate) fn eval_multiplication_recursively<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
local_values: &[ExtensionTarget<D>; NUM_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_mul = local_values[IS_MUL];
// TODO
}

View File

@ -1,31 +0,0 @@
use plonky2::field::extension_field::Extendable;
use plonky2::field::field_types::{Field, PrimeField64};
use plonky2::field::packed_field::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::iop::ext_target::ExtensionTarget;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use crate::registers::arithmetic::*;
use crate::registers::NUM_COLUMNS;
pub(crate) fn generate_subtraction<F: PrimeField64>(values: &mut [F; NUM_COLUMNS]) {
// TODO
}
pub(crate) fn eval_subtraction<F: Field, P: PackedField<Scalar = F>>(
local_values: &[P; NUM_COLUMNS],
yield_constr: &mut ConstraintConsumer<P>,
) {
let is_sub = local_values[IS_SUB];
// TODO
}
pub(crate) fn eval_subtraction_recursively<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
local_values: &[ExtensionTarget<D>; NUM_COLUMNS],
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
let is_sub = local_values[IS_SUB];
// TODO
}

View File

@ -49,7 +49,7 @@ pub(crate) fn eval_core_registers<F: Field, P: PackedField<Scalar = F>>(
let next_clock = vars.next_values[COL_CLOCK];
let delta_clock = next_clock - local_clock;
yield_constr.constraint_first_row(local_clock);
yield_constr.constraint(delta_clock - F::ONE);
yield_constr.constraint_transition(delta_clock - F::ONE);
// The 16-bit table must start with 0, end with 2^16 - 1, and increment by 0 or 1.
let local_range_16 = vars.local_values[COL_RANGE_16];
@ -57,7 +57,7 @@ pub(crate) fn eval_core_registers<F: Field, P: PackedField<Scalar = F>>(
let delta_range_16 = next_range_16 - local_range_16;
yield_constr.constraint_first_row(local_range_16);
yield_constr.constraint_last_row(local_range_16 - F::from_canonical_u64((1 << 16) - 1));
yield_constr.constraint(delta_range_16 * delta_range_16 - delta_range_16);
yield_constr.constraint_transition(delta_range_16 * delta_range_16 - delta_range_16);
// TODO constraints for stack etc.
}
@ -77,7 +77,7 @@ pub(crate) fn eval_core_registers_recursively<F: RichField + Extendable<D>, cons
let delta_clock = builder.sub_extension(next_clock, local_clock);
yield_constr.constraint_first_row(builder, local_clock);
let constraint = builder.sub_extension(delta_clock, one_ext);
yield_constr.constraint(builder, constraint);
yield_constr.constraint_transition(builder, constraint);
// The 16-bit table must start with 0, end with 2^16 - 1, and increment by 0 or 1.
let local_range_16 = vars.local_values[COL_RANGE_16];
@ -87,7 +87,7 @@ pub(crate) fn eval_core_registers_recursively<F: RichField + Extendable<D>, cons
let constraint = builder.sub_extension(local_range_16, max_u16_ext);
yield_constr.constraint_last_row(builder, constraint);
let constraint = builder.mul_add_extension(delta_range_16, delta_range_16, delta_range_16);
yield_constr.constraint(builder, constraint);
yield_constr.constraint_transition(builder, constraint);
// TODO constraints for stack etc.
}
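For intuition, a hedged sketch (not part of the diff) of a COL_RANGE_16 column that satisfies the three constraints above for a trace of num_rows >= 2^16 rows: start at 0, increment by 1 up to 2^16 - 1, then repeat the final value.
fn range_16_column(num_rows: usize) -> Vec<u64> {
    // First row is 0, last row is 2^16 - 1 (given num_rows >= 2^16),
    // and every transition increments by 0 or 1.
    (0..num_rows).map(|i| (i as u64).min((1 << 16) - 1)).collect()
}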

View File

@ -2,8 +2,9 @@
#![allow(dead_code)]
#![allow(unused_variables)]
mod arithmetic;
mod alu;
mod core_registers;
pub mod lookup;
mod memory;
mod permutation_unit;
mod public_input_layout;

system_zero/src/lookup.rs Normal file
View File

@ -0,0 +1,147 @@
//! Implementation of the Halo2 lookup argument.
//!
//! References:
//! - https://zcash.github.io/halo2/design/proving-system/lookup.html
//! - https://www.youtube.com/watch?v=YlTt12s7vGE&t=5237s
use std::cmp::Ordering;
use itertools::Itertools;
use plonky2::field::extension_field::Extendable;
use plonky2::field::field_types::{Field, PrimeField64};
use plonky2::field::packed_field::PackedField;
use plonky2::hash::hash_types::RichField;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use starky::vars::StarkEvaluationTargets;
use starky::vars::StarkEvaluationVars;
use crate::public_input_layout::NUM_PUBLIC_INPUTS;
use crate::registers::lookup::*;
use crate::registers::NUM_COLUMNS;
pub(crate) fn generate_lookups<F: PrimeField64>(trace_cols: &mut [Vec<F>]) {
for i in 0..NUM_LOOKUPS {
let inputs = &trace_cols[col_input(i)];
let table = &trace_cols[col_table(i)];
let (permuted_inputs, permuted_table) = permuted_cols(inputs, table);
trace_cols[col_permuted_input(i)] = permuted_inputs;
trace_cols[col_permuted_table(i)] = permuted_table;
}
}
/// Given an input column and a table column, generate the permuted input and permuted table columns
/// used in the Halo2 permutation argument.
pub fn permuted_cols<F: PrimeField64>(inputs: &[F], table: &[F]) -> (Vec<F>, Vec<F>) {
let n = inputs.len();
// The permuted inputs do not have to be ordered, but we found that sorting was faster than
// hash-based grouping. We also sort the table, as this helps us identify "unused" table
// elements efficiently.
// To compare elements, e.g. for sorting, we first need them in canonical form. It would be
// wasteful to canonicalize in each comparison, as a single element may be involved in many
// comparisons. So we will canonicalize once upfront, then use `to_noncanonical_u64` when
// comparing elements.
let sorted_inputs = inputs
.iter()
.map(|x| x.to_canonical())
.sorted_unstable_by_key(|x| x.to_noncanonical_u64())
.collect_vec();
let sorted_table = table
.iter()
.map(|x| x.to_canonical())
.sorted_unstable_by_key(|x| x.to_noncanonical_u64())
.collect_vec();
let mut unused_table_inds = Vec::with_capacity(n);
let mut unused_table_vals = Vec::with_capacity(n);
let mut permuted_table = vec![F::ZERO; n];
let mut i = 0;
let mut j = 0;
while (j < n) && (i < n) {
let input_val = sorted_inputs[i].to_noncanonical_u64();
let table_val = sorted_table[j].to_noncanonical_u64();
match input_val.cmp(&table_val) {
Ordering::Greater => {
unused_table_vals.push(sorted_table[j]);
j += 1;
}
Ordering::Less => {
if let Some(x) = unused_table_vals.pop() {
permuted_table[i] = x;
} else {
unused_table_inds.push(i);
}
i += 1;
}
Ordering::Equal => {
permuted_table[i] = sorted_table[j];
i += 1;
j += 1;
}
}
}
#[allow(clippy::needless_range_loop)] // indexing is just more natural here
for jj in j..n {
unused_table_vals.push(sorted_table[jj]);
}
for ii in i..n {
unused_table_inds.push(ii);
}
for (ind, val) in unused_table_inds.into_iter().zip_eq(unused_table_vals) {
permuted_table[ind] = val;
}
(sorted_inputs, permuted_table)
}
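A hedged usage sketch for permuted_cols (the concrete values and the GoldilocksField import are assumptions, not part of the diff), illustrating the property the constraints below rely on: each permuted input either matches the permuted table value in its row or repeats the previous permuted input.
#[cfg(test)]
mod permuted_cols_example {
    use plonky2::field::field_types::Field;
    use plonky2::field::goldilocks_field::GoldilocksField as F;

    use super::permuted_cols;

    #[test]
    fn small_example() {
        let inputs: Vec<F> = [2u64, 1, 2, 5].map(F::from_canonical_u64).to_vec();
        let table: Vec<F> = [1u64, 2, 3, 5].map(F::from_canonical_u64).to_vec();
        let (perm_inputs, perm_table) = permuted_cols(&inputs, &table);
        for i in 0..inputs.len() {
            // Each permuted input matches its table entry or repeats the previous input.
            assert!(perm_inputs[i] == perm_table[i] || (i > 0 && perm_inputs[i] == perm_inputs[i - 1]));
        }
    }
}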
pub(crate) fn eval_lookups<F: Field, P: PackedField<Scalar = F>>(
vars: StarkEvaluationVars<F, P, NUM_COLUMNS, NUM_PUBLIC_INPUTS>,
yield_constr: &mut ConstraintConsumer<P>,
) {
for i in 0..NUM_LOOKUPS {
let local_perm_input = vars.local_values[col_permuted_input(i)];
let next_perm_table = vars.next_values[col_permuted_table(i)];
let next_perm_input = vars.next_values[col_permuted_input(i)];
// A "vertical" diff between the local and next permuted inputs.
let diff_input_prev = next_perm_input - local_perm_input;
// A "horizontal" diff between the next permuted input and permuted table value.
let diff_input_table = next_perm_input - next_perm_table;
yield_constr.constraint(diff_input_prev * diff_input_table);
// This is actually constraining the first row, as per the spec, since `diff_input_table`
// is a diff of the next row's values. In the context of `constraint_last_row`, the next
// row is the first row.
yield_constr.constraint_last_row(diff_input_table);
}
}
pub(crate) fn eval_lookups_recursively<F: RichField + Extendable<D>, const D: usize>(
builder: &mut CircuitBuilder<F, D>,
vars: StarkEvaluationTargets<D, NUM_COLUMNS, NUM_PUBLIC_INPUTS>,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
for i in 0..NUM_LOOKUPS {
let local_perm_input = vars.local_values[col_permuted_input(i)];
let next_perm_table = vars.next_values[col_permuted_table(i)];
let next_perm_input = vars.next_values[col_permuted_input(i)];
// A "vertical" diff between the local and next permuted inputs.
let diff_input_prev = builder.sub_extension(next_perm_input, local_perm_input);
// A "horizontal" diff between the next permuted input and permuted table value.
let diff_input_table = builder.sub_extension(next_perm_input, next_perm_table);
let diff_product = builder.mul_extension(diff_input_prev, diff_input_table);
yield_constr.constraint(builder, diff_product);
// This is actually constraining the first row, as per the spec, since `diff_input_table`
// is a diff of the next row's values. In the context of `constraint_last_row`, the next
// row is the first row.
yield_constr.constraint_last_row(builder, diff_input_table);
}
}
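Both evaluators above enforce the same per-row predicate over the permuted input column A' and permuted table column S'; a hedged native restatement (not part of the diff):
// For every transition row i >= 1, the product constraint
// (A'[i] - A'[i-1]) * (A'[i] - S'[i]) = 0 is equivalent, over a field, to:
fn lookup_row_ok(prev_perm_input: u64, perm_input: u64, perm_table: u64) -> bool {
    perm_input == prev_perm_input || perm_input == perm_table
}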

View File

@ -127,8 +127,7 @@ pub(crate) fn eval_permutation_unit<F, FE, P, const D: usize>(
for i in 0..SPONGE_WIDTH {
let state_cubed = state[i] * state[i].square();
yield_constr
.constraint_wrapping(state_cubed - local_values[col_full_first_mid_sbox(r, i)]);
yield_constr.constraint(state_cubed - local_values[col_full_first_mid_sbox(r, i)]);
let state_cubed = local_values[col_full_first_mid_sbox(r, i)];
state[i] *= state_cubed.square(); // Form state ** 7.
}
@ -136,8 +135,7 @@ pub(crate) fn eval_permutation_unit<F, FE, P, const D: usize>(
state = mds_layer(state);
for i in 0..SPONGE_WIDTH {
yield_constr
.constraint_wrapping(state[i] - local_values[col_full_first_after_mds(r, i)]);
yield_constr.constraint(state[i] - local_values[col_full_first_after_mds(r, i)]);
state[i] = local_values[col_full_first_after_mds(r, i)];
}
}
@ -146,10 +144,10 @@ pub(crate) fn eval_permutation_unit<F, FE, P, const D: usize>(
state = constant_layer(state, HALF_N_FULL_ROUNDS + r);
let state0_cubed = state[0] * state[0].square();
yield_constr.constraint_wrapping(state0_cubed - local_values[col_partial_mid_sbox(r)]);
yield_constr.constraint(state0_cubed - local_values[col_partial_mid_sbox(r)]);
let state0_cubed = local_values[col_partial_mid_sbox(r)];
state[0] *= state0_cubed.square(); // Form state ** 7.
yield_constr.constraint_wrapping(state[0] - local_values[col_partial_after_sbox(r)]);
yield_constr.constraint(state[0] - local_values[col_partial_after_sbox(r)]);
state[0] = local_values[col_partial_after_sbox(r)];
state = mds_layer(state);
@ -160,8 +158,7 @@ pub(crate) fn eval_permutation_unit<F, FE, P, const D: usize>(
for i in 0..SPONGE_WIDTH {
let state_cubed = state[i] * state[i].square();
yield_constr
.constraint_wrapping(state_cubed - local_values[col_full_second_mid_sbox(r, i)]);
yield_constr.constraint(state_cubed - local_values[col_full_second_mid_sbox(r, i)]);
let state_cubed = local_values[col_full_second_mid_sbox(r, i)];
state[i] *= state_cubed.square(); // Form state ** 7.
}
@ -169,8 +166,7 @@ pub(crate) fn eval_permutation_unit<F, FE, P, const D: usize>(
state = mds_layer(state);
for i in 0..SPONGE_WIDTH {
yield_constr
.constraint_wrapping(state[i] - local_values[col_full_second_after_mds(r, i)]);
yield_constr.constraint(state[i] - local_values[col_full_second_after_mds(r, i)]);
state[i] = local_values[col_full_second_after_mds(r, i)];
}
}
@ -197,7 +193,7 @@ pub(crate) fn eval_permutation_unit_recursively<F: RichField + Extendable<D>, co
let state_cubed = builder.cube_extension(state[i]);
let diff =
builder.sub_extension(state_cubed, local_values[col_full_first_mid_sbox(r, i)]);
yield_constr.constraint_wrapping(builder, diff);
yield_constr.constraint(builder, diff);
let state_cubed = local_values[col_full_first_mid_sbox(r, i)];
state[i] = builder.mul_many_extension(&[state[i], state_cubed, state_cubed]);
// Form state ** 7.
@ -208,7 +204,7 @@ pub(crate) fn eval_permutation_unit_recursively<F: RichField + Extendable<D>, co
for i in 0..SPONGE_WIDTH {
let diff =
builder.sub_extension(state[i], local_values[col_full_first_after_mds(r, i)]);
yield_constr.constraint_wrapping(builder, diff);
yield_constr.constraint(builder, diff);
state[i] = local_values[col_full_first_after_mds(r, i)];
}
}
@ -218,11 +214,11 @@ pub(crate) fn eval_permutation_unit_recursively<F: RichField + Extendable<D>, co
let state0_cubed = builder.cube_extension(state[0]);
let diff = builder.sub_extension(state0_cubed, local_values[col_partial_mid_sbox(r)]);
yield_constr.constraint_wrapping(builder, diff);
yield_constr.constraint(builder, diff);
let state0_cubed = local_values[col_partial_mid_sbox(r)];
state[0] = builder.mul_many_extension(&[state[0], state0_cubed, state0_cubed]); // Form state ** 7.
let diff = builder.sub_extension(state[0], local_values[col_partial_after_sbox(r)]);
yield_constr.constraint_wrapping(builder, diff);
yield_constr.constraint(builder, diff);
state[0] = local_values[col_partial_after_sbox(r)];
state = F::mds_layer_recursive(builder, &state);
@ -239,7 +235,7 @@ pub(crate) fn eval_permutation_unit_recursively<F: RichField + Extendable<D>, co
let state_cubed = builder.cube_extension(state[i]);
let diff =
builder.sub_extension(state_cubed, local_values[col_full_second_mid_sbox(r, i)]);
yield_constr.constraint_wrapping(builder, diff);
yield_constr.constraint(builder, diff);
let state_cubed = local_values[col_full_second_mid_sbox(r, i)];
state[i] = builder.mul_many_extension(&[state[i], state_cubed, state_cubed]);
// Form state ** 7.
@ -250,7 +246,7 @@ pub(crate) fn eval_permutation_unit_recursively<F: RichField + Extendable<D>, co
for i in 0..SPONGE_WIDTH {
let diff =
builder.sub_extension(state[i], local_values[col_full_second_after_mds(r, i)]);
yield_constr.constraint_wrapping(builder, diff);
yield_constr.constraint(builder, diff);
state[i] = local_values[col_full_second_after_mds(r, i)];
}
}

View File

@ -0,0 +1,69 @@
//! Arithmetic and logic unit.
pub(crate) const IS_ADD: usize = super::START_ALU;
pub(crate) const IS_SUB: usize = IS_ADD + 1;
pub(crate) const IS_MUL_ADD: usize = IS_SUB + 1;
pub(crate) const IS_DIV: usize = IS_MUL_ADD + 1;
const START_SHARED_COLS: usize = IS_DIV + 1;
/// Within the ALU, there are shared columns which can be used by any arithmetic/logic
/// circuit, depending on which one is active this cycle.
// Can be increased as needed as other operations are implemented.
const NUM_SHARED_COLS: usize = 4;
const fn shared_col(i: usize) -> usize {
debug_assert!(i < NUM_SHARED_COLS);
START_SHARED_COLS + i
}
/// The first value to be added; treated as an unsigned u32.
pub(crate) const COL_ADD_INPUT_0: usize = shared_col(0);
/// The second value to be added; treated as an unsigned u32.
pub(crate) const COL_ADD_INPUT_1: usize = shared_col(1);
/// The third value to be added; treated as an unsigned u32.
pub(crate) const COL_ADD_INPUT_2: usize = shared_col(2);
// Note: Addition outputs three 16-bit chunks, and since these values need to be range-checked
// anyway, we might as well use the range check unit's columns as our addition outputs. So the
// three following columns are basically aliases, not columns owned by the ALU.
/// The first 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_ADD_OUTPUT_0: usize = super::range_check_16::col_rc_16_input(0);
/// The second 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_ADD_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(1);
/// The third 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_ADD_OUTPUT_2: usize = super::range_check_16::col_rc_16_input(2);
/// Inputs for subtraction; the second value is subtracted from the
/// first; inputs treated as an unsigned u32.
pub(crate) const COL_SUB_INPUT_0: usize = shared_col(0);
pub(crate) const COL_SUB_INPUT_1: usize = shared_col(1);
/// The first 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_SUB_OUTPUT_0: usize = super::range_check_16::col_rc_16_input(0);
/// The second 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_SUB_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(1);
/// The borrow output
pub(crate) const COL_SUB_OUTPUT_BORROW: usize = super::boolean::col_bit(0);
/// The first value to be multiplied; treated as an unsigned u32.
pub(crate) const COL_MUL_ADD_FACTOR_0: usize = shared_col(0);
/// The second value to be multiplied; treated as an unsigned u32.
pub(crate) const COL_MUL_ADD_FACTOR_1: usize = shared_col(1);
/// The value to be added to the product; treated as an unsigned u32.
pub(crate) const COL_MUL_ADD_ADDEND: usize = shared_col(2);
/// The inverse of `u32::MAX - output_hi`, where `output_hi` is the high 32 bits of the result.
/// See https://hackmd.io/NC-yRmmtRQSvToTHb96e8Q#Checking-element-validity
pub(crate) const COL_MUL_ADD_RESULT_CANONICAL_INV: usize = shared_col(3);
/// The first 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_MUL_ADD_OUTPUT_0: usize = super::range_check_16::col_rc_16_input(0);
/// The second 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_MUL_ADD_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(1);
/// The third 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_MUL_ADD_OUTPUT_2: usize = super::range_check_16::col_rc_16_input(2);
/// The fourth 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_MUL_ADD_OUTPUT_3: usize = super::range_check_16::col_rc_16_input(3);
pub(super) const END: usize = START_SHARED_COLS + NUM_SHARED_COLS;
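Since the operation-specific constants above all map onto the same shared and range-check columns, they alias one another; a hedged sketch of that aliasing (test module name hypothetical, not part of the diff):
#[cfg(test)]
mod layout_sanity {
    use super::*;

    #[test]
    fn shared_columns_alias() {
        // The first operand of each operation lives in shared column 0...
        assert_eq!(COL_ADD_INPUT_0, COL_SUB_INPUT_0);
        assert_eq!(COL_SUB_INPUT_0, COL_MUL_ADD_FACTOR_0);
        // ...and the low 16-bit output chunks all alias range-check-16 input 0.
        assert_eq!(COL_ADD_OUTPUT_0, COL_SUB_OUTPUT_0);
        assert_eq!(COL_SUB_OUTPUT_0, COL_MUL_ADD_OUTPUT_0);
    }
}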

View File

@ -1,37 +0,0 @@
//! Arithmetic unit.
pub(crate) const IS_ADD: usize = super::START_ARITHMETIC;
pub(crate) const IS_SUB: usize = IS_ADD + 1;
pub(crate) const IS_MUL: usize = IS_SUB + 1;
pub(crate) const IS_DIV: usize = IS_MUL + 1;
const START_SHARED_COLS: usize = IS_DIV + 1;
/// Within the arithmetic unit, there are shared columns which can be used by any arithmetic
/// circuit, depending on which one is active this cycle.
// Can be increased as needed as other operations are implemented.
const NUM_SHARED_COLS: usize = 3;
const fn shared_col(i: usize) -> usize {
debug_assert!(i < NUM_SHARED_COLS);
START_SHARED_COLS + i
}
/// The first value to be added; treated as an unsigned u32.
pub(crate) const COL_ADD_INPUT_1: usize = shared_col(0);
/// The second value to be added; treated as an unsigned u32.
pub(crate) const COL_ADD_INPUT_2: usize = shared_col(1);
/// The third value to be added; treated as an unsigned u32.
pub(crate) const COL_ADD_INPUT_3: usize = shared_col(2);
// Note: Addition outputs three 16-bit chunks, and since these values need to be range-checked
// anyway, we might as well use the range check unit's columns as our addition outputs. So the
// three proceeding columns are basically aliases, not columns owned by the arithmetic unit.
/// The first 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_ADD_OUTPUT_1: usize = super::range_check_16::col_rc_16_input(0);
/// The second 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_ADD_OUTPUT_2: usize = super::range_check_16::col_rc_16_input(1);
/// The third 16-bit chunk of the output, based on little-endian ordering.
pub(crate) const COL_ADD_OUTPUT_3: usize = super::range_check_16::col_rc_16_input(2);
pub(super) const END: usize = super::START_ARITHMETIC + NUM_SHARED_COLS;

View File

@ -3,19 +3,35 @@
const START_UNIT: usize = super::START_LOOKUP;
const NUM_LOOKUPS: usize =
pub(crate) const NUM_LOOKUPS: usize =
super::range_check_16::NUM_RANGE_CHECKS + super::range_check_degree::NUM_RANGE_CHECKS;
pub(crate) const fn col_input(i: usize) -> usize {
if i < super::range_check_16::NUM_RANGE_CHECKS {
super::range_check_16::col_rc_16_input(i)
} else {
super::range_check_degree::col_rc_degree_input(i - super::range_check_16::NUM_RANGE_CHECKS)
}
}
/// This column contains a permutation of the input values.
const fn col_permuted_input(i: usize) -> usize {
pub(crate) const fn col_permuted_input(i: usize) -> usize {
debug_assert!(i < NUM_LOOKUPS);
START_UNIT + 2 * i
}
pub(crate) const fn col_table(i: usize) -> usize {
if i < super::range_check_16::NUM_RANGE_CHECKS {
super::core::COL_RANGE_16
} else {
super::core::COL_CLOCK
}
}
/// This column contains a permutation of the table values.
const fn col_permuted_table(i: usize) -> usize {
pub(crate) const fn col_permuted_table(i: usize) -> usize {
debug_assert!(i < NUM_LOOKUPS);
START_UNIT + 2 * i + 1
}
pub(super) const END: usize = START_UNIT + NUM_LOOKUPS;
pub(super) const END: usize = START_UNIT + NUM_LOOKUPS * 2;

View File

@ -1,4 +1,4 @@
pub(crate) mod arithmetic;
pub(crate) mod alu;
pub(crate) mod boolean;
pub(crate) mod core;
pub(crate) mod logic;
@ -8,8 +8,8 @@ pub(crate) mod permutation;
pub(crate) mod range_check_16;
pub(crate) mod range_check_degree;
const START_ARITHMETIC: usize = 0;
const START_BOOLEAN: usize = arithmetic::END;
const START_ALU: usize = 0;
const START_BOOLEAN: usize = alu::END;
const START_CORE: usize = boolean::END;
const START_LOGIC: usize = core::END;
const START_LOOKUP: usize = logic::END;

View File

@ -1,6 +1,6 @@
//! Range check unit which checks that values are in `[0, 2^16)`.
pub(super) const NUM_RANGE_CHECKS: usize = 5;
pub(crate) const NUM_RANGE_CHECKS: usize = 5;
/// The input of the `i`th range check, i.e. the value being range checked.
pub(crate) const fn col_rc_16_input(i: usize) -> usize {

View File

@ -1,6 +1,6 @@
//! Range check unit which checks that values are in `[0, degree)`.
pub(super) const NUM_RANGE_CHECKS: usize = 5;
pub(crate) const NUM_RANGE_CHECKS: usize = 5;
/// The input of the `i`th range check, i.e. the value being range checked.
pub(crate) const fn col_rc_degree_input(i: usize) -> usize {

View File

@ -2,27 +2,30 @@ use std::marker::PhantomData;
use plonky2::field::extension_field::{Extendable, FieldExtension};
use plonky2::field::packed_field::PackedField;
use plonky2::field::polynomial::PolynomialValues;
use plonky2::hash::hash_types::RichField;
use plonky2::plonk::circuit_builder::CircuitBuilder;
use plonky2::timed;
use plonky2::util::timing::TimingTree;
use plonky2::util::transpose;
use starky::constraint_consumer::{ConstraintConsumer, RecursiveConstraintConsumer};
use starky::permutation::PermutationPair;
use starky::stark::Stark;
use starky::vars::StarkEvaluationTargets;
use starky::vars::StarkEvaluationVars;
use crate::arithmetic::{
eval_arithmetic_unit, eval_arithmetic_unit_recursively, generate_arithmetic_unit,
};
use crate::alu::{eval_alu, eval_alu_recursively, generate_alu};
use crate::core_registers::{
eval_core_registers, eval_core_registers_recursively, generate_first_row_core_registers,
generate_next_row_core_registers,
};
use crate::lookup::{eval_lookups, eval_lookups_recursively, generate_lookups};
use crate::memory::TransactionMemory;
use crate::permutation_unit::{
eval_permutation_unit, eval_permutation_unit_recursively, generate_permutation_unit,
};
use crate::public_input_layout::NUM_PUBLIC_INPUTS;
use crate::registers::NUM_COLUMNS;
use crate::registers::{lookup, NUM_COLUMNS};
/// We require at least 2^16 rows as it helps support efficient 16-bit range checks.
const MIN_TRACE_ROWS: usize = 1 << 16;
@ -33,12 +36,14 @@ pub struct SystemZero<F: RichField + Extendable<D>, const D: usize> {
}
impl<F: RichField + Extendable<D>, const D: usize> SystemZero<F, D> {
fn generate_trace(&self) -> Vec<[F; NUM_COLUMNS]> {
/// Generate the rows of the trace. Note that this does not generate the permuted columns used
/// in our lookup arguments, as those are computed after transposing to column-wise form.
fn generate_trace_rows(&self) -> Vec<[F; NUM_COLUMNS]> {
let memory = TransactionMemory::default();
let mut row = [F::ZERO; NUM_COLUMNS];
generate_first_row_core_registers(&mut row);
generate_arithmetic_unit(&mut row);
generate_alu(&mut row);
generate_permutation_unit(&mut row);
let mut trace = Vec::with_capacity(MIN_TRACE_ROWS);
@ -46,7 +51,7 @@ impl<F: RichField + Extendable<D>, const D: usize> SystemZero<F, D> {
loop {
let mut next_row = [F::ZERO; NUM_COLUMNS];
generate_next_row_core_registers(&row, &mut next_row);
generate_arithmetic_unit(&mut next_row);
generate_alu(&mut next_row);
generate_permutation_unit(&mut next_row);
trace.push(row);
@ -61,6 +66,45 @@ impl<F: RichField + Extendable<D>, const D: usize> SystemZero<F, D> {
trace.push(row);
trace
}
fn generate_trace(&self) -> Vec<PolynomialValues<F>> {
let mut timing = TimingTree::new("generate trace", log::Level::Debug);
// Generate the witness, except for permuted columns in the lookup argument.
let trace_rows = timed!(
&mut timing,
"generate trace rows",
self.generate_trace_rows()
);
// Transpose from row-wise to column-wise.
let trace_row_vecs: Vec<_> = timed!(
&mut timing,
"convert to Vecs",
trace_rows.into_iter().map(|row| row.to_vec()).collect()
);
let mut trace_col_vecs: Vec<Vec<F>> =
timed!(&mut timing, "transpose", transpose(&trace_row_vecs));
// Generate permuted columns in the lookup argument.
timed!(
&mut timing,
"generate lookup columns",
generate_lookups(&mut trace_col_vecs)
);
let trace_polys = timed!(
&mut timing,
"convert to PolynomialValues",
trace_col_vecs
.into_iter()
.map(PolynomialValues::new)
.collect()
);
timing.print();
trace_polys
}
}
impl<F: RichField + Extendable<D>, const D: usize> Default for SystemZero<F, D> {
@ -84,8 +128,9 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for SystemZero<F,
P: PackedField<Scalar = FE>,
{
eval_core_registers(vars, yield_constr);
eval_arithmetic_unit(vars, yield_constr);
eval_alu(vars, yield_constr);
eval_permutation_unit::<F, FE, P, D2>(vars, yield_constr);
eval_lookups(vars, yield_constr);
// TODO: Other units
}
@ -96,8 +141,9 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for SystemZero<F,
yield_constr: &mut RecursiveConstraintConsumer<F, D>,
) {
eval_core_registers_recursively(builder, vars, yield_constr);
eval_arithmetic_unit_recursively(builder, vars, yield_constr);
eval_alu_recursively(builder, vars, yield_constr);
eval_permutation_unit_recursively(builder, vars, yield_constr);
eval_lookups_recursively(builder, vars, yield_constr);
// TODO: Other units
}
@ -106,9 +152,22 @@ impl<F: RichField + Extendable<D>, const D: usize> Stark<F, D> for SystemZero<F,
}
fn permutation_pairs(&self) -> Vec<PermutationPair> {
let mut pairs = Vec::new();
for i in 0..lookup::NUM_LOOKUPS {
pairs.push(PermutationPair::singletons(
lookup::col_input(i),
lookup::col_permuted_input(i),
));
pairs.push(PermutationPair::singletons(
lookup::col_table(i),
lookup::col_permuted_table(i),
));
}
// TODO: Add permutation pairs for memory.
// TODO: Add permutation pairs for range checks.
vec![]
pairs
}
}
@ -129,8 +188,9 @@ mod tests {
use crate::system_zero::SystemZero;
#[test]
#[ignore] // A bit slow.
fn run() -> Result<()> {
init_logger();
type F = GoldilocksField;
type C = PoseidonGoldilocksConfig;
const D: usize = 2;
@ -156,4 +216,8 @@ mod tests {
let system = S::default();
test_stark_low_degree(system)
}
fn init_logger() {
let _ = env_logger::builder().format_timestamp(None).try_init();
}
}