mirror of
https://github.com/logos-storage/plonky2.git
synced 2026-01-03 14:23:07 +00:00
Faster extension field multiplication (#500)
* Initial implementation of quintic extensions. * Update to/from_biguint() methods. * Draft of fast multiplication on quintic extensions over 64-bit base. * cargo fmt * Typo. * Document functions (a bit). * Refactor reduction step. * Change multiplication call so that LLVM generates better assembly. * Use one main accumulator instead of two minor ones; faster reduce. * Use one main accumulator in square too; clean up redundant code. * Call faster routines from Mul and Square impls. * Fix reduction function. * Fix square calculation. * Slightly faster reduction. * Clean up names and types. * cargo fmt * Move extension field mul/sqr specialisations to their own file. * Rename functions to have unique prefix. * Add faster quadratic multiplication/squaring. * Faster quartic multiplication and squaring. * cargo fmt * clippy * Alternative reduce160 function. * Typo. * Remove alternative reduction function. * Remove delayed reduction implementation of squaring. * Enforce assumptions about extension generators. * Make the accumulation variable a u32 instead of u64. * Add test to trigger carry branch in reduce160. * cargo fmt * Some documentation. * Clippy; improved comments. * cargo fmt * Remove redundant Square specialisations. * Fix reduce*() visibility. * Faster reduce160 from Jakub. * Change mul-by-const functions to operate on 160 bits instead of 128. * Move code for extensions of GoldilocksField to its own file.
This commit is contained in:
parent
7329dade94
commit
310493c293
@ -12,3 +12,4 @@ num = { version = "0.4", features = [ "rand" ] }
|
||||
rand = "0.8.4"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
unroll = "0.1.5"
|
||||
static_assertions = "1.1.0"
|
||||
|
||||
@ -170,7 +170,7 @@ impl<F: Extendable<2>> Mul for QuadraticExtension<F> {
|
||||
type Output = Self;
|
||||
|
||||
#[inline]
|
||||
fn mul(self, rhs: Self) -> Self {
|
||||
default fn mul(self, rhs: Self) -> Self {
|
||||
let Self([a0, a1]) = self;
|
||||
let Self([b0, b1]) = rhs;
|
||||
|
||||
|
||||
@ -201,7 +201,7 @@ impl<F: Extendable<4>> Mul for QuarticExtension<F> {
|
||||
type Output = Self;
|
||||
|
||||
#[inline]
|
||||
fn mul(self, rhs: Self) -> Self {
|
||||
default fn mul(self, rhs: Self) -> Self {
|
||||
let Self([a0, a1, a2, a3]) = self;
|
||||
let Self([b0, b1, b2, b3]) = rhs;
|
||||
|
||||
|
||||
@ -201,7 +201,7 @@ impl<F: Extendable<5>> Mul for QuinticExtension<F> {
|
||||
type Output = Self;
|
||||
|
||||
#[inline]
|
||||
fn mul(self, rhs: Self) -> Self {
|
||||
default fn mul(self, rhs: Self) -> Self {
|
||||
let Self([a0, a1, a2, a3, a4]) = self;
|
||||
let Self([b0, b1, b2, b3, b4]) = rhs;
|
||||
let w = <Self as OEF<5>>::W;
|
||||
|
||||
495
field/src/goldilocks_extensions.rs
Normal file
495
field/src/goldilocks_extensions.rs
Normal file
@ -0,0 +1,495 @@
|
||||
use std::ops::Mul;
|
||||
|
||||
use static_assertions::const_assert;
|
||||
|
||||
use crate::extension_field::quadratic::QuadraticExtension;
|
||||
use crate::extension_field::quartic::QuarticExtension;
|
||||
use crate::extension_field::quintic::QuinticExtension;
|
||||
use crate::extension_field::{Extendable, Frobenius};
|
||||
use crate::field_types::Field;
|
||||
use crate::goldilocks_field::{reduce160, GoldilocksField};
|
||||
|
||||
// Degree-1 case: the impl body is intentionally empty, so every item comes
// from the `Frobenius` trait's provided defaults.
impl Frobenius<1> for GoldilocksField {}
|
||||
|
||||
impl Extendable<2> for GoldilocksField {
|
||||
type Extension = QuadraticExtension<Self>;
|
||||
|
||||
// Verifiable in Sage with
|
||||
// `R.<x> = GF(p)[]; assert (x^2 - 7).is_irreducible()`.
|
||||
const W: Self = Self(7);
|
||||
|
||||
// DTH_ROOT = W^((ORDER - 1)/2)
|
||||
const DTH_ROOT: Self = Self(18446744069414584320);
|
||||
|
||||
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 2] =
|
||||
[Self(18081566051660590251), Self(16121475356294670766)];
|
||||
|
||||
const EXT_POWER_OF_TWO_GENERATOR: [Self; 2] = [Self(0), Self(15659105665374529263)];
|
||||
}
|
||||
|
||||
impl Mul for QuadraticExtension<GoldilocksField> {
|
||||
#[inline]
|
||||
fn mul(self, rhs: Self) -> Self {
|
||||
let Self([a0, a1]) = self;
|
||||
let Self([b0, b1]) = rhs;
|
||||
let c = ext2_mul([a0.0, a1.0], [b0.0, b1.0]);
|
||||
Self(c)
|
||||
}
|
||||
}
|
||||
|
||||
impl Extendable<4> for GoldilocksField {
|
||||
type Extension = QuarticExtension<Self>;
|
||||
|
||||
const W: Self = Self(7);
|
||||
|
||||
// DTH_ROOT = W^((ORDER - 1)/4)
|
||||
const DTH_ROOT: Self = Self(281474976710656);
|
||||
|
||||
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 4] = [
|
||||
Self(5024755240244648895),
|
||||
Self(13227474371289740625),
|
||||
Self(3912887029498544536),
|
||||
Self(3900057112666848848),
|
||||
];
|
||||
|
||||
const EXT_POWER_OF_TWO_GENERATOR: [Self; 4] =
|
||||
[Self(0), Self(0), Self(0), Self(12587610116473453104)];
|
||||
}
|
||||
|
||||
impl Mul for QuarticExtension<GoldilocksField> {
|
||||
#[inline]
|
||||
fn mul(self, rhs: Self) -> Self {
|
||||
let Self([a0, a1, a2, a3]) = self;
|
||||
let Self([b0, b1, b2, b3]) = rhs;
|
||||
let c = ext4_mul([a0.0, a1.0, a2.0, a3.0], [b0.0, b1.0, b2.0, b3.0]);
|
||||
Self(c)
|
||||
}
|
||||
}
|
||||
|
||||
impl Extendable<5> for GoldilocksField {
|
||||
type Extension = QuinticExtension<Self>;
|
||||
|
||||
const W: Self = Self(3);
|
||||
|
||||
// DTH_ROOT = W^((ORDER - 1)/5)
|
||||
const DTH_ROOT: Self = Self(1041288259238279555);
|
||||
|
||||
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 5] = [
|
||||
Self(2899034827742553394),
|
||||
Self(13012057356839176729),
|
||||
Self(14593811582388663055),
|
||||
Self(7722900811313895436),
|
||||
Self(4557222484695340057),
|
||||
];
|
||||
|
||||
const EXT_POWER_OF_TWO_GENERATOR: [Self; 5] = [
|
||||
Self::POWER_OF_TWO_GENERATOR,
|
||||
Self(0),
|
||||
Self(0),
|
||||
Self(0),
|
||||
Self(0),
|
||||
];
|
||||
}
|
||||
|
||||
impl Mul for QuinticExtension<GoldilocksField> {
|
||||
#[inline]
|
||||
fn mul(self, rhs: Self) -> Self {
|
||||
let Self([a0, a1, a2, a3, a4]) = self;
|
||||
let Self([b0, b1, b2, b3, b4]) = rhs;
|
||||
let c = ext5_mul(
|
||||
[a0.0, a1.0, a2.0, a3.0, a4.0],
|
||||
[b0.0, b1.0, b2.0, b3.0, b4.0],
|
||||
);
|
||||
Self(c)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The functions extD_add_prods[0-4] are helper functions for
|
||||
* computing products for extensions of degree D over the Goldilocks
|
||||
* field. They are faster than the generic method because all
|
||||
* reductions are delayed until the end which means only one per
|
||||
* result coefficient is necessary.
|
||||
*/
|
||||
|
||||
/// Multiply the 160-bit value `x + y * 2^128` by 3.
///
/// Returns `(a, b)` such that `a + b * 2^128 = 3 * (x + y * 2^128)`, where `a`
/// holds the low 128 bits and `b` the bits above them. The caller must make
/// sure the tripled value still fits in 160 bits (`b < 2^32`); no explicit
/// check is made here.
#[inline(always)]
fn u160_times_3(x: u128, y: u32) -> (u128, u32) {
    // 3x = x + 2x. `x << 1` silently drops bit 127 of `x`, and the addition
    // itself may carry out of 128 bits; both lost bits have weight 2^128 and
    // therefore belong in the high word.
    let shifted_out = (x >> 127) as u32;
    let (low, carry) = x.overflowing_add(x << 1);
    (low, 3 * y + shifted_out + u32::from(carry))
}
|
||||
|
||||
/// Multiply the 160-bit value `x + y * 2^128` by 7.
///
/// Returns `(a, b)` such that `a + b * 2^128 = 7 * (x + y * 2^128)`, where `a`
/// holds the low 128 bits and `b` the bits above them. The caller must make
/// sure the result still fits in 160 bits (`b < 2^32`); no explicit check is
/// made here.
#[inline(always)]
fn u160_times_7(x: u128, y: u32) -> (u128, u32) {
    // 7x = 8x - x. `x << 3` drops the top three bits of `x` (weight 2^128),
    // and the subtraction may borrow one unit of 2^128.
    let shifted_out = (x >> (128 - 3)) as u32;
    let (low, borrow) = (x << 3).overflowing_sub(x);
    // NB: subtracting the borrow can't underflow. A borrow requires
    // `(x << 3) < x`, which only happens when at least one of the top three
    // bits of `x` is set, i.e. when `shifted_out >= 1`.
    (low, 7 * y + shifted_out - u32::from(borrow))
}
|
||||
|
||||
/*
|
||||
* Quadratic multiplication and squaring
|
||||
*/
|
||||
|
||||
#[inline(always)]
|
||||
fn ext2_add_prods0(a: &[u64; 2], b: &[u64; 2]) -> GoldilocksField {
|
||||
// Computes a0 * b0 + W * a1 * b1;
|
||||
let [a0, a1] = *a;
|
||||
let [b0, b1] = *b;
|
||||
|
||||
let cy;
|
||||
|
||||
// W * a1 * b1
|
||||
let (mut cumul_lo, mut cumul_hi) = u160_times_7((a1 as u128) * (b1 as u128), 0u32);
|
||||
|
||||
// a0 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn ext2_add_prods1(a: &[u64; 2], b: &[u64; 2]) -> GoldilocksField {
|
||||
// Computes a0 * b1 + a1 * b0;
|
||||
let [a0, a1] = *a;
|
||||
let [b0, b1] = *b;
|
||||
|
||||
let cy;
|
||||
|
||||
// a0 * b1
|
||||
let mut cumul_lo = (a0 as u128) * (b1 as u128);
|
||||
|
||||
// a1 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128));
|
||||
let cumul_hi = cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
/// Multiply a and b considered as elements of GF(p^2).
|
||||
#[inline(always)]
|
||||
pub(crate) fn ext2_mul(a: [u64; 2], b: [u64; 2]) -> [GoldilocksField; 2] {
|
||||
// The code in ext2_add_prods[01] assumes the quadratic extension
|
||||
// generator is 7.
|
||||
const_assert!(<GoldilocksField as Extendable<2>>::W.0 == 7u64);
|
||||
|
||||
let c0 = ext2_add_prods0(&a, &b);
|
||||
let c1 = ext2_add_prods1(&a, &b);
|
||||
[c0, c1]
|
||||
}
|
||||
|
||||
/*
|
||||
* Quartic multiplication and squaring
|
||||
*/
|
||||
|
||||
#[inline(always)]
|
||||
fn ext4_add_prods0(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField {
|
||||
// Computes c0 = a0 * b0 + W * (a1 * b3 + a2 * b2 + a3 * b1)
|
||||
|
||||
let [a0, a1, a2, a3] = *a;
|
||||
let [b0, b1, b2, b3] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// a1 * b3
|
||||
let mut cumul_lo = (a1 as u128) * (b3 as u128);
|
||||
|
||||
// a2 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b2 as u128));
|
||||
let mut cumul_hi = cy as u32;
|
||||
|
||||
// a3 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// * W
|
||||
(cumul_lo, cumul_hi) = u160_times_7(cumul_lo, cumul_hi);
|
||||
|
||||
// a0 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn ext4_add_prods1(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField {
|
||||
// Computes c1 = a0 * b1 + a1 * b0 + W * (a2 * b3 + a3 * b2);
|
||||
|
||||
let [a0, a1, a2, a3] = *a;
|
||||
let [b0, b1, b2, b3] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// a2 * b3
|
||||
let mut cumul_lo = (a2 as u128) * (b3 as u128);
|
||||
|
||||
// a3 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b2 as u128));
|
||||
let mut cumul_hi = cy as u32;
|
||||
|
||||
// * W
|
||||
(cumul_lo, cumul_hi) = u160_times_7(cumul_lo, cumul_hi);
|
||||
|
||||
// a0 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a1 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn ext4_add_prods2(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField {
|
||||
// Computes c2 = a0 * b2 + a1 * b1 + a2 * b0 + W * a3 * b3;
|
||||
|
||||
let [a0, a1, a2, a3] = *a;
|
||||
let [b0, b1, b2, b3] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// W * a3 * b3
|
||||
let (mut cumul_lo, mut cumul_hi) = u160_times_7((a3 as u128) * (b3 as u128), 0u32);
|
||||
|
||||
// a0 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b2 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a1 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a2 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn ext4_add_prods3(a: &[u64; 4], b: &[u64; 4]) -> GoldilocksField {
|
||||
// Computes c3 = a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
|
||||
|
||||
let [a0, a1, a2, a3] = *a;
|
||||
let [b0, b1, b2, b3] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// a0 * b3
|
||||
let mut cumul_lo = (a0 as u128) * (b3 as u128);
|
||||
|
||||
// a1 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b2 as u128));
|
||||
let mut cumul_hi = cy as u32;
|
||||
|
||||
// a2 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a3 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
/// Multiply a and b considered as elements of GF(p^4).
|
||||
#[inline(always)]
|
||||
pub(crate) fn ext4_mul(a: [u64; 4], b: [u64; 4]) -> [GoldilocksField; 4] {
|
||||
// The code in ext4_add_prods[0-3] assumes the quartic extension
|
||||
// generator is 7.
|
||||
const_assert!(<GoldilocksField as Extendable<4>>::W.0 == 7u64);
|
||||
|
||||
let c0 = ext4_add_prods0(&a, &b);
|
||||
let c1 = ext4_add_prods1(&a, &b);
|
||||
let c2 = ext4_add_prods2(&a, &b);
|
||||
let c3 = ext4_add_prods3(&a, &b);
|
||||
[c0, c1, c2, c3]
|
||||
}
|
||||
|
||||
/*
|
||||
* Quintic multiplication and squaring
|
||||
*/
|
||||
|
||||
#[inline(always)]
|
||||
fn ext5_add_prods0(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
|
||||
// Computes c0 = a0 * b0 + W * (a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1)
|
||||
|
||||
let [a0, a1, a2, a3, a4] = *a;
|
||||
let [b0, b1, b2, b3, b4] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// a1 * b4
|
||||
let mut cumul_lo = (a1 as u128) * (b4 as u128);
|
||||
|
||||
// a2 * b3
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b3 as u128));
|
||||
let mut cumul_hi = cy as u32;
|
||||
|
||||
// a3 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b2 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a4 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// * W
|
||||
(cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi);
|
||||
|
||||
// a0 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn ext5_add_prods1(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
|
||||
// Computes c1 = a0 * b1 + a1 * b0 + W * (a2 * b4 + a3 * b3 + a4 * b2);
|
||||
|
||||
let [a0, a1, a2, a3, a4] = *a;
|
||||
let [b0, b1, b2, b3, b4] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// a2 * b4
|
||||
let mut cumul_lo = (a2 as u128) * (b4 as u128);
|
||||
|
||||
// a3 * b3
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b3 as u128));
|
||||
let mut cumul_hi = cy as u32;
|
||||
|
||||
// a4 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b2 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// * W
|
||||
(cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi);
|
||||
|
||||
// a0 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a1 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn ext5_add_prods2(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
|
||||
// Computes c2 = a0 * b2 + a1 * b1 + a2 * b0 + W * (a3 * b4 + a4 * b3);
|
||||
|
||||
let [a0, a1, a2, a3, a4] = *a;
|
||||
let [b0, b1, b2, b3, b4] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// a3 * b4
|
||||
let mut cumul_lo = (a3 as u128) * (b4 as u128);
|
||||
|
||||
// a4 * b3
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b3 as u128));
|
||||
let mut cumul_hi = cy as u32;
|
||||
|
||||
// * W
|
||||
(cumul_lo, cumul_hi) = u160_times_3(cumul_lo, cumul_hi);
|
||||
|
||||
// a0 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b2 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a1 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a2 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn ext5_add_prods3(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
|
||||
// Computes c3 = a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0 + W * a4 * b4;
|
||||
|
||||
let [a0, a1, a2, a3, a4] = *a;
|
||||
let [b0, b1, b2, b3, b4] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// W * a4 * b4
|
||||
let (mut cumul_lo, mut cumul_hi) = u160_times_3((a4 as u128) * (b4 as u128), 0u32);
|
||||
|
||||
// a0 * b3
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a0 as u128) * (b3 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a1 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b2 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a2 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a3 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn ext5_add_prods4(a: &[u64; 5], b: &[u64; 5]) -> GoldilocksField {
|
||||
// Computes c4 = a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
|
||||
|
||||
let [a0, a1, a2, a3, a4] = *a;
|
||||
let [b0, b1, b2, b3, b4] = *b;
|
||||
|
||||
let mut cy;
|
||||
|
||||
// a0 * b4
|
||||
let mut cumul_lo = (a0 as u128) * (b4 as u128);
|
||||
|
||||
// a1 * b3
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a1 as u128) * (b3 as u128));
|
||||
let mut cumul_hi = cy as u32;
|
||||
|
||||
// a2 * b2
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a2 as u128) * (b2 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a3 * b1
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a3 as u128) * (b1 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
// a4 * b0
|
||||
(cumul_lo, cy) = cumul_lo.overflowing_add((a4 as u128) * (b0 as u128));
|
||||
cumul_hi += cy as u32;
|
||||
|
||||
unsafe { reduce160(cumul_lo, cumul_hi) }
|
||||
}
|
||||
|
||||
/// Multiply a and b considered as elements of GF(p^5).
|
||||
#[inline(always)]
|
||||
pub(crate) fn ext5_mul(a: [u64; 5], b: [u64; 5]) -> [GoldilocksField; 5] {
|
||||
// The code in ext5_add_prods[0-4] assumes the quintic extension
|
||||
// generator is 3.
|
||||
const_assert!(<GoldilocksField as Extendable<5>>::W.0 == 3u64);
|
||||
|
||||
let c0 = ext5_add_prods0(&a, &b);
|
||||
let c1 = ext5_add_prods1(&a, &b);
|
||||
let c2 = ext5_add_prods2(&a, &b);
|
||||
let c3 = ext5_add_prods3(&a, &b);
|
||||
let c4 = ext5_add_prods4(&a, &b);
|
||||
[c0, c1, c2, c3, c4]
|
||||
}
|
||||
@ -9,10 +9,6 @@ use plonky2_util::{assume, branch_hint};
|
||||
use rand::Rng;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::extension_field::quadratic::QuadraticExtension;
|
||||
use crate::extension_field::quartic::QuarticExtension;
|
||||
use crate::extension_field::quintic::QuinticExtension;
|
||||
use crate::extension_field::{Extendable, Frobenius};
|
||||
use crate::field_types::{Field, Field64, PrimeField, PrimeField64};
|
||||
use crate::inversion::try_inverse_u64;
|
||||
|
||||
@ -283,66 +279,6 @@ impl DivAssign for GoldilocksField {
|
||||
}
|
||||
}
|
||||
|
||||
impl Extendable<2> for GoldilocksField {
|
||||
type Extension = QuadraticExtension<Self>;
|
||||
|
||||
// Verifiable in Sage with
|
||||
// `R.<x> = GF(p)[]; assert (x^2 - 7).is_irreducible()`.
|
||||
const W: Self = Self(7);
|
||||
|
||||
// DTH_ROOT = W^((ORDER - 1)/2)
|
||||
const DTH_ROOT: Self = Self(18446744069414584320);
|
||||
|
||||
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 2] =
|
||||
[Self(18081566051660590251), Self(16121475356294670766)];
|
||||
|
||||
const EXT_POWER_OF_TWO_GENERATOR: [Self; 2] = [Self(0), Self(15659105665374529263)];
|
||||
}
|
||||
|
||||
impl Extendable<4> for GoldilocksField {
|
||||
type Extension = QuarticExtension<Self>;
|
||||
|
||||
const W: Self = Self(7);
|
||||
|
||||
// DTH_ROOT = W^((ORDER - 1)/4)
|
||||
const DTH_ROOT: Self = Self(281474976710656);
|
||||
|
||||
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 4] = [
|
||||
Self(5024755240244648895),
|
||||
Self(13227474371289740625),
|
||||
Self(3912887029498544536),
|
||||
Self(3900057112666848848),
|
||||
];
|
||||
|
||||
const EXT_POWER_OF_TWO_GENERATOR: [Self; 4] =
|
||||
[Self(0), Self(0), Self(0), Self(12587610116473453104)];
|
||||
}
|
||||
|
||||
impl Extendable<5> for GoldilocksField {
|
||||
type Extension = QuinticExtension<Self>;
|
||||
|
||||
const W: Self = Self(3);
|
||||
|
||||
// DTH_ROOT = W^((ORDER - 1)/5)
|
||||
const DTH_ROOT: Self = Self(1041288259238279555);
|
||||
|
||||
const EXT_MULTIPLICATIVE_GROUP_GENERATOR: [Self; 5] = [
|
||||
Self(2899034827742553394),
|
||||
Self(13012057356839176729),
|
||||
Self(14593811582388663055),
|
||||
Self(7722900811313895436),
|
||||
Self(4557222484695340057),
|
||||
];
|
||||
|
||||
const EXT_POWER_OF_TWO_GENERATOR: [Self; 5] = [
|
||||
Self::POWER_OF_TWO_GENERATOR,
|
||||
Self(0),
|
||||
Self(0),
|
||||
Self(0),
|
||||
Self(0),
|
||||
];
|
||||
}
|
||||
|
||||
/// Fast addition modulo ORDER for x86-64.
|
||||
/// This function is marked unsafe for the following reasons:
|
||||
/// - It is only correct if x + y < 2**64 + ORDER = 0x1ffffffff00000001.
|
||||
@ -407,7 +343,34 @@ fn split(x: u128) -> (u64, u64) {
|
||||
(x as u64, (x >> 64) as u64)
|
||||
}
|
||||
|
||||
impl Frobenius<1> for GoldilocksField {}
|
||||
/// Reduce the value x_lo + x_hi * 2^128 to an element in the
|
||||
/// Goldilocks field.
|
||||
///
|
||||
/// This function is marked 'unsafe' because correctness relies on the
|
||||
/// unchecked assumption that x < 2^160 - 2^128 + 2^96. Further,
|
||||
/// performance may degrade as x_hi increases beyond 2**40 or so.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn reduce160(x_lo: u128, x_hi: u32) -> GoldilocksField {
|
||||
let x_hi = (x_lo >> 96) as u64 + ((x_hi as u64) << 32); // shld to form x_hi
|
||||
let x_mid = (x_lo >> 64) as u32; // shr to form x_mid
|
||||
let x_lo = x_lo as u64;
|
||||
|
||||
// sub + jc (should fuse)
|
||||
let (mut t0, borrow) = x_lo.overflowing_sub(x_hi);
|
||||
if borrow {
|
||||
// The maximum possible value of x is (2^64 - 1)^2 * 4 * 7 < 2^133,
|
||||
// so x_hi < 2^37. A borrow will happen roughly one in 134 million
|
||||
// times, so it's best to branch.
|
||||
branch_hint();
|
||||
// NB: this assumes that x < 2^160 - 2^128 + 2^96.
|
||||
t0 -= EPSILON; // Cannot underflow if x_hi is canonical.
|
||||
}
|
||||
// imul
|
||||
let t1 = (x_mid as u64) * EPSILON;
|
||||
// add, sbb, add
|
||||
let t2 = add_no_canonicalize_trashing_input(t0, t1);
|
||||
GoldilocksField(t2)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@ -15,6 +15,7 @@ pub mod cosets;
|
||||
pub mod extension_field;
|
||||
pub mod fft;
|
||||
pub mod field_types;
|
||||
pub mod goldilocks_extensions;
|
||||
pub mod goldilocks_field;
|
||||
pub mod interpolation;
|
||||
mod inversion;
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
|
||||
use plonky2::field::extension_field::quadratic::QuadraticExtension;
|
||||
use plonky2::field::extension_field::quartic::QuarticExtension;
|
||||
use plonky2::field::extension_field::quintic::QuinticExtension;
|
||||
use plonky2::field::field_types::Field;
|
||||
@ -175,6 +176,7 @@ pub(crate) fn bench_field<F: Field>(c: &mut Criterion) {
|
||||
|
||||
fn criterion_benchmark(c: &mut Criterion) {
|
||||
bench_field::<GoldilocksField>(c);
|
||||
bench_field::<QuadraticExtension<GoldilocksField>>(c);
|
||||
bench_field::<QuarticExtension<GoldilocksField>>(c);
|
||||
bench_field::<QuinticExtension<GoldilocksField>>(c);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user