mirror of
https://github.com/logos-storage/plonky2.git
synced 2026-01-07 00:03:10 +00:00
AVX-512 packed Goldilocks (#400)
* WIP AVX-512 Goldilocks * Fix tests * fmt * Hamish PR comment
This commit is contained in:
parent
a6e1f7ccad
commit
f072d09ae4
656
field/src/arch/x86_64/avx512_goldilocks_field.rs
Normal file
656
field/src/arch/x86_64/avx512_goldilocks_field.rs
Normal file
@ -0,0 +1,656 @@
|
||||
use core::arch::x86_64::*;
|
||||
use std::fmt;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::iter::{Product, Sum};
|
||||
use std::mem::transmute;
|
||||
use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
|
||||
|
||||
use crate::field_types::{Field, PrimeField};
|
||||
use crate::goldilocks_field::GoldilocksField;
|
||||
use crate::ops::Square;
|
||||
use crate::packed_field::PackedField;
|
||||
|
||||
// Ideally `Avx512GoldilocksField` would wrap `__m512i`. Unfortunately, `__m512i` has an alignment
|
||||
// of 64B, which would preclude us from casting `[GoldilocksField; 8]` (alignment 8B) to
|
||||
// `Avx512GoldilocksField`. We need to ensure that `Avx512GoldilocksField` has the same alignment as
|
||||
// `GoldilocksField`. Thus we wrap `[GoldilocksField; 8]` and use the `new` and `get` methods to
|
||||
// convert to and from `__m512i`.
|
||||
#[derive(Copy, Clone)]
|
||||
#[repr(transparent)]
|
||||
pub struct Avx512GoldilocksField(pub [GoldilocksField; 8]);
|
||||
|
||||
impl Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn new(x: __m512i) -> Self {
|
||||
unsafe { transmute(x) }
|
||||
}
|
||||
#[inline]
|
||||
fn get(&self) -> __m512i {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl PackedField for Avx512GoldilocksField {
|
||||
const WIDTH: usize = 8;
|
||||
|
||||
type Scalar = GoldilocksField;
|
||||
|
||||
const ZEROS: Self = Self([GoldilocksField::ZERO; 8]);
|
||||
const ONES: Self = Self([GoldilocksField::ONE; 8]);
|
||||
|
||||
#[inline]
|
||||
fn from_arr(arr: [Self::Scalar; Self::WIDTH]) -> Self {
|
||||
Self(arr)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_arr(&self) -> [Self::Scalar; Self::WIDTH] {
|
||||
self.0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn from_slice(slice: &[Self::Scalar]) -> &Self {
|
||||
assert_eq!(slice.len(), Self::WIDTH);
|
||||
unsafe { &*slice.as_ptr().cast() }
|
||||
}
|
||||
#[inline]
|
||||
fn from_slice_mut(slice: &mut [Self::Scalar]) -> &mut Self {
|
||||
assert_eq!(slice.len(), Self::WIDTH);
|
||||
unsafe { &mut *slice.as_mut_ptr().cast() }
|
||||
}
|
||||
#[inline]
|
||||
fn as_slice(&self) -> &[Self::Scalar] {
|
||||
&self.0[..]
|
||||
}
|
||||
#[inline]
|
||||
fn as_slice_mut(&mut self) -> &mut [Self::Scalar] {
|
||||
&mut self.0[..]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) {
|
||||
let (v0, v1) = (self.get(), other.get());
|
||||
let (res0, res1) = match block_len {
|
||||
1 => unsafe { interleave1(v0, v1) },
|
||||
2 => unsafe { interleave2(v0, v1) },
|
||||
4 => unsafe { interleave4(v0, v1) },
|
||||
8 => (v0, v1),
|
||||
_ => panic!("unsupported block_len"),
|
||||
};
|
||||
(Self::new(res0), Self::new(res1))
|
||||
}
|
||||
}
|
||||
|
||||
impl Add<Self> for Avx512GoldilocksField {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn add(self, rhs: Self) -> Self {
|
||||
Self::new(unsafe { add(self.get(), rhs.get()) })
|
||||
}
|
||||
}
|
||||
impl Add<GoldilocksField> for Avx512GoldilocksField {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn add(self, rhs: GoldilocksField) -> Self {
|
||||
self + Self::from(rhs)
|
||||
}
|
||||
}
|
||||
impl Add<Avx512GoldilocksField> for GoldilocksField {
|
||||
type Output = Avx512GoldilocksField;
|
||||
#[inline]
|
||||
fn add(self, rhs: Self::Output) -> Self::Output {
|
||||
Self::Output::from(self) + rhs
|
||||
}
|
||||
}
|
||||
impl AddAssign<Self> for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn add_assign(&mut self, rhs: Self) {
|
||||
*self = *self + rhs;
|
||||
}
|
||||
}
|
||||
impl AddAssign<GoldilocksField> for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn add_assign(&mut self, rhs: GoldilocksField) {
|
||||
*self = *self + rhs;
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "({:?})", self.get())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
Self::ZEROS
|
||||
}
|
||||
}
|
||||
|
||||
impl Div<GoldilocksField> for Avx512GoldilocksField {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn div(self, rhs: GoldilocksField) -> Self {
|
||||
self * rhs.inverse()
|
||||
}
|
||||
}
|
||||
impl DivAssign<GoldilocksField> for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn div_assign(&mut self, rhs: GoldilocksField) {
|
||||
*self *= rhs.inverse();
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GoldilocksField> for Avx512GoldilocksField {
|
||||
fn from(x: GoldilocksField) -> Self {
|
||||
Self([x; 8])
|
||||
}
|
||||
}
|
||||
|
||||
impl Mul<Self> for Avx512GoldilocksField {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn mul(self, rhs: Self) -> Self {
|
||||
Self::new(unsafe { mul(self.get(), rhs.get()) })
|
||||
}
|
||||
}
|
||||
impl Mul<GoldilocksField> for Avx512GoldilocksField {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn mul(self, rhs: GoldilocksField) -> Self {
|
||||
self * Self::from(rhs)
|
||||
}
|
||||
}
|
||||
impl Mul<Avx512GoldilocksField> for GoldilocksField {
|
||||
type Output = Avx512GoldilocksField;
|
||||
#[inline]
|
||||
fn mul(self, rhs: Avx512GoldilocksField) -> Self::Output {
|
||||
Self::Output::from(self) * rhs
|
||||
}
|
||||
}
|
||||
impl MulAssign<Self> for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn mul_assign(&mut self, rhs: Self) {
|
||||
*self = *self * rhs;
|
||||
}
|
||||
}
|
||||
impl MulAssign<GoldilocksField> for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn mul_assign(&mut self, rhs: GoldilocksField) {
|
||||
*self = *self * rhs;
|
||||
}
|
||||
}
|
||||
|
||||
impl Neg for Avx512GoldilocksField {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn neg(self) -> Self {
|
||||
Self::new(unsafe { neg(self.get()) })
|
||||
}
|
||||
}
|
||||
|
||||
impl Product for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
|
||||
iter.reduce(|x, y| x * y).unwrap_or(Self::ONES)
|
||||
}
|
||||
}
|
||||
|
||||
impl Square for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn square(&self) -> Self {
|
||||
Self::new(unsafe { square(self.get()) })
|
||||
}
|
||||
}
|
||||
|
||||
impl Sub<Self> for Avx512GoldilocksField {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn sub(self, rhs: Self) -> Self {
|
||||
Self::new(unsafe { sub(self.get(), rhs.get()) })
|
||||
}
|
||||
}
|
||||
impl Sub<GoldilocksField> for Avx512GoldilocksField {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn sub(self, rhs: GoldilocksField) -> Self {
|
||||
self - Self::from(rhs)
|
||||
}
|
||||
}
|
||||
impl Sub<Avx512GoldilocksField> for GoldilocksField {
|
||||
type Output = Avx512GoldilocksField;
|
||||
#[inline]
|
||||
fn sub(self, rhs: Avx512GoldilocksField) -> Self::Output {
|
||||
Self::Output::from(self) - rhs
|
||||
}
|
||||
}
|
||||
impl SubAssign<Self> for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn sub_assign(&mut self, rhs: Self) {
|
||||
*self = *self - rhs;
|
||||
}
|
||||
}
|
||||
impl SubAssign<GoldilocksField> for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn sub_assign(&mut self, rhs: GoldilocksField) {
|
||||
*self = *self - rhs;
|
||||
}
|
||||
}
|
||||
|
||||
impl Sum for Avx512GoldilocksField {
|
||||
#[inline]
|
||||
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
|
||||
iter.reduce(|x, y| x + y).unwrap_or(Self::ZEROS)
|
||||
}
|
||||
}
|
||||
|
||||
const FIELD_ORDER: __m512i = unsafe { transmute([GoldilocksField::ORDER; 8]) };
|
||||
const EPSILON: __m512i = unsafe { transmute([GoldilocksField::ORDER.wrapping_neg(); 8]) };
|
||||
|
||||
#[inline]
|
||||
unsafe fn canonicalize(x: __m512i) -> __m512i {
|
||||
let mask = _mm512_cmpge_epu64_mask(x, FIELD_ORDER);
|
||||
_mm512_mask_sub_epi64(x, mask, x, FIELD_ORDER)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn add_no_double_overflow_64_64(x: __m512i, y: __m512i) -> __m512i {
|
||||
let res_wrapped = _mm512_add_epi64(x, y);
|
||||
let mask = _mm512_cmplt_epu64_mask(res_wrapped, y); // mask set if add overflowed
|
||||
let res = _mm512_mask_sub_epi64(res_wrapped, mask, res_wrapped, FIELD_ORDER);
|
||||
res
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn sub_no_double_overflow_64_64(x: __m512i, y: __m512i) -> __m512i {
|
||||
let mask = _mm512_cmplt_epu64_mask(x, y); // mask set if sub will underflow (x < y)
|
||||
let res_wrapped = _mm512_sub_epi64(x, y);
|
||||
let res = _mm512_mask_add_epi64(res_wrapped, mask, res_wrapped, FIELD_ORDER);
|
||||
res
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn add(x: __m512i, y: __m512i) -> __m512i {
|
||||
add_no_double_overflow_64_64(x, canonicalize(y))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn sub(x: __m512i, y: __m512i) -> __m512i {
|
||||
sub_no_double_overflow_64_64(x, canonicalize(y))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn neg(y: __m512i) -> __m512i {
|
||||
_mm512_sub_epi64(FIELD_ORDER, canonicalize(y))
|
||||
}
|
||||
|
||||
const LO_32_BITS_MASK: __mmask16 = unsafe { transmute(0b0101010101010101u16) };
|
||||
|
||||
#[inline]
|
||||
unsafe fn mul64_64(x: __m512i, y: __m512i) -> (__m512i, __m512i) {
|
||||
// We want to move the high 32 bits to the low position. The multiplication instruction ignores
|
||||
// the high 32 bits, so it's ok to just duplicate it into the low position. This duplication can
|
||||
// be done on port 5; bitshifts run on port 0, competing with multiplication.
|
||||
// This instruction is only provided for 32-bit floats, not integers. Idk why Intel makes the
|
||||
// distinction; the casts are free and it guarantees that the exact bit pattern is preserved.
|
||||
// Using a swizzle instruction of the wrong domain (float vs int) does not increase latency
|
||||
// since Haswell.
|
||||
let x_hi = _mm512_castps_si512(_mm512_movehdup_ps(_mm512_castsi512_ps(x)));
|
||||
let y_hi = _mm512_castps_si512(_mm512_movehdup_ps(_mm512_castsi512_ps(y)));
|
||||
|
||||
// All four pairwise multiplications
|
||||
let mul_ll = _mm512_mul_epu32(x, y);
|
||||
let mul_lh = _mm512_mul_epu32(x, y_hi);
|
||||
let mul_hl = _mm512_mul_epu32(x_hi, y);
|
||||
let mul_hh = _mm512_mul_epu32(x_hi, y_hi);
|
||||
|
||||
// Bignum addition
|
||||
// Extract high 32 bits of mul_ll and add to mul_hl. This cannot overflow.
|
||||
let mul_ll_hi = _mm512_srli_epi64::<32>(mul_ll);
|
||||
let t0 = _mm512_add_epi64(mul_hl, mul_ll_hi);
|
||||
// Extract low 32 bits of t0 and add to mul_lh. Again, this cannot overflow.
|
||||
// Also, extract high 32 bits of t0 and add to mul_hh.
|
||||
let t0_lo = _mm512_and_si512(t0, EPSILON);
|
||||
let t0_hi = _mm512_srli_epi64::<32>(t0);
|
||||
let t1 = _mm512_add_epi64(mul_lh, t0_lo);
|
||||
let t2 = _mm512_add_epi64(mul_hh, t0_hi);
|
||||
// Lastly, extract the high 32 bits of t1 and add to t2.
|
||||
let t1_hi = _mm512_srli_epi64::<32>(t1);
|
||||
let res_hi = _mm512_add_epi64(t2, t1_hi);
|
||||
|
||||
// Form res_lo by combining the low half of mul_ll with the low half of t1 (shifted into high
|
||||
// position).
|
||||
let t1_lo = _mm512_castps_si512(_mm512_moveldup_ps(_mm512_castsi512_ps(t1)));
|
||||
let res_lo = _mm512_mask_blend_epi32(LO_32_BITS_MASK, t1_lo, mul_ll);
|
||||
|
||||
(res_hi, res_lo)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn square64(x: __m512i) -> (__m512i, __m512i) {
|
||||
// Get high 32 bits of x. See comment in mul64_64_s.
|
||||
let x_hi = _mm512_castps_si512(_mm512_movehdup_ps(_mm512_castsi512_ps(x)));
|
||||
|
||||
// All pairwise multiplications.
|
||||
let mul_ll = _mm512_mul_epu32(x, x);
|
||||
let mul_lh = _mm512_mul_epu32(x, x_hi);
|
||||
let mul_hh = _mm512_mul_epu32(x_hi, x_hi);
|
||||
|
||||
// Bignum addition, but mul_lh is shifted by 33 bits (not 32).
|
||||
let mul_ll_hi = _mm512_srli_epi64::<33>(mul_ll);
|
||||
let t0 = _mm512_add_epi64(mul_lh, mul_ll_hi);
|
||||
let t0_hi = _mm512_srli_epi64::<31>(t0);
|
||||
let res_hi = _mm512_add_epi64(mul_hh, t0_hi);
|
||||
|
||||
// Form low result by adding the mul_ll and the low 31 bits of mul_lh (shifted to the high
|
||||
// position).
|
||||
let mul_lh_lo = _mm512_slli_epi64::<33>(mul_lh);
|
||||
let res_lo = _mm512_add_epi64(mul_ll, mul_lh_lo);
|
||||
|
||||
(res_hi, res_lo)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn reduce128(x: (__m512i, __m512i)) -> __m512i {
|
||||
let (hi0, lo0) = x;
|
||||
let hi_hi0 = _mm512_srli_epi64::<32>(hi0);
|
||||
let lo1 = sub_no_double_overflow_64_64(lo0, hi_hi0);
|
||||
let t1 = _mm512_mul_epu32(hi0, EPSILON);
|
||||
let lo2 = add_no_double_overflow_64_64(lo1, t1);
|
||||
lo2
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn mul(x: __m512i, y: __m512i) -> __m512i {
|
||||
reduce128(mul64_64(x, y))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn square(x: __m512i) -> __m512i {
|
||||
reduce128(square64(x))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn interleave1(x: __m512i, y: __m512i) -> (__m512i, __m512i) {
|
||||
let a = _mm512_unpacklo_epi64(x, y);
|
||||
let b = _mm512_unpackhi_epi64(x, y);
|
||||
(a, b)
|
||||
}
|
||||
|
||||
const INTERLEAVE2_IDX_A: __m512i = unsafe {
|
||||
transmute([
|
||||
0o00u64, 0o01u64, 0o10u64, 0o11u64, 0o04u64, 0o05u64, 0o14u64, 0o15u64,
|
||||
])
|
||||
};
|
||||
const INTERLEAVE2_IDX_B: __m512i = unsafe {
|
||||
transmute([
|
||||
0o02u64, 0o03u64, 0o12u64, 0o13u64, 0o06u64, 0o07u64, 0o16u64, 0o17u64,
|
||||
])
|
||||
};
|
||||
|
||||
#[inline]
|
||||
unsafe fn interleave2(x: __m512i, y: __m512i) -> (__m512i, __m512i) {
|
||||
let a = _mm512_permutex2var_epi64(x, INTERLEAVE2_IDX_A, y);
|
||||
let b = _mm512_permutex2var_epi64(x, INTERLEAVE2_IDX_B, y);
|
||||
(a, b)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn interleave4(x: __m512i, y: __m512i) -> (__m512i, __m512i) {
|
||||
let a = _mm512_shuffle_i64x2::<0x44>(x, y);
|
||||
let b = _mm512_shuffle_i64x2::<0xee>(x, y);
|
||||
(a, b)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::arch::x86_64::avx512_goldilocks_field::Avx512GoldilocksField;
|
||||
use crate::field_types::PrimeField;
|
||||
use crate::goldilocks_field::GoldilocksField;
|
||||
use crate::ops::Square;
|
||||
use crate::packed_field::PackedField;
|
||||
|
||||
fn test_vals_a() -> [GoldilocksField; 8] {
|
||||
[
|
||||
GoldilocksField::from_noncanonical_u64(14479013849828404771),
|
||||
GoldilocksField::from_noncanonical_u64(9087029921428221768),
|
||||
GoldilocksField::from_noncanonical_u64(2441288194761790662),
|
||||
GoldilocksField::from_noncanonical_u64(5646033492608483824),
|
||||
GoldilocksField::from_noncanonical_u64(2779181197214900072),
|
||||
GoldilocksField::from_noncanonical_u64(2989742820063487116),
|
||||
GoldilocksField::from_noncanonical_u64(727880025589250743),
|
||||
GoldilocksField::from_noncanonical_u64(3803926346107752679),
|
||||
]
|
||||
}
|
||||
fn test_vals_b() -> [GoldilocksField; 8] {
|
||||
[
|
||||
GoldilocksField::from_noncanonical_u64(17891926589593242302),
|
||||
GoldilocksField::from_noncanonical_u64(11009798273260028228),
|
||||
GoldilocksField::from_noncanonical_u64(2028722748960791447),
|
||||
GoldilocksField::from_noncanonical_u64(7929433601095175579),
|
||||
GoldilocksField::from_noncanonical_u64(6632528436085461172),
|
||||
GoldilocksField::from_noncanonical_u64(2145438710786785567),
|
||||
GoldilocksField::from_noncanonical_u64(11821483668392863016),
|
||||
GoldilocksField::from_noncanonical_u64(15638272883309521929),
|
||||
]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_add() {
|
||||
let a_arr = test_vals_a();
|
||||
let b_arr = test_vals_b();
|
||||
|
||||
let packed_a = Avx512GoldilocksField::from_arr(a_arr);
|
||||
let packed_b = Avx512GoldilocksField::from_arr(b_arr);
|
||||
let packed_res = packed_a + packed_b;
|
||||
let arr_res = packed_res.as_arr();
|
||||
|
||||
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a + b);
|
||||
for (exp, res) in expected.zip(arr_res) {
|
||||
assert_eq!(res, exp);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mul() {
|
||||
let a_arr = test_vals_a();
|
||||
let b_arr = test_vals_b();
|
||||
|
||||
let packed_a = Avx512GoldilocksField::from_arr(a_arr);
|
||||
let packed_b = Avx512GoldilocksField::from_arr(b_arr);
|
||||
let packed_res = packed_a * packed_b;
|
||||
let arr_res = packed_res.as_arr();
|
||||
|
||||
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a * b);
|
||||
for (exp, res) in expected.zip(arr_res) {
|
||||
assert_eq!(res, exp);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_square() {
|
||||
let a_arr = test_vals_a();
|
||||
|
||||
let packed_a = Avx512GoldilocksField::from_arr(a_arr);
|
||||
let packed_res = packed_a.square();
|
||||
let arr_res = packed_res.as_arr();
|
||||
|
||||
let expected = a_arr.iter().map(|&a| a.square());
|
||||
for (exp, res) in expected.zip(arr_res) {
|
||||
assert_eq!(res, exp);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_neg() {
|
||||
let a_arr = test_vals_a();
|
||||
|
||||
let packed_a = Avx512GoldilocksField::from_arr(a_arr);
|
||||
let packed_res = -packed_a;
|
||||
let arr_res = packed_res.as_arr();
|
||||
|
||||
let expected = a_arr.iter().map(|&a| -a);
|
||||
for (exp, res) in expected.zip(arr_res) {
|
||||
assert_eq!(res, exp);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sub() {
|
||||
let a_arr = test_vals_a();
|
||||
let b_arr = test_vals_b();
|
||||
|
||||
let packed_a = Avx512GoldilocksField::from_arr(a_arr);
|
||||
let packed_b = Avx512GoldilocksField::from_arr(b_arr);
|
||||
let packed_res = packed_a - packed_b;
|
||||
let arr_res = packed_res.as_arr();
|
||||
|
||||
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a - b);
|
||||
for (exp, res) in expected.zip(arr_res) {
|
||||
assert_eq!(res, exp);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_interleave_is_involution() {
|
||||
let a_arr = test_vals_a();
|
||||
let b_arr = test_vals_b();
|
||||
|
||||
let packed_a = Avx512GoldilocksField::from_arr(a_arr);
|
||||
let packed_b = Avx512GoldilocksField::from_arr(b_arr);
|
||||
{
|
||||
// Interleave, then deinterleave.
|
||||
let (x, y) = packed_a.interleave(packed_b, 1);
|
||||
let (res_a, res_b) = x.interleave(y, 1);
|
||||
assert_eq!(res_a.as_arr(), a_arr);
|
||||
assert_eq!(res_b.as_arr(), b_arr);
|
||||
}
|
||||
{
|
||||
let (x, y) = packed_a.interleave(packed_b, 2);
|
||||
let (res_a, res_b) = x.interleave(y, 2);
|
||||
assert_eq!(res_a.as_arr(), a_arr);
|
||||
assert_eq!(res_b.as_arr(), b_arr);
|
||||
}
|
||||
{
|
||||
let (x, y) = packed_a.interleave(packed_b, 4);
|
||||
let (res_a, res_b) = x.interleave(y, 4);
|
||||
assert_eq!(res_a.as_arr(), a_arr);
|
||||
assert_eq!(res_b.as_arr(), b_arr);
|
||||
}
|
||||
{
|
||||
let (x, y) = packed_a.interleave(packed_b, 8);
|
||||
let (res_a, res_b) = x.interleave(y, 8);
|
||||
assert_eq!(res_a.as_arr(), a_arr);
|
||||
assert_eq!(res_b.as_arr(), b_arr);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_interleave() {
|
||||
let in_a: [GoldilocksField; 8] = [
|
||||
GoldilocksField::from_noncanonical_u64(00),
|
||||
GoldilocksField::from_noncanonical_u64(01),
|
||||
GoldilocksField::from_noncanonical_u64(02),
|
||||
GoldilocksField::from_noncanonical_u64(03),
|
||||
GoldilocksField::from_noncanonical_u64(04),
|
||||
GoldilocksField::from_noncanonical_u64(05),
|
||||
GoldilocksField::from_noncanonical_u64(06),
|
||||
GoldilocksField::from_noncanonical_u64(07),
|
||||
];
|
||||
let in_b: [GoldilocksField; 8] = [
|
||||
GoldilocksField::from_noncanonical_u64(10),
|
||||
GoldilocksField::from_noncanonical_u64(11),
|
||||
GoldilocksField::from_noncanonical_u64(12),
|
||||
GoldilocksField::from_noncanonical_u64(13),
|
||||
GoldilocksField::from_noncanonical_u64(14),
|
||||
GoldilocksField::from_noncanonical_u64(15),
|
||||
GoldilocksField::from_noncanonical_u64(16),
|
||||
GoldilocksField::from_noncanonical_u64(17),
|
||||
];
|
||||
let int1_a: [GoldilocksField; 8] = [
|
||||
GoldilocksField::from_noncanonical_u64(00),
|
||||
GoldilocksField::from_noncanonical_u64(10),
|
||||
GoldilocksField::from_noncanonical_u64(02),
|
||||
GoldilocksField::from_noncanonical_u64(12),
|
||||
GoldilocksField::from_noncanonical_u64(04),
|
||||
GoldilocksField::from_noncanonical_u64(14),
|
||||
GoldilocksField::from_noncanonical_u64(06),
|
||||
GoldilocksField::from_noncanonical_u64(16),
|
||||
];
|
||||
let int1_b: [GoldilocksField; 8] = [
|
||||
GoldilocksField::from_noncanonical_u64(01),
|
||||
GoldilocksField::from_noncanonical_u64(11),
|
||||
GoldilocksField::from_noncanonical_u64(03),
|
||||
GoldilocksField::from_noncanonical_u64(13),
|
||||
GoldilocksField::from_noncanonical_u64(05),
|
||||
GoldilocksField::from_noncanonical_u64(15),
|
||||
GoldilocksField::from_noncanonical_u64(07),
|
||||
GoldilocksField::from_noncanonical_u64(17),
|
||||
];
|
||||
let int2_a: [GoldilocksField; 8] = [
|
||||
GoldilocksField::from_noncanonical_u64(00),
|
||||
GoldilocksField::from_noncanonical_u64(01),
|
||||
GoldilocksField::from_noncanonical_u64(10),
|
||||
GoldilocksField::from_noncanonical_u64(11),
|
||||
GoldilocksField::from_noncanonical_u64(04),
|
||||
GoldilocksField::from_noncanonical_u64(05),
|
||||
GoldilocksField::from_noncanonical_u64(14),
|
||||
GoldilocksField::from_noncanonical_u64(15),
|
||||
];
|
||||
let int2_b: [GoldilocksField; 8] = [
|
||||
GoldilocksField::from_noncanonical_u64(02),
|
||||
GoldilocksField::from_noncanonical_u64(03),
|
||||
GoldilocksField::from_noncanonical_u64(12),
|
||||
GoldilocksField::from_noncanonical_u64(13),
|
||||
GoldilocksField::from_noncanonical_u64(06),
|
||||
GoldilocksField::from_noncanonical_u64(07),
|
||||
GoldilocksField::from_noncanonical_u64(16),
|
||||
GoldilocksField::from_noncanonical_u64(17),
|
||||
];
|
||||
let int4_a: [GoldilocksField; 8] = [
|
||||
GoldilocksField::from_noncanonical_u64(00),
|
||||
GoldilocksField::from_noncanonical_u64(01),
|
||||
GoldilocksField::from_noncanonical_u64(02),
|
||||
GoldilocksField::from_noncanonical_u64(03),
|
||||
GoldilocksField::from_noncanonical_u64(10),
|
||||
GoldilocksField::from_noncanonical_u64(11),
|
||||
GoldilocksField::from_noncanonical_u64(12),
|
||||
GoldilocksField::from_noncanonical_u64(13),
|
||||
];
|
||||
let int4_b: [GoldilocksField; 8] = [
|
||||
GoldilocksField::from_noncanonical_u64(04),
|
||||
GoldilocksField::from_noncanonical_u64(05),
|
||||
GoldilocksField::from_noncanonical_u64(06),
|
||||
GoldilocksField::from_noncanonical_u64(07),
|
||||
GoldilocksField::from_noncanonical_u64(14),
|
||||
GoldilocksField::from_noncanonical_u64(15),
|
||||
GoldilocksField::from_noncanonical_u64(16),
|
||||
GoldilocksField::from_noncanonical_u64(17),
|
||||
];
|
||||
|
||||
let packed_a = Avx512GoldilocksField::from_arr(in_a);
|
||||
let packed_b = Avx512GoldilocksField::from_arr(in_b);
|
||||
{
|
||||
let (x1, y1) = packed_a.interleave(packed_b, 1);
|
||||
assert_eq!(x1.as_arr(), int1_a);
|
||||
assert_eq!(y1.as_arr(), int1_b);
|
||||
}
|
||||
{
|
||||
let (x2, y2) = packed_a.interleave(packed_b, 2);
|
||||
assert_eq!(x2.as_arr(), int2_a);
|
||||
assert_eq!(y2.as_arr(), int2_b);
|
||||
}
|
||||
{
|
||||
let (x4, y4) = packed_a.interleave(packed_b, 4);
|
||||
assert_eq!(x4.as_arr(), int4_a);
|
||||
assert_eq!(y4.as_arr(), int4_b);
|
||||
}
|
||||
{
|
||||
let (x8, y8) = packed_a.interleave(packed_b, 8);
|
||||
assert_eq!(x8.as_arr(), in_a);
|
||||
assert_eq!(y8.as_arr(), in_b);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,2 +1,20 @@
|
||||
#[cfg(target_feature = "avx2")]
|
||||
#[cfg(all(
|
||||
target_feature = "avx2",
|
||||
not(all(
|
||||
target_feature = "avx512bw",
|
||||
target_feature = "avx512cd",
|
||||
target_feature = "avx512dq",
|
||||
target_feature = "avx512f",
|
||||
target_feature = "avx512vl"
|
||||
))
|
||||
))]
|
||||
pub mod avx2_goldilocks_field;
|
||||
|
||||
#[cfg(all(
|
||||
target_feature = "avx512bw",
|
||||
target_feature = "avx512cd",
|
||||
target_feature = "avx512dq",
|
||||
target_feature = "avx512f",
|
||||
target_feature = "avx512vl"
|
||||
))]
|
||||
pub mod avx512_goldilocks_field;
|
||||
|
||||
@ -7,6 +7,7 @@
|
||||
#![allow(clippy::return_self_not_must_use)]
|
||||
#![feature(generic_const_exprs)]
|
||||
#![feature(specialization)]
|
||||
#![feature(stdsimd)]
|
||||
|
||||
pub(crate) mod arch;
|
||||
pub mod batch_util;
|
||||
|
||||
@ -12,7 +12,29 @@ impl<F: Field> Packable for F {
|
||||
default type Packing = Self;
|
||||
}
|
||||
|
||||
#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
|
||||
#[cfg(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "avx2",
|
||||
not(all(
|
||||
target_feature = "avx512bw",
|
||||
target_feature = "avx512cd",
|
||||
target_feature = "avx512dq",
|
||||
target_feature = "avx512f",
|
||||
target_feature = "avx512vl"
|
||||
))
|
||||
))]
|
||||
impl Packable for crate::goldilocks_field::GoldilocksField {
|
||||
type Packing = crate::arch::x86_64::avx2_goldilocks_field::Avx2GoldilocksField;
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
target_arch = "x86_64",
|
||||
target_feature = "avx512bw",
|
||||
target_feature = "avx512cd",
|
||||
target_feature = "avx512dq",
|
||||
target_feature = "avx512f",
|
||||
target_feature = "avx512vl"
|
||||
))]
|
||||
impl Packable for crate::goldilocks_field::GoldilocksField {
|
||||
type Packing = crate::arch::x86_64::avx512_goldilocks_field::Avx512GoldilocksField;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user