mirror of
https://github.com/logos-storage/plonky2.git
synced 2026-01-07 16:23:12 +00:00
Type tweaks for packed types (#387)
* PackedField tweaks * AVX2 changes * FFT fixes * tests * test fixes * Lints * Rename things for clarity * Minor interleave fixes * Minor interleave fixes the sequel * Rebase fixes * Docs * Daniel PR comments
This commit is contained in:
parent
04c1ea2531
commit
bb029db2a7
@ -98,12 +98,12 @@ pub fn ifft_with_options<F: Field>(
|
|||||||
/// Generic FFT implementation that works with both scalar and packed inputs.
|
/// Generic FFT implementation that works with both scalar and packed inputs.
|
||||||
#[unroll_for_loops]
|
#[unroll_for_loops]
|
||||||
fn fft_classic_simd<P: PackedField>(
|
fn fft_classic_simd<P: PackedField>(
|
||||||
values: &mut [P::FieldType],
|
values: &mut [P::Scalar],
|
||||||
r: usize,
|
r: usize,
|
||||||
lg_n: usize,
|
lg_n: usize,
|
||||||
root_table: &FftRootTable<P::FieldType>,
|
root_table: &FftRootTable<P::Scalar>,
|
||||||
) {
|
) {
|
||||||
let lg_packed_width = P::LOG2_WIDTH; // 0 when P is a scalar.
|
let lg_packed_width = log2_strict(P::WIDTH); // 0 when P is a scalar.
|
||||||
let packed_values = P::pack_slice_mut(values);
|
let packed_values = P::pack_slice_mut(values);
|
||||||
let packed_n = packed_values.len();
|
let packed_n = packed_values.len();
|
||||||
debug_assert!(packed_n == 1 << (lg_n - lg_packed_width));
|
debug_assert!(packed_n == 1 << (lg_n - lg_packed_width));
|
||||||
@ -121,19 +121,18 @@ fn fft_classic_simd<P: PackedField>(
|
|||||||
let half_m = 1 << lg_half_m;
|
let half_m = 1 << lg_half_m;
|
||||||
|
|
||||||
// Set omega to root_table[lg_half_m][0..half_m] but repeated.
|
// Set omega to root_table[lg_half_m][0..half_m] but repeated.
|
||||||
let mut omega_vec = P::zero().to_vec();
|
let mut omega = P::ZERO;
|
||||||
for (j, omega) in omega_vec.iter_mut().enumerate() {
|
for (j, omega_j) in omega.as_slice_mut().iter_mut().enumerate() {
|
||||||
*omega = root_table[lg_half_m][j % half_m];
|
*omega_j = root_table[lg_half_m][j % half_m];
|
||||||
}
|
}
|
||||||
let omega = P::from_slice(&omega_vec[..]);
|
|
||||||
|
|
||||||
for k in (0..packed_n).step_by(2) {
|
for k in (0..packed_n).step_by(2) {
|
||||||
// We have two vectors and want to do math on pairs of adjacent elements (or for
|
// We have two vectors and want to do math on pairs of adjacent elements (or for
|
||||||
// lg_half_m > 0, pairs of adjacent blocks of elements). .interleave does the
|
// lg_half_m > 0, pairs of adjacent blocks of elements). .interleave does the
|
||||||
// appropriate shuffling and is its own inverse.
|
// appropriate shuffling and is its own inverse.
|
||||||
let (u, v) = packed_values[k].interleave(packed_values[k + 1], lg_half_m);
|
let (u, v) = packed_values[k].interleave(packed_values[k + 1], half_m);
|
||||||
let t = omega * v;
|
let t = omega * v;
|
||||||
(packed_values[k], packed_values[k + 1]) = (u + t).interleave(u - t, lg_half_m);
|
(packed_values[k], packed_values[k + 1]) = (u + t).interleave(u - t, half_m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -197,13 +196,13 @@ pub(crate) fn fft_classic<F: Field>(input: &[F], r: usize, root_table: &FftRootT
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let lg_packed_width = <F as Packable>::PackedType::LOG2_WIDTH;
|
let lg_packed_width = log2_strict(<F as Packable>::Packing::WIDTH);
|
||||||
if lg_n <= lg_packed_width {
|
if lg_n <= lg_packed_width {
|
||||||
// Need the slice to be at least the width of two packed vectors for the vectorized version
|
// Need the slice to be at least the width of two packed vectors for the vectorized version
|
||||||
// to work. Do this tiny problem in scalar.
|
// to work. Do this tiny problem in scalar.
|
||||||
fft_classic_simd::<F>(&mut values[..], r, lg_n, root_table);
|
fft_classic_simd::<F>(&mut values[..], r, lg_n, root_table);
|
||||||
} else {
|
} else {
|
||||||
fft_classic_simd::<<F as Packable>::PackedType>(&mut values[..], r, lg_n, root_table);
|
fft_classic_simd::<<F as Packable>::Packing>(&mut values[..], r, lg_n, root_table);
|
||||||
}
|
}
|
||||||
values
|
values
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,14 +5,14 @@ use crate::field::packed_field::PackedField;
|
|||||||
/// PackedField for a particular Field (e.g. every Field is also a PackedField), but this is the
|
/// PackedField for a particular Field (e.g. every Field is also a PackedField), but this is the
|
||||||
/// recommended one. The recommended packing varies by target_arch and target_feature.
|
/// recommended one. The recommended packing varies by target_arch and target_feature.
|
||||||
pub trait Packable: Field {
|
pub trait Packable: Field {
|
||||||
type PackedType: PackedField<FieldType = Self>;
|
type Packing: PackedField<Scalar = Self>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: Field> Packable for F {
|
impl<F: Field> Packable for F {
|
||||||
default type PackedType = Self;
|
default type Packing = Self;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_feature = "avx2")]
|
#[cfg(target_feature = "avx2")]
|
||||||
impl Packable for crate::field::goldilocks_field::GoldilocksField {
|
impl Packable for crate::field::goldilocks_field::GoldilocksField {
|
||||||
type PackedType = crate::field::packed_avx2::PackedGoldilocksAVX2;
|
type Packing = crate::field::packed_avx2::PackedGoldilocksAvx2;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,20 +2,20 @@ use core::arch::x86_64::*;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fmt::{Debug, Formatter};
|
use std::fmt::{Debug, Formatter};
|
||||||
use std::iter::{Product, Sum};
|
use std::iter::{Product, Sum};
|
||||||
use std::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign};
|
use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign};
|
||||||
|
|
||||||
use crate::field::field_types::PrimeField;
|
use crate::field::field_types::PrimeField;
|
||||||
use crate::field::packed_avx2::common::{
|
use crate::field::packed_avx2::common::{
|
||||||
add_no_canonicalize_64_64s_s, epsilon, field_order, shift, ReducibleAVX2,
|
add_no_canonicalize_64_64s_s, epsilon, field_order, shift, ReducibleAvx2,
|
||||||
};
|
};
|
||||||
use crate::field::packed_field::PackedField;
|
use crate::field::packed_field::PackedField;
|
||||||
|
|
||||||
// PackedPrimeField wraps an array of four u64s, with the new and get methods to convert that
|
// Avx2PrimeField wraps an array of four u64s, with the new and get methods to convert that
|
||||||
// array to and from __m256i, which is the type we actually operate on. This indirection is a
|
// array to and from __m256i, which is the type we actually operate on. This indirection is a
|
||||||
// terrible trick to change PackedPrimeField's alignment.
|
// terrible trick to change Avx2PrimeField's alignment.
|
||||||
// We'd like to be able to cast slices of PrimeField to slices of PackedPrimeField. Rust
|
// We'd like to be able to cast slices of PrimeField to slices of Avx2PrimeField. Rust
|
||||||
// aligns __m256i to 32 bytes but PrimeField has a lower alignment. That alignment extends to
|
// aligns __m256i to 32 bytes but PrimeField has a lower alignment. That alignment extends to
|
||||||
// PackedPrimeField and it appears that it cannot be lowered with #[repr(C, blah)]. It is
|
// Avx2PrimeField and it appears that it cannot be lowered with #[repr(C, blah)]. It is
|
||||||
// important for Rust not to assume 32-byte alignment, so we cannot wrap __m256i directly.
|
// important for Rust not to assume 32-byte alignment, so we cannot wrap __m256i directly.
|
||||||
// There are two versions of vectorized load/store instructions on x86: aligned (vmovaps and
|
// There are two versions of vectorized load/store instructions on x86: aligned (vmovaps and
|
||||||
// friends) and unaligned (vmovups etc.). The difference between them is that aligned loads and
|
// friends) and unaligned (vmovups etc.). The difference between them is that aligned loads and
|
||||||
@ -23,12 +23,12 @@ use crate::field::packed_field::PackedField;
|
|||||||
// were faster, and although this is no longer the case, compilers prefer the aligned versions if
|
// were faster, and although this is no longer the case, compilers prefer the aligned versions if
|
||||||
// they know that the address is aligned. Using aligned instructions on unaligned addresses leads to
|
// they know that the address is aligned. Using aligned instructions on unaligned addresses leads to
|
||||||
// bugs that can be frustrating to diagnose. Hence, we can't have Rust assuming alignment, and
|
// bugs that can be frustrating to diagnose. Hence, we can't have Rust assuming alignment, and
|
||||||
// therefore PackedPrimeField wraps [F; 4] and not __m256i.
|
// therefore Avx2PrimeField wraps [F; 4] and not __m256i.
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
#[repr(transparent)]
|
#[repr(transparent)]
|
||||||
pub struct PackedPrimeField<F: ReducibleAVX2>(pub [F; 4]);
|
pub struct Avx2PrimeField<F: ReducibleAvx2>(pub [F; 4]);
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn new(x: __m256i) -> Self {
|
fn new(x: __m256i) -> Self {
|
||||||
let mut obj = Self([F::ZERO; 4]);
|
let mut obj = Self([F::ZERO; 4]);
|
||||||
@ -45,75 +45,109 @@ impl<F: ReducibleAVX2> PackedPrimeField<F> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> Add<Self> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Add<Self> for Avx2PrimeField<F> {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn add(self, rhs: Self) -> Self {
|
fn add(self, rhs: Self) -> Self {
|
||||||
Self::new(unsafe { add::<F>(self.get(), rhs.get()) })
|
Self::new(unsafe { add::<F>(self.get(), rhs.get()) })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> Add<F> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Add<F> for Avx2PrimeField<F> {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn add(self, rhs: F) -> Self {
|
fn add(self, rhs: F) -> Self {
|
||||||
self + Self::broadcast(rhs)
|
self + Self::from(rhs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> AddAssign<Self> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Add<Avx2PrimeField<F>> for <Avx2PrimeField<F> as PackedField>::Scalar {
|
||||||
|
type Output = Avx2PrimeField<F>;
|
||||||
|
#[inline]
|
||||||
|
fn add(self, rhs: Self::Output) -> Self::Output {
|
||||||
|
Self::Output::from(self) + rhs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<F: ReducibleAvx2> AddAssign<Self> for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn add_assign(&mut self, rhs: Self) {
|
fn add_assign(&mut self, rhs: Self) {
|
||||||
*self = *self + rhs;
|
*self = *self + rhs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> AddAssign<F> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> AddAssign<F> for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn add_assign(&mut self, rhs: F) {
|
fn add_assign(&mut self, rhs: F) {
|
||||||
*self = *self + rhs;
|
*self = *self + rhs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> Debug for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Debug for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||||
write!(f, "({:?})", self.get())
|
write!(f, "({:?})", self.get())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> Default for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Default for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self::zero()
|
Self::ZERO
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> Mul<Self> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Div<F> for Avx2PrimeField<F> {
|
||||||
|
type Output = Self;
|
||||||
|
#[inline]
|
||||||
|
fn div(self, rhs: F) -> Self {
|
||||||
|
self * rhs.inverse()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<F: ReducibleAvx2> DivAssign<F> for Avx2PrimeField<F> {
|
||||||
|
#[inline]
|
||||||
|
fn div_assign(&mut self, rhs: F) {
|
||||||
|
*self *= rhs.inverse();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F: ReducibleAvx2> From<F> for Avx2PrimeField<F> {
|
||||||
|
fn from(x: F) -> Self {
|
||||||
|
Self([x; 4])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<F: ReducibleAvx2> Mul<Self> for Avx2PrimeField<F> {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn mul(self, rhs: Self) -> Self {
|
fn mul(self, rhs: Self) -> Self {
|
||||||
Self::new(unsafe { mul::<F>(self.get(), rhs.get()) })
|
Self::new(unsafe { mul::<F>(self.get(), rhs.get()) })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> Mul<F> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Mul<F> for Avx2PrimeField<F> {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn mul(self, rhs: F) -> Self {
|
fn mul(self, rhs: F) -> Self {
|
||||||
self * Self::broadcast(rhs)
|
self * Self::from(rhs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> MulAssign<Self> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Mul<Avx2PrimeField<F>> for <Avx2PrimeField<F> as PackedField>::Scalar {
|
||||||
|
type Output = Avx2PrimeField<F>;
|
||||||
|
#[inline]
|
||||||
|
fn mul(self, rhs: Avx2PrimeField<F>) -> Self::Output {
|
||||||
|
Self::Output::from(self) * rhs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<F: ReducibleAvx2> MulAssign<Self> for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn mul_assign(&mut self, rhs: Self) {
|
fn mul_assign(&mut self, rhs: Self) {
|
||||||
*self = *self * rhs;
|
*self = *self * rhs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> MulAssign<F> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> MulAssign<F> for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn mul_assign(&mut self, rhs: F) {
|
fn mul_assign(&mut self, rhs: F) {
|
||||||
*self = *self * rhs;
|
*self = *self * rhs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> Neg for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Neg for Avx2PrimeField<F> {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn neg(self) -> Self {
|
fn neg(self) -> Self {
|
||||||
@ -121,52 +155,59 @@ impl<F: ReducibleAVX2> Neg for PackedPrimeField<F> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> Product for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Product for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
|
fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
|
||||||
iter.reduce(|x, y| x * y).unwrap_or(Self::one())
|
iter.reduce(|x, y| x * y).unwrap_or(Self::ONE)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> PackedField for PackedPrimeField<F> {
|
unsafe impl<F: ReducibleAvx2> PackedField for Avx2PrimeField<F> {
|
||||||
const LOG2_WIDTH: usize = 2;
|
const WIDTH: usize = 4;
|
||||||
|
|
||||||
type FieldType = F;
|
type Scalar = F;
|
||||||
|
type PackedPrimeField = Avx2PrimeField<F>;
|
||||||
|
|
||||||
|
const ZERO: Self = Self([F::ZERO; 4]);
|
||||||
|
const ONE: Self = Self([F::ONE; 4]);
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn broadcast(x: F) -> Self {
|
fn from_arr(arr: [Self::Scalar; Self::WIDTH]) -> Self {
|
||||||
Self([x; 4])
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn from_arr(arr: [F; Self::WIDTH]) -> Self {
|
|
||||||
Self(arr)
|
Self(arr)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn to_arr(&self) -> [F; Self::WIDTH] {
|
fn as_arr(&self) -> [Self::Scalar; Self::WIDTH] {
|
||||||
self.0
|
self.0
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn from_slice(slice: &[F]) -> Self {
|
fn from_slice(slice: &[Self::Scalar]) -> &Self {
|
||||||
assert!(slice.len() == 4);
|
assert_eq!(slice.len(), Self::WIDTH);
|
||||||
Self([slice[0], slice[1], slice[2], slice[3]])
|
unsafe { &*slice.as_ptr().cast() }
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
|
fn from_slice_mut(slice: &mut [Self::Scalar]) -> &mut Self {
|
||||||
|
assert_eq!(slice.len(), Self::WIDTH);
|
||||||
|
unsafe { &mut *slice.as_mut_ptr().cast() }
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
|
fn as_slice(&self) -> &[Self::Scalar] {
|
||||||
|
&self.0[..]
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
|
fn as_slice_mut(&mut self) -> &mut [Self::Scalar] {
|
||||||
|
&mut self.0[..]
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn to_vec(&self) -> Vec<F> {
|
fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) {
|
||||||
self.0.into()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn interleave(&self, other: Self, r: usize) -> (Self, Self) {
|
|
||||||
let (v0, v1) = (self.get(), other.get());
|
let (v0, v1) = (self.get(), other.get());
|
||||||
let (res0, res1) = match r {
|
let (res0, res1) = match block_len {
|
||||||
0 => unsafe { interleave0(v0, v1) },
|
|
||||||
1 => unsafe { interleave1(v0, v1) },
|
1 => unsafe { interleave1(v0, v1) },
|
||||||
2 => (v0, v1),
|
2 => unsafe { interleave2(v0, v1) },
|
||||||
_ => panic!("r cannot be more than LOG2_WIDTH"),
|
4 => (v0, v1),
|
||||||
|
_ => panic!("unsupported block_len"),
|
||||||
};
|
};
|
||||||
(Self::new(res0), Self::new(res1))
|
(Self::new(res0), Self::new(res1))
|
||||||
}
|
}
|
||||||
@ -177,37 +218,44 @@ impl<F: ReducibleAVX2> PackedField for PackedPrimeField<F> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> Sub<Self> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Sub<Self> for Avx2PrimeField<F> {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sub(self, rhs: Self) -> Self {
|
fn sub(self, rhs: Self) -> Self {
|
||||||
Self::new(unsafe { sub::<F>(self.get(), rhs.get()) })
|
Self::new(unsafe { sub::<F>(self.get(), rhs.get()) })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> Sub<F> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Sub<F> for Avx2PrimeField<F> {
|
||||||
type Output = Self;
|
type Output = Self;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sub(self, rhs: F) -> Self {
|
fn sub(self, rhs: F) -> Self {
|
||||||
self - Self::broadcast(rhs)
|
self - Self::from(rhs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> SubAssign<Self> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Sub<Avx2PrimeField<F>> for <Avx2PrimeField<F> as PackedField>::Scalar {
|
||||||
|
type Output = Avx2PrimeField<F>;
|
||||||
|
#[inline]
|
||||||
|
fn sub(self, rhs: Avx2PrimeField<F>) -> Self::Output {
|
||||||
|
Self::Output::from(self) - rhs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<F: ReducibleAvx2> SubAssign<Self> for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sub_assign(&mut self, rhs: Self) {
|
fn sub_assign(&mut self, rhs: Self) {
|
||||||
*self = *self - rhs;
|
*self = *self - rhs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<F: ReducibleAVX2> SubAssign<F> for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> SubAssign<F> for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sub_assign(&mut self, rhs: F) {
|
fn sub_assign(&mut self, rhs: F) {
|
||||||
*self = *self - rhs;
|
*self = *self - rhs;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: ReducibleAVX2> Sum for PackedPrimeField<F> {
|
impl<F: ReducibleAvx2> Sum for Avx2PrimeField<F> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
|
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
|
||||||
iter.reduce(|x, y| x + y).unwrap_or(Self::zero())
|
iter.reduce(|x, y| x + y).unwrap_or(Self::ZERO)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -367,25 +415,25 @@ unsafe fn square64(x: __m256i) -> (__m256i, __m256i) {
|
|||||||
|
|
||||||
/// Multiply two integers modulo FIELD_ORDER.
|
/// Multiply two integers modulo FIELD_ORDER.
|
||||||
#[inline]
|
#[inline]
|
||||||
unsafe fn mul<F: ReducibleAVX2>(x: __m256i, y: __m256i) -> __m256i {
|
unsafe fn mul<F: ReducibleAvx2>(x: __m256i, y: __m256i) -> __m256i {
|
||||||
F::reduce128(mul64_64(x, y))
|
F::reduce128(mul64_64(x, y))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Square an integer modulo FIELD_ORDER.
|
/// Square an integer modulo FIELD_ORDER.
|
||||||
#[inline]
|
#[inline]
|
||||||
unsafe fn square<F: ReducibleAVX2>(x: __m256i) -> __m256i {
|
unsafe fn square<F: ReducibleAvx2>(x: __m256i) -> __m256i {
|
||||||
F::reduce128(square64(x))
|
F::reduce128(square64(x))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
unsafe fn interleave0(x: __m256i, y: __m256i) -> (__m256i, __m256i) {
|
unsafe fn interleave1(x: __m256i, y: __m256i) -> (__m256i, __m256i) {
|
||||||
let a = _mm256_unpacklo_epi64(x, y);
|
let a = _mm256_unpacklo_epi64(x, y);
|
||||||
let b = _mm256_unpackhi_epi64(x, y);
|
let b = _mm256_unpackhi_epi64(x, y);
|
||||||
(a, b)
|
(a, b)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
unsafe fn interleave1(x: __m256i, y: __m256i) -> (__m256i, __m256i) {
|
unsafe fn interleave2(x: __m256i, y: __m256i) -> (__m256i, __m256i) {
|
||||||
let y_lo = _mm256_castsi256_si128(y); // This has 0 cost.
|
let y_lo = _mm256_castsi256_si128(y); // This has 0 cost.
|
||||||
|
|
||||||
// 1 places y_lo in the high half of x; 0 would place it in the lower half.
|
// 1 places y_lo in the high half of x; 0 would place it in the lower half.
|
||||||
@ -2,7 +2,7 @@ use core::arch::x86_64::*;
|
|||||||
|
|
||||||
use crate::field::field_types::PrimeField;
|
use crate::field::field_types::PrimeField;
|
||||||
|
|
||||||
pub trait ReducibleAVX2: PrimeField {
|
pub trait ReducibleAvx2: PrimeField {
|
||||||
unsafe fn reduce128(x: (__m256i, __m256i)) -> __m256i;
|
unsafe fn reduce128(x: (__m256i, __m256i)) -> __m256i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -2,12 +2,12 @@ use core::arch::x86_64::*;
|
|||||||
|
|
||||||
use crate::field::goldilocks_field::GoldilocksField;
|
use crate::field::goldilocks_field::GoldilocksField;
|
||||||
use crate::field::packed_avx2::common::{
|
use crate::field::packed_avx2::common::{
|
||||||
add_no_canonicalize_64_64s_s, epsilon, shift, sub_no_canonicalize_64s_64_s, ReducibleAVX2,
|
add_no_canonicalize_64_64s_s, epsilon, shift, sub_no_canonicalize_64s_64_s, ReducibleAvx2,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Reduce a u128 modulo FIELD_ORDER. The input is (u64, u64), pre-shifted by 2^63. The result is
|
/// Reduce a u128 modulo FIELD_ORDER. The input is (u64, u64), pre-shifted by 2^63. The result is
|
||||||
/// similarly shifted.
|
/// similarly shifted.
|
||||||
impl ReducibleAVX2 for GoldilocksField {
|
impl ReducibleAvx2 for GoldilocksField {
|
||||||
#[inline]
|
#[inline]
|
||||||
unsafe fn reduce128(x: (__m256i, __m256i)) -> __m256i {
|
unsafe fn reduce128(x: (__m256i, __m256i)) -> __m256i {
|
||||||
let (hi0, lo0) = x;
|
let (hi0, lo0) = x;
|
||||||
|
|||||||
@ -1,21 +1,21 @@
|
|||||||
|
mod avx2_prime_field;
|
||||||
mod common;
|
mod common;
|
||||||
mod goldilocks;
|
mod goldilocks;
|
||||||
mod packed_prime_field;
|
|
||||||
|
|
||||||
use packed_prime_field::PackedPrimeField;
|
use avx2_prime_field::Avx2PrimeField;
|
||||||
|
|
||||||
use crate::field::goldilocks_field::GoldilocksField;
|
use crate::field::goldilocks_field::GoldilocksField;
|
||||||
|
|
||||||
pub type PackedGoldilocksAVX2 = PackedPrimeField<GoldilocksField>;
|
pub type PackedGoldilocksAvx2 = Avx2PrimeField<GoldilocksField>;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::field::goldilocks_field::GoldilocksField;
|
use crate::field::goldilocks_field::GoldilocksField;
|
||||||
use crate::field::packed_avx2::common::ReducibleAVX2;
|
use crate::field::packed_avx2::avx2_prime_field::Avx2PrimeField;
|
||||||
use crate::field::packed_avx2::packed_prime_field::PackedPrimeField;
|
use crate::field::packed_avx2::common::ReducibleAvx2;
|
||||||
use crate::field::packed_field::PackedField;
|
use crate::field::packed_field::PackedField;
|
||||||
|
|
||||||
fn test_vals_a<F: ReducibleAVX2>() -> [F; 4] {
|
fn test_vals_a<F: ReducibleAvx2>() -> [F; 4] {
|
||||||
[
|
[
|
||||||
F::from_noncanonical_u64(14479013849828404771),
|
F::from_noncanonical_u64(14479013849828404771),
|
||||||
F::from_noncanonical_u64(9087029921428221768),
|
F::from_noncanonical_u64(9087029921428221768),
|
||||||
@ -23,7 +23,7 @@ mod tests {
|
|||||||
F::from_noncanonical_u64(5646033492608483824),
|
F::from_noncanonical_u64(5646033492608483824),
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
fn test_vals_b<F: ReducibleAVX2>() -> [F; 4] {
|
fn test_vals_b<F: ReducibleAvx2>() -> [F; 4] {
|
||||||
[
|
[
|
||||||
F::from_noncanonical_u64(17891926589593242302),
|
F::from_noncanonical_u64(17891926589593242302),
|
||||||
F::from_noncanonical_u64(11009798273260028228),
|
F::from_noncanonical_u64(11009798273260028228),
|
||||||
@ -32,17 +32,17 @@ mod tests {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_add<F: ReducibleAVX2>()
|
fn test_add<F: ReducibleAvx2>()
|
||||||
where
|
where
|
||||||
[(); PackedPrimeField::<F>::WIDTH]:,
|
[(); Avx2PrimeField::<F>::WIDTH]:,
|
||||||
{
|
{
|
||||||
let a_arr = test_vals_a::<F>();
|
let a_arr = test_vals_a::<F>();
|
||||||
let b_arr = test_vals_b::<F>();
|
let b_arr = test_vals_b::<F>();
|
||||||
|
|
||||||
let packed_a = PackedPrimeField::<F>::from_arr(a_arr);
|
let packed_a = Avx2PrimeField::<F>::from_arr(a_arr);
|
||||||
let packed_b = PackedPrimeField::<F>::from_arr(b_arr);
|
let packed_b = Avx2PrimeField::<F>::from_arr(b_arr);
|
||||||
let packed_res = packed_a + packed_b;
|
let packed_res = packed_a + packed_b;
|
||||||
let arr_res = packed_res.to_arr();
|
let arr_res = packed_res.as_arr();
|
||||||
|
|
||||||
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a + b);
|
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a + b);
|
||||||
for (exp, res) in expected.zip(arr_res) {
|
for (exp, res) in expected.zip(arr_res) {
|
||||||
@ -50,17 +50,17 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_mul<F: ReducibleAVX2>()
|
fn test_mul<F: ReducibleAvx2>()
|
||||||
where
|
where
|
||||||
[(); PackedPrimeField::<F>::WIDTH]:,
|
[(); Avx2PrimeField::<F>::WIDTH]:,
|
||||||
{
|
{
|
||||||
let a_arr = test_vals_a::<F>();
|
let a_arr = test_vals_a::<F>();
|
||||||
let b_arr = test_vals_b::<F>();
|
let b_arr = test_vals_b::<F>();
|
||||||
|
|
||||||
let packed_a = PackedPrimeField::<F>::from_arr(a_arr);
|
let packed_a = Avx2PrimeField::<F>::from_arr(a_arr);
|
||||||
let packed_b = PackedPrimeField::<F>::from_arr(b_arr);
|
let packed_b = Avx2PrimeField::<F>::from_arr(b_arr);
|
||||||
let packed_res = packed_a * packed_b;
|
let packed_res = packed_a * packed_b;
|
||||||
let arr_res = packed_res.to_arr();
|
let arr_res = packed_res.as_arr();
|
||||||
|
|
||||||
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a * b);
|
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a * b);
|
||||||
for (exp, res) in expected.zip(arr_res) {
|
for (exp, res) in expected.zip(arr_res) {
|
||||||
@ -68,15 +68,15 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_square<F: ReducibleAVX2>()
|
fn test_square<F: ReducibleAvx2>()
|
||||||
where
|
where
|
||||||
[(); PackedPrimeField::<F>::WIDTH]:,
|
[(); Avx2PrimeField::<F>::WIDTH]:,
|
||||||
{
|
{
|
||||||
let a_arr = test_vals_a::<F>();
|
let a_arr = test_vals_a::<F>();
|
||||||
|
|
||||||
let packed_a = PackedPrimeField::<F>::from_arr(a_arr);
|
let packed_a = Avx2PrimeField::<F>::from_arr(a_arr);
|
||||||
let packed_res = packed_a.square();
|
let packed_res = packed_a.square();
|
||||||
let arr_res = packed_res.to_arr();
|
let arr_res = packed_res.as_arr();
|
||||||
|
|
||||||
let expected = a_arr.iter().map(|&a| a.square());
|
let expected = a_arr.iter().map(|&a| a.square());
|
||||||
for (exp, res) in expected.zip(arr_res) {
|
for (exp, res) in expected.zip(arr_res) {
|
||||||
@ -84,15 +84,15 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_neg<F: ReducibleAVX2>()
|
fn test_neg<F: ReducibleAvx2>()
|
||||||
where
|
where
|
||||||
[(); PackedPrimeField::<F>::WIDTH]:,
|
[(); Avx2PrimeField::<F>::WIDTH]:,
|
||||||
{
|
{
|
||||||
let a_arr = test_vals_a::<F>();
|
let a_arr = test_vals_a::<F>();
|
||||||
|
|
||||||
let packed_a = PackedPrimeField::<F>::from_arr(a_arr);
|
let packed_a = Avx2PrimeField::<F>::from_arr(a_arr);
|
||||||
let packed_res = -packed_a;
|
let packed_res = -packed_a;
|
||||||
let arr_res = packed_res.to_arr();
|
let arr_res = packed_res.as_arr();
|
||||||
|
|
||||||
let expected = a_arr.iter().map(|&a| -a);
|
let expected = a_arr.iter().map(|&a| -a);
|
||||||
for (exp, res) in expected.zip(arr_res) {
|
for (exp, res) in expected.zip(arr_res) {
|
||||||
@ -100,17 +100,17 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_sub<F: ReducibleAVX2>()
|
fn test_sub<F: ReducibleAvx2>()
|
||||||
where
|
where
|
||||||
[(); PackedPrimeField::<F>::WIDTH]:,
|
[(); Avx2PrimeField::<F>::WIDTH]:,
|
||||||
{
|
{
|
||||||
let a_arr = test_vals_a::<F>();
|
let a_arr = test_vals_a::<F>();
|
||||||
let b_arr = test_vals_b::<F>();
|
let b_arr = test_vals_b::<F>();
|
||||||
|
|
||||||
let packed_a = PackedPrimeField::<F>::from_arr(a_arr);
|
let packed_a = Avx2PrimeField::<F>::from_arr(a_arr);
|
||||||
let packed_b = PackedPrimeField::<F>::from_arr(b_arr);
|
let packed_b = Avx2PrimeField::<F>::from_arr(b_arr);
|
||||||
let packed_res = packed_a - packed_b;
|
let packed_res = packed_a - packed_b;
|
||||||
let arr_res = packed_res.to_arr();
|
let arr_res = packed_res.as_arr();
|
||||||
|
|
||||||
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a - b);
|
let expected = a_arr.iter().zip(b_arr).map(|(&a, b)| a - b);
|
||||||
for (exp, res) in expected.zip(arr_res) {
|
for (exp, res) in expected.zip(arr_res) {
|
||||||
@ -118,33 +118,39 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_interleave_is_involution<F: ReducibleAVX2>()
|
fn test_interleave_is_involution<F: ReducibleAvx2>()
|
||||||
where
|
where
|
||||||
[(); PackedPrimeField::<F>::WIDTH]:,
|
[(); Avx2PrimeField::<F>::WIDTH]:,
|
||||||
{
|
{
|
||||||
let a_arr = test_vals_a::<F>();
|
let a_arr = test_vals_a::<F>();
|
||||||
let b_arr = test_vals_b::<F>();
|
let b_arr = test_vals_b::<F>();
|
||||||
|
|
||||||
let packed_a = PackedPrimeField::<F>::from_arr(a_arr);
|
let packed_a = Avx2PrimeField::<F>::from_arr(a_arr);
|
||||||
let packed_b = PackedPrimeField::<F>::from_arr(b_arr);
|
let packed_b = Avx2PrimeField::<F>::from_arr(b_arr);
|
||||||
{
|
{
|
||||||
// Interleave, then deinterleave.
|
// Interleave, then deinterleave.
|
||||||
let (x, y) = packed_a.interleave(packed_b, 0);
|
|
||||||
let (res_a, res_b) = x.interleave(y, 0);
|
|
||||||
assert_eq!(res_a.to_arr(), a_arr);
|
|
||||||
assert_eq!(res_b.to_arr(), b_arr);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
let (x, y) = packed_a.interleave(packed_b, 1);
|
let (x, y) = packed_a.interleave(packed_b, 1);
|
||||||
let (res_a, res_b) = x.interleave(y, 1);
|
let (res_a, res_b) = x.interleave(y, 1);
|
||||||
assert_eq!(res_a.to_arr(), a_arr);
|
assert_eq!(res_a.as_arr(), a_arr);
|
||||||
assert_eq!(res_b.to_arr(), b_arr);
|
assert_eq!(res_b.as_arr(), b_arr);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let (x, y) = packed_a.interleave(packed_b, 2);
|
||||||
|
let (res_a, res_b) = x.interleave(y, 2);
|
||||||
|
assert_eq!(res_a.as_arr(), a_arr);
|
||||||
|
assert_eq!(res_b.as_arr(), b_arr);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let (x, y) = packed_a.interleave(packed_b, 4);
|
||||||
|
let (res_a, res_b) = x.interleave(y, 4);
|
||||||
|
assert_eq!(res_a.as_arr(), a_arr);
|
||||||
|
assert_eq!(res_b.as_arr(), b_arr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_interleave<F: ReducibleAVX2>()
|
fn test_interleave<F: ReducibleAvx2>()
|
||||||
where
|
where
|
||||||
[(); PackedPrimeField::<F>::WIDTH]:,
|
[(); Avx2PrimeField::<F>::WIDTH]:,
|
||||||
{
|
{
|
||||||
let in_a: [F; 4] = [
|
let in_a: [F; 4] = [
|
||||||
F::from_noncanonical_u64(00),
|
F::from_noncanonical_u64(00),
|
||||||
@ -158,42 +164,47 @@ mod tests {
|
|||||||
F::from_noncanonical_u64(12),
|
F::from_noncanonical_u64(12),
|
||||||
F::from_noncanonical_u64(13),
|
F::from_noncanonical_u64(13),
|
||||||
];
|
];
|
||||||
let int0_a: [F; 4] = [
|
let int1_a: [F; 4] = [
|
||||||
F::from_noncanonical_u64(00),
|
F::from_noncanonical_u64(00),
|
||||||
F::from_noncanonical_u64(10),
|
F::from_noncanonical_u64(10),
|
||||||
F::from_noncanonical_u64(02),
|
F::from_noncanonical_u64(02),
|
||||||
F::from_noncanonical_u64(12),
|
F::from_noncanonical_u64(12),
|
||||||
];
|
];
|
||||||
let int0_b: [F; 4] = [
|
let int1_b: [F; 4] = [
|
||||||
F::from_noncanonical_u64(01),
|
F::from_noncanonical_u64(01),
|
||||||
F::from_noncanonical_u64(11),
|
F::from_noncanonical_u64(11),
|
||||||
F::from_noncanonical_u64(03),
|
F::from_noncanonical_u64(03),
|
||||||
F::from_noncanonical_u64(13),
|
F::from_noncanonical_u64(13),
|
||||||
];
|
];
|
||||||
let int1_a: [F; 4] = [
|
let int2_a: [F; 4] = [
|
||||||
F::from_noncanonical_u64(00),
|
F::from_noncanonical_u64(00),
|
||||||
F::from_noncanonical_u64(01),
|
F::from_noncanonical_u64(01),
|
||||||
F::from_noncanonical_u64(10),
|
F::from_noncanonical_u64(10),
|
||||||
F::from_noncanonical_u64(11),
|
F::from_noncanonical_u64(11),
|
||||||
];
|
];
|
||||||
let int1_b: [F; 4] = [
|
let int2_b: [F; 4] = [
|
||||||
F::from_noncanonical_u64(02),
|
F::from_noncanonical_u64(02),
|
||||||
F::from_noncanonical_u64(03),
|
F::from_noncanonical_u64(03),
|
||||||
F::from_noncanonical_u64(12),
|
F::from_noncanonical_u64(12),
|
||||||
F::from_noncanonical_u64(13),
|
F::from_noncanonical_u64(13),
|
||||||
];
|
];
|
||||||
|
|
||||||
let packed_a = PackedPrimeField::<F>::from_arr(in_a);
|
let packed_a = Avx2PrimeField::<F>::from_arr(in_a);
|
||||||
let packed_b = PackedPrimeField::<F>::from_arr(in_b);
|
let packed_b = Avx2PrimeField::<F>::from_arr(in_b);
|
||||||
{
|
|
||||||
let (x0, y0) = packed_a.interleave(packed_b, 0);
|
|
||||||
assert_eq!(x0.to_arr(), int0_a);
|
|
||||||
assert_eq!(y0.to_arr(), int0_b);
|
|
||||||
}
|
|
||||||
{
|
{
|
||||||
let (x1, y1) = packed_a.interleave(packed_b, 1);
|
let (x1, y1) = packed_a.interleave(packed_b, 1);
|
||||||
assert_eq!(x1.to_arr(), int1_a);
|
assert_eq!(x1.as_arr(), int1_a);
|
||||||
assert_eq!(y1.to_arr(), int1_b);
|
assert_eq!(y1.as_arr(), int1_b);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let (x2, y2) = packed_a.interleave(packed_b, 2);
|
||||||
|
assert_eq!(x2.as_arr(), int2_a);
|
||||||
|
assert_eq!(y2.as_arr(), int2_b);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let (x4, y4) = packed_a.interleave(packed_b, 4);
|
||||||
|
assert_eq!(x4.as_arr(), in_a);
|
||||||
|
assert_eq!(y4.as_arr(), in_b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,76 +1,82 @@
|
|||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::iter::{Product, Sum};
|
use std::iter::{Product, Sum};
|
||||||
use std::ops::{Add, AddAssign, Mul, MulAssign, Neg, Sub, SubAssign};
|
use std::ops::{Add, AddAssign, Div, Mul, MulAssign, Neg, Sub, SubAssign};
|
||||||
|
use std::slice;
|
||||||
|
|
||||||
use crate::field::field_types::Field;
|
use crate::field::field_types::Field;
|
||||||
|
|
||||||
pub trait PackedField:
|
/// # Safety
|
||||||
|
/// - WIDTH is assumed to be a power of 2.
|
||||||
|
/// - If P implements PackedField then P must be castable to/from [P::Scalar; P::WIDTH] without UB.
|
||||||
|
pub unsafe trait PackedField:
|
||||||
'static
|
'static
|
||||||
+ Add<Self, Output = Self>
|
+ Add<Self, Output = Self>
|
||||||
+ Add<Self::FieldType, Output = Self>
|
+ Add<Self::Scalar, Output = Self>
|
||||||
+ AddAssign<Self>
|
+ AddAssign<Self>
|
||||||
+ AddAssign<Self::FieldType>
|
+ AddAssign<Self::Scalar>
|
||||||
+ Copy
|
+ Copy
|
||||||
+ Debug
|
+ Debug
|
||||||
+ Default
|
+ Default
|
||||||
// TODO: Implementing Div sounds like a pain so it's a worry for later.
|
+ From<Self::Scalar>
|
||||||
|
// TODO: Implement packed / packed division
|
||||||
|
+ Div<Self::Scalar, Output = Self>
|
||||||
+ Mul<Self, Output = Self>
|
+ Mul<Self, Output = Self>
|
||||||
+ Mul<Self::FieldType, Output = Self>
|
+ Mul<Self::Scalar, Output = Self>
|
||||||
+ MulAssign<Self>
|
+ MulAssign<Self>
|
||||||
+ MulAssign<Self::FieldType>
|
+ MulAssign<Self::Scalar>
|
||||||
+ Neg<Output = Self>
|
+ Neg<Output = Self>
|
||||||
+ Product
|
+ Product
|
||||||
+ Send
|
+ Send
|
||||||
+ Sub<Self, Output = Self>
|
+ Sub<Self, Output = Self>
|
||||||
+ Sub<Self::FieldType, Output = Self>
|
+ Sub<Self::Scalar, Output = Self>
|
||||||
+ SubAssign<Self>
|
+ SubAssign<Self>
|
||||||
+ SubAssign<Self::FieldType>
|
+ SubAssign<Self::Scalar>
|
||||||
+ Sum
|
+ Sum
|
||||||
+ Sync
|
+ Sync
|
||||||
|
where
|
||||||
|
Self::Scalar: Add<Self, Output = Self>,
|
||||||
|
Self::Scalar: Mul<Self, Output = Self>,
|
||||||
|
Self::Scalar: Sub<Self, Output = Self>,
|
||||||
{
|
{
|
||||||
type FieldType: Field;
|
type Scalar: Field;
|
||||||
|
type PackedPrimeField: PackedField<Scalar = <Self::Scalar as Field>::PrimeField>;
|
||||||
|
|
||||||
const LOG2_WIDTH: usize;
|
const WIDTH: usize;
|
||||||
const WIDTH: usize = 1 << Self::LOG2_WIDTH;
|
const ZERO: Self;
|
||||||
|
const ONE: Self;
|
||||||
|
|
||||||
fn square(&self) -> Self {
|
fn square(&self) -> Self {
|
||||||
*self * *self
|
*self * *self
|
||||||
}
|
}
|
||||||
|
|
||||||
fn zero() -> Self {
|
fn from_arr(arr: [Self::Scalar; Self::WIDTH]) -> Self;
|
||||||
Self::broadcast(Self::FieldType::ZERO)
|
fn as_arr(&self) -> [Self::Scalar; Self::WIDTH];
|
||||||
}
|
|
||||||
fn one() -> Self {
|
|
||||||
Self::broadcast(Self::FieldType::ONE)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn broadcast(x: Self::FieldType) -> Self;
|
fn from_slice(slice: &[Self::Scalar]) -> &Self;
|
||||||
|
fn from_slice_mut(slice: &mut [Self::Scalar]) -> &mut Self;
|
||||||
|
fn as_slice(&self) -> &[Self::Scalar];
|
||||||
|
fn as_slice_mut(&mut self) -> &mut [Self::Scalar];
|
||||||
|
|
||||||
fn from_arr(arr: [Self::FieldType; Self::WIDTH]) -> Self;
|
/// Take interpret two vectors as chunks of block_len elements. Unpack and interleave those
|
||||||
fn to_arr(&self) -> [Self::FieldType; Self::WIDTH];
|
|
||||||
|
|
||||||
fn from_slice(slice: &[Self::FieldType]) -> Self;
|
|
||||||
fn to_vec(&self) -> Vec<Self::FieldType>;
|
|
||||||
|
|
||||||
/// Take interpret two vectors as chunks of (1 << r) elements. Unpack and interleave those
|
|
||||||
/// chunks. This is best seen with an example. If we have:
|
/// chunks. This is best seen with an example. If we have:
|
||||||
/// A = [x0, y0, x1, y1],
|
/// A = [x0, y0, x1, y1],
|
||||||
/// B = [x2, y2, x3, y3],
|
/// B = [x2, y2, x3, y3],
|
||||||
/// then
|
/// then
|
||||||
/// interleave(A, B, 0) = ([x0, x2, x1, x3], [y0, y2, y1, y3]).
|
/// interleave(A, B, 1) = ([x0, x2, x1, x3], [y0, y2, y1, y3]).
|
||||||
/// Pairs that were adjacent in the input are at corresponding positions in the output.
|
/// Pairs that were adjacent in the input are at corresponding positions in the output.
|
||||||
/// r lets us set the size of chunks we're interleaving. If we set r = 1, then for
|
/// r lets us set the size of chunks we're interleaving. If we set block_len = 2, then for
|
||||||
/// A = [x0, x1, y0, y1],
|
/// A = [x0, x1, y0, y1],
|
||||||
/// B = [x2, x3, y2, y3],
|
/// B = [x2, x3, y2, y3],
|
||||||
/// we obtain
|
/// we obtain
|
||||||
/// interleave(A, B, r) = ([x0, x1, x2, x3], [y0, y1, y2, y3]).
|
/// interleave(A, B, block_len) = ([x0, x1, x2, x3], [y0, y1, y2, y3]).
|
||||||
/// We can also think about this as stacking the vectors, dividing them into 2x2 matrices, and
|
/// We can also think about this as stacking the vectors, dividing them into 2x2 matrices, and
|
||||||
/// transposing those matrices.
|
/// transposing those matrices.
|
||||||
/// When r = LOG2_WIDTH, this operation is a no-op. Values of r > LOG2_WIDTH are not
|
/// When block_len = WIDTH, this operation is a no-op. block_len must divide WIDTH. Since
|
||||||
/// permitted.
|
/// WIDTH is specified to be a power of 2, block_len must also be a power of 2. It cannot be 0
|
||||||
fn interleave(&self, other: Self, r: usize) -> (Self, Self);
|
/// and it cannot be > WIDTH.
|
||||||
|
fn interleave(&self, other: Self, block_len: usize) -> (Self, Self);
|
||||||
|
|
||||||
fn pack_slice(buf: &[Self::FieldType]) -> &[Self] {
|
fn pack_slice(buf: &[Self::Scalar]) -> &[Self] {
|
||||||
assert!(
|
assert!(
|
||||||
buf.len() % Self::WIDTH == 0,
|
buf.len() % Self::WIDTH == 0,
|
||||||
"Slice length (got {}) must be a multiple of packed field width ({}).",
|
"Slice length (got {}) must be a multiple of packed field width ({}).",
|
||||||
@ -81,7 +87,7 @@ pub trait PackedField:
|
|||||||
let n = buf.len() / Self::WIDTH;
|
let n = buf.len() / Self::WIDTH;
|
||||||
unsafe { std::slice::from_raw_parts(buf_ptr, n) }
|
unsafe { std::slice::from_raw_parts(buf_ptr, n) }
|
||||||
}
|
}
|
||||||
fn pack_slice_mut(buf: &mut [Self::FieldType]) -> &mut [Self] {
|
fn pack_slice_mut(buf: &mut [Self::Scalar]) -> &mut [Self] {
|
||||||
assert!(
|
assert!(
|
||||||
buf.len() % Self::WIDTH == 0,
|
buf.len() % Self::WIDTH == 0,
|
||||||
"Slice length (got {}) must be a multiple of packed field width ({}).",
|
"Slice length (got {}) must be a multiple of packed field width ({}).",
|
||||||
@ -94,35 +100,42 @@ pub trait PackedField:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<F: Field> PackedField for F {
|
unsafe impl<F: Field> PackedField for F {
|
||||||
type FieldType = Self;
|
type Scalar = Self;
|
||||||
|
type PackedPrimeField = F::PrimeField;
|
||||||
|
|
||||||
const LOG2_WIDTH: usize = 0;
|
const WIDTH: usize = 1;
|
||||||
|
const ZERO: Self = <F as Field>::ZERO;
|
||||||
|
const ONE: Self = <F as Field>::ONE;
|
||||||
|
|
||||||
fn broadcast(x: Self::FieldType) -> Self {
|
fn square(&self) -> Self {
|
||||||
x
|
<Self as Field>::square(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_arr(arr: [Self::FieldType; Self::WIDTH]) -> Self {
|
fn from_arr(arr: [Self::Scalar; Self::WIDTH]) -> Self {
|
||||||
arr[0]
|
arr[0]
|
||||||
}
|
}
|
||||||
fn to_arr(&self) -> [Self::FieldType; Self::WIDTH] {
|
fn as_arr(&self) -> [Self::Scalar; Self::WIDTH] {
|
||||||
[*self]
|
[*self]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_slice(slice: &[Self::FieldType]) -> Self {
|
fn from_slice(slice: &[Self::Scalar]) -> &Self {
|
||||||
assert_eq!(slice.len(), 1);
|
&slice[0]
|
||||||
slice[0]
|
|
||||||
}
|
}
|
||||||
fn to_vec(&self) -> Vec<Self::FieldType> {
|
fn from_slice_mut(slice: &mut [Self::Scalar]) -> &mut Self {
|
||||||
vec![*self]
|
&mut slice[0]
|
||||||
|
}
|
||||||
|
fn as_slice(&self) -> &[Self::Scalar] {
|
||||||
|
slice::from_ref(self)
|
||||||
|
}
|
||||||
|
fn as_slice_mut(&mut self) -> &mut [Self::Scalar] {
|
||||||
|
slice::from_mut(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn interleave(&self, other: Self, r: usize) -> (Self, Self) {
|
fn interleave(&self, other: Self, block_len: usize) -> (Self, Self) {
|
||||||
if r == 0 {
|
match block_len {
|
||||||
(*self, other)
|
1 => (*self, other),
|
||||||
} else {
|
_ => panic!("unsupported block length"),
|
||||||
panic!("r > LOG2_WIDTH");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user