2021-09-13 17:42:25 -07:00
|
|
|
use core::arch::x86_64::*;
|
|
|
|
|
|
|
|
|
|
use crate::field::goldilocks_field::GoldilocksField;
|
|
|
|
|
use crate::field::packed_avx2::common::{
|
2021-12-03 13:12:19 -08:00
|
|
|
add_no_canonicalize_64_64s_s, epsilon, shift, sub_no_canonicalize_64s_64_s, ReducibleAvx2,
|
2021-09-13 17:42:25 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/// Reduce a u128 modulo FIELD_ORDER. The input is (u64, u64), pre-shifted by 2^63. The result is
|
|
|
|
|
/// similarly shifted.
|
2021-12-03 13:12:19 -08:00
|
|
|
impl ReducibleAvx2 for GoldilocksField {
|
2021-09-13 17:42:25 -07:00
|
|
|
#[inline]
|
2021-12-02 18:33:43 -08:00
|
|
|
unsafe fn reduce128(x: (__m256i, __m256i)) -> __m256i {
|
|
|
|
|
let (hi0, lo0) = x;
|
|
|
|
|
let lo0_s = shift(lo0);
|
2021-09-13 17:42:25 -07:00
|
|
|
let hi_hi0 = _mm256_srli_epi64(hi0, 32);
|
|
|
|
|
let lo1_s = sub_no_canonicalize_64s_64_s::<GoldilocksField>(lo0_s, hi_hi0);
|
|
|
|
|
let t1 = _mm256_mul_epu32(hi0, epsilon::<GoldilocksField>());
|
|
|
|
|
let lo2_s = add_no_canonicalize_64_64s_s::<GoldilocksField>(t1, lo1_s);
|
2021-12-02 18:33:43 -08:00
|
|
|
let lo2 = shift(lo2_s);
|
|
|
|
|
lo2
|
2021-09-13 17:42:25 -07:00
|
|
|
}
|
|
|
|
|
}
|