From 3790b55c4b1dda59561a4f794fb5225748a729ee Mon Sep 17 00:00:00 2001
From: Jakub Nabaglo
Date: Thu, 14 Oct 2021 08:59:47 -0700
Subject: [PATCH] Delete obsolete optimizations (Crandall Poseidon on AVX2 and
 NEON) (#305)

---
 src/hash/arch/aarch64/mod.rs                  |   3 -
 .../arch/aarch64/poseidon_crandall_neon.rs    | 247 ------------------
 src/hash/arch/mod.rs                          |   3 -
 src/hash/arch/x86_64/mod.rs                   |   4 -
 .../arch/x86_64/poseidon_crandall_avx2.rs     | 245 -----------------
 src/hash/poseidon_crandall.rs                 |  80 ------
 6 files changed, 582 deletions(-)
 delete mode 100644 src/hash/arch/aarch64/mod.rs
 delete mode 100644 src/hash/arch/aarch64/poseidon_crandall_neon.rs
 delete mode 100644 src/hash/arch/x86_64/poseidon_crandall_avx2.rs

diff --git a/src/hash/arch/aarch64/mod.rs b/src/hash/arch/aarch64/mod.rs
deleted file mode 100644
index 552d4b8c..00000000
--- a/src/hash/arch/aarch64/mod.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-// Requires NEON
-#[cfg(target_feature = "neon")]
-pub(crate) mod poseidon_crandall_neon;
diff --git a/src/hash/arch/aarch64/poseidon_crandall_neon.rs b/src/hash/arch/aarch64/poseidon_crandall_neon.rs
deleted file mode 100644
index b3a6c6e3..00000000
--- a/src/hash/arch/aarch64/poseidon_crandall_neon.rs
+++ /dev/null
@@ -1,247 +0,0 @@
-use core::arch::aarch64::*;
-
-use crate::field::crandall_field::CrandallField;
-use crate::field::field_types::PrimeField;
-use crate::field::packed_crandall_neon::PackedCrandallNeon;
-use crate::field::packed_field::PackedField;
-
-const EPSILON: u64 = 0u64.wrapping_sub(CrandallField::ORDER);
-
-const MDS_MATRIX_EXPS8: [i32; 8] = [2, 0, 1, 8, 4, 3, 0, 0];
-const MDS_MATRIX_EXPS12: [i32; 12] = [10, 13, 2, 0, 4, 1, 8, 7, 15, 5, 0, 0];
-
-/// Pair of vectors (hi, lo) representing a u128.
-type Vecs128 = (uint64x2_t, uint64x2_t);
-
-/// Takes cumul (u128) and x (u64). Returns cumul + (x << SHIFT) as u128.
-#[inline(always)]
-unsafe fn shift_and_accumulate<const SHIFT: i32>(
-    x: uint64x2_t,
-    (hi_cumul, lo_cumul): Vecs128,
-) -> Vecs128
-where
-    [(); (64 - SHIFT) as usize]: ,
-{
-    let x_shifted_lo = vshlq_n_u64::<SHIFT>(x);
-    let res_lo = vaddq_u64(lo_cumul, x_shifted_lo);
-    let carry = vcgtq_u64(lo_cumul, res_lo);
-    // This works around a bug in Rust's NEON intrinsics. A shift by 64, even though well-defined
-    // in ARM's docs, is considered undefined behavior by LLVM. Avoid the intrinsic when
-    // SHIFT == 0.
-    let tmp_hi = if SHIFT == 0 {
-        hi_cumul
-    } else {
-        vsraq_n_u64::<{ 64 - SHIFT }>(hi_cumul, x)
-    };
-    let res_hi = vsubq_u64(tmp_hi, carry);
-    (res_hi, res_lo)
-}
-
-/// Extract state[OFFSET..OFFSET + 2] as a vector. Wraps around the boundary.
-#[inline(always)]
-unsafe fn get_vector_with_offset<const WIDTH: usize, const OFFSET: usize>(
-    state: [CrandallField; WIDTH],
-) -> uint64x2_t {
-    let lo = vmov_n_u64(state[OFFSET % WIDTH].0);
-    let hi = vmov_n_u64(state[(OFFSET + 1) % WIDTH].0);
-    vcombine_u64(lo, hi)
-}
-
-/// Extract CrandallField element from vector.
-#[inline(always)]
-unsafe fn extract<const INDEX: i32>(v: uint64x2_t) -> CrandallField {
-    CrandallField(vgetq_lane_u64::<INDEX>(v))
-}
-
-type StateVecs8 = [Vecs128; 4];
-
-#[inline(always)]
-unsafe fn iteration8<const INDEX: usize, const SHIFT: i32>(
-    [cumul0, cumul1, cumul2, cumul3]: StateVecs8,
-    state: [CrandallField; 8],
-) -> StateVecs8
-// 4 vectors of 2 needed to represent entire state.
-where
-    [(); INDEX + 2]: ,
-    [(); INDEX + 4]: ,
-    [(); INDEX + 6]: ,
-    [(); (64 - SHIFT) as usize]: ,
-{
-    // Entire state, rotated by INDEX.
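// NOTE (editorial sketch, not part of the deleted file): per lane, one
// iteration8::<INDEX, SHIFT> call performs the following, with each (hi, lo)
// pair read as a u128 accumulator (hypothetical model function):
//
//     fn iteration8_model(cumul: &mut [u128; 8], state: &[u64; 8], index: usize, shift: u32) {
//         for r in 0..8 {
//             cumul[r] += (state[(index + r) % 8] as u128) << shift;
//         }
//     }
//
// Output index r lives in accumulator vector r / 2, lane r % 2, matching the
// extract calls in poseidon8_mds below.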
-    let state0 = get_vector_with_offset::<8, INDEX>(state);
-    let state1 = get_vector_with_offset::<8, { INDEX + 2 }>(state);
-    let state2 = get_vector_with_offset::<8, { INDEX + 4 }>(state);
-    let state3 = get_vector_with_offset::<8, { INDEX + 6 }>(state);
-    [
-        shift_and_accumulate::<SHIFT>(state0, cumul0),
-        shift_and_accumulate::<SHIFT>(state1, cumul1),
-        shift_and_accumulate::<SHIFT>(state2, cumul2),
-        shift_and_accumulate::<SHIFT>(state3, cumul3),
-    ]
-}
-
-#[inline(always)]
-pub fn poseidon8_mds(state: [CrandallField; 8]) -> [CrandallField; 8] {
-    unsafe {
-        let mut res = [(vmovq_n_u64(0), vmovq_n_u64(0)); 4];
-
-        // The scalar loop goes:
-        //     for r in 0..WIDTH {
-        //         let mut res = 0u128;
-        //         for i in 0..WIDTH {
-        //             res += (state[(i + r) % WIDTH] as u128) << MDS_MATRIX_EXPS[i];
-        //         }
-        //         result[r] = reduce(res);
-        //     }
-        //
-        // Here, we swap the loops. Equivalent to:
-        //     let mut res = [0u128; WIDTH];
-        //     for i in 0..WIDTH {
-        //         let mds_matrix_exp = MDS_MATRIX_EXPS[i];
-        //         for r in 0..WIDTH {
-        //             res[r] += (state[(i + r) % WIDTH] as u128) << mds_matrix_exp;
-        //         }
-        //     }
-        //     for r in 0..WIDTH {
-        //         result[r] = reduce(res[r]);
-        //     }
-        //
-        // Notice that in the latter version, all iterations of the inner loop shift by the same
-        // amount. In the vector version, we perform multiple iterations of that loop at once, and
-        // vector shifts are cheaper when all elements are shifted by the same amount.
-
-        res = iteration8::<0, { MDS_MATRIX_EXPS8[0] }>(res, state);
-        res = iteration8::<1, { MDS_MATRIX_EXPS8[1] }>(res, state);
-        res = iteration8::<2, { MDS_MATRIX_EXPS8[2] }>(res, state);
-        res = iteration8::<3, { MDS_MATRIX_EXPS8[3] }>(res, state);
-        res = iteration8::<4, { MDS_MATRIX_EXPS8[4] }>(res, state);
-        res = iteration8::<5, { MDS_MATRIX_EXPS8[5] }>(res, state);
-        res = iteration8::<6, { MDS_MATRIX_EXPS8[6] }>(res, state);
-        res = iteration8::<7, { MDS_MATRIX_EXPS8[7] }>(res, state);
-
-        let [res0, res1, res2, res3] = res;
-        let reduced0 = reduce96(res0);
-        let reduced1 = reduce96(res1);
-        let reduced2 = reduce96(res2);
-        let reduced3 = reduce96(res3);
-        [
-            extract::<0>(reduced0),
-            extract::<1>(reduced0),
-            extract::<0>(reduced1),
-            extract::<1>(reduced1),
-            extract::<0>(reduced2),
-            extract::<1>(reduced2),
-            extract::<0>(reduced3),
-            extract::<1>(reduced3),
-        ]
-    }
-}
-
-type StateVecs12 = [Vecs128; 6];
-
-#[inline(always)]
-unsafe fn iteration12<const INDEX: usize, const SHIFT: i32>(
-    [cumul0, cumul1, cumul2, cumul3, cumul4, cumul5]: StateVecs12,
-    state: [CrandallField; 12],
-) -> StateVecs12
-// 6 vectors of 2 needed to represent entire state.
-where
-    [(); INDEX + 2]: ,
-    [(); INDEX + 4]: ,
-    [(); INDEX + 6]: ,
-    [(); INDEX + 8]: ,
-    [(); INDEX + 10]: ,
-    [(); (64 - SHIFT) as usize]: ,
-{
-    // Entire state, rotated by INDEX.
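// NOTE (editorial sketch, not part of the deleted file): shift_and_accumulate
// maintains each u128 accumulator as split 64-bit halves. A one-lane scalar
// model (hypothetical function; SHIFT is in 0..16, as in the MDS tables):
//
//     fn shift_and_accumulate_model(x: u64, (hi, lo): (u64, u64), shift: u32) -> (u64, u64) {
//         let (res_lo, carry) = lo.overflowing_add(x << shift);
//         let x_hi = if shift == 0 { 0 } else { x >> (64 - shift) }; // same SHIFT == 0 workaround
//         (hi.wrapping_add(x_hi).wrapping_add(carry as u64), res_lo)
//     }
//
// vcgtq_u64 returns all-ones (i.e. -1) on a wrapped low half, which is why
// vsubq_u64(tmp_hi, carry) above implements the "+ carry" step.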
-    let state0 = get_vector_with_offset::<12, INDEX>(state);
-    let state1 = get_vector_with_offset::<12, { INDEX + 2 }>(state);
-    let state2 = get_vector_with_offset::<12, { INDEX + 4 }>(state);
-    let state3 = get_vector_with_offset::<12, { INDEX + 6 }>(state);
-    let state4 = get_vector_with_offset::<12, { INDEX + 8 }>(state);
-    let state5 = get_vector_with_offset::<12, { INDEX + 10 }>(state);
-    [
-        shift_and_accumulate::<SHIFT>(state0, cumul0),
-        shift_and_accumulate::<SHIFT>(state1, cumul1),
-        shift_and_accumulate::<SHIFT>(state2, cumul2),
-        shift_and_accumulate::<SHIFT>(state3, cumul3),
-        shift_and_accumulate::<SHIFT>(state4, cumul4),
-        shift_and_accumulate::<SHIFT>(state5, cumul5),
-    ]
-}
-
-#[inline(always)]
-pub fn poseidon12_mds(state: [CrandallField; 12]) -> [CrandallField; 12] {
-    unsafe {
-        let mut res = [(vmovq_n_u64(0), vmovq_n_u64(0)); 6];
-
-        // See width-8 version for explanation.
-
-        res = iteration12::<0, { MDS_MATRIX_EXPS12[0] }>(res, state);
-        res = iteration12::<1, { MDS_MATRIX_EXPS12[1] }>(res, state);
-        res = iteration12::<2, { MDS_MATRIX_EXPS12[2] }>(res, state);
-        res = iteration12::<3, { MDS_MATRIX_EXPS12[3] }>(res, state);
-        res = iteration12::<4, { MDS_MATRIX_EXPS12[4] }>(res, state);
-        res = iteration12::<5, { MDS_MATRIX_EXPS12[5] }>(res, state);
-        res = iteration12::<6, { MDS_MATRIX_EXPS12[6] }>(res, state);
-        res = iteration12::<7, { MDS_MATRIX_EXPS12[7] }>(res, state);
-        res = iteration12::<8, { MDS_MATRIX_EXPS12[8] }>(res, state);
-        res = iteration12::<9, { MDS_MATRIX_EXPS12[9] }>(res, state);
-        res = iteration12::<10, { MDS_MATRIX_EXPS12[10] }>(res, state);
-        res = iteration12::<11, { MDS_MATRIX_EXPS12[11] }>(res, state);
-
-        let [res0, res1, res2, res3, res4, res5] = res;
-        let reduced0 = reduce96(res0);
-        let reduced1 = reduce96(res1);
-        let reduced2 = reduce96(res2);
-        let reduced3 = reduce96(res3);
-        let reduced4 = reduce96(res4);
-        let reduced5 = reduce96(res5);
-        [
-            extract::<0>(reduced0),
-            extract::<1>(reduced0),
-            extract::<0>(reduced1),
-            extract::<1>(reduced1),
-            extract::<0>(reduced2),
-            extract::<1>(reduced2),
-            extract::<0>(reduced3),
-            extract::<1>(reduced3),
-            extract::<0>(reduced4),
-            extract::<1>(reduced4),
-            extract::<0>(reduced5),
-            extract::<1>(reduced5),
-        ]
-    }
-}
-
-#[inline(always)]
-unsafe fn reduce96(x: Vecs128) -> uint64x2_t {
-    let (hi, lo) = x;
-    let hi_lo = vmovn_u64(hi); // Extract the low 32 bits of each 64-bit element
-    mul_add_32_32_64(hi_lo, vmov_n_u32(EPSILON as u32), lo)
-}
-
-// x * y + z in the prime field. x and y are u32; z is u64.
-#[inline(always)]
-unsafe fn mul_add_32_32_64(x: uint32x2_t, y: uint32x2_t, z: uint64x2_t) -> uint64x2_t {
-    // No canonicalization needed because x * y + z < 2^64 + FIELD_ORDER.
-    let res_wrapped = vmlal_u32(z, x, y);
-    let mask = vcgtq_u64(z, res_wrapped);
-    let res_unwrapped = vaddq_u64(res_wrapped, vmovq_n_u64(EPSILON));
-    vbslq_u64(mask, res_unwrapped, res_wrapped)
-}
-
-/// Poseidon constant layer for Crandall. Assumes that every element in round_constants is in
-/// 0..CrandallField::ORDER; when this is not true it may return garbage. It's marked unsafe for
-/// this reason.
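// NOTE (editorial sketch, not part of the deleted file): reduce96 above relies on
// 2^64 = EPSILON (mod CrandallField::ORDER), so a 96-bit value hi * 2^64 + lo with
// hi < 2^32 reduces to hi * EPSILON + lo. The same reduction in scalar form, with
// the same non-canonical (< 2^64, possibly >= ORDER) output as the vector code:
//
//     fn reduce96_model(x: u128) -> u64 {
//         let lo = x as u64;
//         let hi = (x >> 64) as u64; // < 2^32, since x < 2^96
//         let (res, wrapped) = lo.overflowing_add(hi * EPSILON);
//         res.wrapping_add(wrapped as u64 * EPSILON) // a lost 2^64 wraps back in as EPSILON
//     }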
-#[inline(always)]
-pub unsafe fn poseidon_const<const PACKED_WIDTH: usize>(
-    state: &mut [CrandallField; 2 * PACKED_WIDTH],
-    round_constants: [u64; 2 * PACKED_WIDTH],
-) {
-    let packed_state = PackedCrandallNeon::pack_slice_mut(state);
-    for i in 0..PACKED_WIDTH {
-        let packed_round_const = vld1q_u64(round_constants[2 * i..2 * i + 2].as_ptr());
-        packed_state[i] = packed_state[i].add_canonical_u64(packed_round_const);
-    }
-}
diff --git a/src/hash/arch/mod.rs b/src/hash/arch/mod.rs
index 1de23f67..15f321bd 100644
--- a/src/hash/arch/mod.rs
+++ b/src/hash/arch/mod.rs
@@ -1,5 +1,2 @@
 #[cfg(target_arch = "x86_64")]
 pub(crate) mod x86_64;
-
-#[cfg(target_arch = "aarch64")]
-pub(crate) mod aarch64;
diff --git a/src/hash/arch/x86_64/mod.rs b/src/hash/arch/x86_64/mod.rs
index 407675aa..fa3681d0 100644
--- a/src/hash/arch/x86_64/mod.rs
+++ b/src/hash/arch/x86_64/mod.rs
@@ -3,7 +3,3 @@
 // - BMI2 (for MULX and SHRX)
 #[cfg(all(target_feature = "avx2", target_feature = "bmi2"))]
 pub(crate) mod poseidon_goldilocks_avx2_bmi2;
-
-// Requires AVX2
-#[cfg(target_feature = "avx2")]
-pub(crate) mod poseidon_crandall_avx2;
diff --git a/src/hash/arch/x86_64/poseidon_crandall_avx2.rs b/src/hash/arch/x86_64/poseidon_crandall_avx2.rs
deleted file mode 100644
index fc181325..00000000
--- a/src/hash/arch/x86_64/poseidon_crandall_avx2.rs
+++ /dev/null
@@ -1,245 +0,0 @@
-use core::arch::x86_64::*;
-
-use crate::field::crandall_field::CrandallField;
-use crate::field::field_types::PrimeField;
-use crate::field::packed_avx2::PackedCrandallAVX2;
-use crate::field::packed_field::PackedField;
-
-const EPSILON: u64 = 0u64.wrapping_sub(CrandallField::ORDER);
-const SIGN_BIT: u64 = 1 << 63;
-
-const MDS_MATRIX_EXPS8: [i32; 8] = [2, 0, 1, 8, 4, 3, 0, 0];
-const MDS_MATRIX_EXPS12: [i32; 12] = [10, 13, 2, 0, 4, 1, 8, 7, 15, 5, 0, 0];
-
-/// Pair of vectors (hi, lo) representing a u128.
-type Vecs128 = (__m256i, __m256i);
-
-/// Takes cumul (u128) and x (u64). Returns cumul + (x << SHIFT) as u128.
-/// Assumes that cumul is shifted by 1 << 63; the result is similarly shifted.
-#[inline(always)]
-unsafe fn shift_and_accumulate<const SHIFT: i32>(
-    x: __m256i,
-    (hi_cumul, lo_cumul_s): Vecs128,
-) -> Vecs128
-where
-    [(); (64 - SHIFT) as usize]: ,
-{
-    let x_shifted_lo = _mm256_slli_epi64::<SHIFT>(x);
-    let x_shifted_hi = _mm256_srli_epi64::<{ 64 - SHIFT }>(x);
-    let res_lo_s = _mm256_add_epi64(lo_cumul_s, x_shifted_lo);
-    let carry = _mm256_cmpgt_epi64(lo_cumul_s, res_lo_s);
-    let res_hi = _mm256_sub_epi64(_mm256_add_epi64(hi_cumul, x_shifted_hi), carry);
-    (res_hi, res_lo_s)
-}
-
-/// Extract state[OFFSET..OFFSET + 4] as a vector. Wraps around the boundary.
-#[inline(always)]
-unsafe fn get_vector_with_offset<const WIDTH: usize, const OFFSET: usize>(
-    state: [CrandallField; WIDTH],
-) -> __m256i {
-    _mm256_setr_epi64x(
-        state[OFFSET % WIDTH].0 as i64,
-        state[(OFFSET + 1) % WIDTH].0 as i64,
-        state[(OFFSET + 2) % WIDTH].0 as i64,
-        state[(OFFSET + 3) % WIDTH].0 as i64,
-    )
-}
-
-/// Extract CrandallField element from vector.
-#[inline(always)]
-unsafe fn extract<const INDEX: i32>(v: __m256i) -> CrandallField {
-    CrandallField(_mm256_extract_epi64::<INDEX>(v) as u64)
-}
-
-#[inline(always)]
-unsafe fn iteration8<const INDEX: usize, const SHIFT: i32>(
-    [cumul0_s, cumul1_s]: [Vecs128; 2],
-    state: [CrandallField; 8],
-) -> [Vecs128; 2]
-// 2 vectors of 4 needed to represent entire state.
-where
-    [(); INDEX + 4]: ,
-    [(); (64 - SHIFT) as usize]: ,
-{
-    // Entire state, rotated by INDEX.
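// NOTE (editorial, not part of the deleted file): AVX2 has no unsigned 64-bit
// compare, so the low accumulator words are kept biased by SIGN_BIT (the `_s`
// suffix) and compared with the signed _mm256_cmpgt_epi64. The trick in scalar
// form (hypothetical helper):
//
//     fn unsigned_gt(x: u64, y: u64) -> bool {
//         const B: u64 = 1 << 63;
//         ((x ^ B) as i64) > ((y ^ B) as i64) // signed compare on biased values == unsigned compare
//     }
//
// Adding an unbiased addend to a biased word preserves the bias, so the carry
// check in shift_and_accumulate needs no extra correction.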
-    let state0 = get_vector_with_offset::<8, INDEX>(state);
-    let state1 = get_vector_with_offset::<8, { INDEX + 4 }>(state);
-    [
-        shift_and_accumulate::<SHIFT>(state0, cumul0_s),
-        shift_and_accumulate::<SHIFT>(state1, cumul1_s),
-    ]
-}
-
-#[inline(always)]
-pub fn poseidon8_mds(state: [CrandallField; 8]) -> [CrandallField; 8] {
-    unsafe {
-        let mut res_s = [(_mm256_setzero_si256(), _mm256_set1_epi64x(SIGN_BIT as i64)); 2];
-
-        // The scalar loop goes:
-        //     for r in 0..WIDTH {
-        //         let mut res = 0u128;
-        //         for i in 0..WIDTH {
-        //             res += (state[(i + r) % WIDTH] as u128) << MDS_MATRIX_EXPS[i];
-        //         }
-        //         result[r] = reduce(res);
-        //     }
-        //
-        // Here, we swap the loops. Equivalent to:
-        //     let mut res = [0u128; WIDTH];
-        //     for i in 0..WIDTH {
-        //         let mds_matrix_exp = MDS_MATRIX_EXPS[i];
-        //         for r in 0..WIDTH {
-        //             res[r] += (state[(i + r) % WIDTH] as u128) << mds_matrix_exp;
-        //         }
-        //     }
-        //     for r in 0..WIDTH {
-        //         result[r] = reduce(res[r]);
-        //     }
-        //
-        // Notice that in the latter version, all iterations of the inner loop shift by the same
-        // amount. In the vector version, we perform multiple iterations of that loop at once, and
-        // vector shifts are cheaper when all elements are shifted by the same amount.
-
-        res_s = iteration8::<0, { MDS_MATRIX_EXPS8[0] }>(res_s, state);
-        res_s = iteration8::<1, { MDS_MATRIX_EXPS8[1] }>(res_s, state);
-        res_s = iteration8::<2, { MDS_MATRIX_EXPS8[2] }>(res_s, state);
-        res_s = iteration8::<3, { MDS_MATRIX_EXPS8[3] }>(res_s, state);
-        res_s = iteration8::<4, { MDS_MATRIX_EXPS8[4] }>(res_s, state);
-        res_s = iteration8::<5, { MDS_MATRIX_EXPS8[5] }>(res_s, state);
-        res_s = iteration8::<6, { MDS_MATRIX_EXPS8[6] }>(res_s, state);
-        res_s = iteration8::<7, { MDS_MATRIX_EXPS8[7] }>(res_s, state);
-
-        let [res0_s, res1_s] = res_s;
-        let reduced0 = reduce96s(res0_s);
-        let reduced1 = reduce96s(res1_s);
-        [
-            extract::<0>(reduced0),
-            extract::<1>(reduced0),
-            extract::<2>(reduced0),
-            extract::<3>(reduced0),
-            extract::<0>(reduced1),
-            extract::<1>(reduced1),
-            extract::<2>(reduced1),
-            extract::<3>(reduced1),
-        ]
-    }
-}
-
-#[inline(always)]
-unsafe fn iteration12<const INDEX: usize, const SHIFT: i32>(
-    [cumul0_s, cumul1_s, cumul2_s]: [Vecs128; 3],
-    state: [CrandallField; 12],
-) -> [Vecs128; 3]
-// 3 vectors of 4 needed to represent entire state.
-where
-    [(); INDEX + 4]: ,
-    [(); INDEX + 8]: ,
-    [(); (64 - SHIFT) as usize]: ,
-{
-    // Entire state, rotated by INDEX.
-    let state0 = get_vector_with_offset::<12, INDEX>(state);
-    let state1 = get_vector_with_offset::<12, { INDEX + 4 }>(state);
-    let state2 = get_vector_with_offset::<12, { INDEX + 8 }>(state);
-    [
-        shift_and_accumulate::<SHIFT>(state0, cumul0_s),
-        shift_and_accumulate::<SHIFT>(state1, cumul1_s),
-        shift_and_accumulate::<SHIFT>(state2, cumul2_s),
-    ]
-}
-
-#[inline(always)]
-pub fn poseidon12_mds(state: [CrandallField; 12]) -> [CrandallField; 12] {
-    unsafe {
-        let mut res_s = [(_mm256_setzero_si256(), _mm256_set1_epi64x(SIGN_BIT as i64)); 3];
-
-        // See width-8 version for explanation.
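// NOTE (editorial sketch, not part of the deleted file): a scalar reference for
// this width-12 MDS layer, transcribed from the loop in the width-8 comment
// (reduce96_model is the hypothetical scalar reduction sketched earlier):
//
//     fn mds12_model(state: [u64; 12]) -> [u64; 12] {
//         let mut out = [0u64; 12];
//         for r in 0..12 {
//             let mut acc = 0u128;
//             for i in 0..12 {
//                 acc += (state[(i + r) % 12] as u128) << MDS_MATRIX_EXPS12[i];
//             }
//             out[r] = reduce96_model(acc); // acc < 12 * 2^(64 + 15) < 2^96
//         }
//         out
//     }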
-
-        res_s = iteration12::<0, { MDS_MATRIX_EXPS12[0] }>(res_s, state);
-        res_s = iteration12::<1, { MDS_MATRIX_EXPS12[1] }>(res_s, state);
-        res_s = iteration12::<2, { MDS_MATRIX_EXPS12[2] }>(res_s, state);
-        res_s = iteration12::<3, { MDS_MATRIX_EXPS12[3] }>(res_s, state);
-        res_s = iteration12::<4, { MDS_MATRIX_EXPS12[4] }>(res_s, state);
-        res_s = iteration12::<5, { MDS_MATRIX_EXPS12[5] }>(res_s, state);
-        res_s = iteration12::<6, { MDS_MATRIX_EXPS12[6] }>(res_s, state);
-        res_s = iteration12::<7, { MDS_MATRIX_EXPS12[7] }>(res_s, state);
-        res_s = iteration12::<8, { MDS_MATRIX_EXPS12[8] }>(res_s, state);
-        res_s = iteration12::<9, { MDS_MATRIX_EXPS12[9] }>(res_s, state);
-        res_s = iteration12::<10, { MDS_MATRIX_EXPS12[10] }>(res_s, state);
-        res_s = iteration12::<11, { MDS_MATRIX_EXPS12[11] }>(res_s, state);
-
-        let [res0_s, res1_s, res2_s] = res_s;
-        let reduced0 = reduce96s(res0_s);
-        let reduced1 = reduce96s(res1_s);
-        let reduced2 = reduce96s(res2_s);
-        [
-            extract::<0>(reduced0),
-            extract::<1>(reduced0),
-            extract::<2>(reduced0),
-            extract::<3>(reduced0),
-            extract::<0>(reduced1),
-            extract::<1>(reduced1),
-            extract::<2>(reduced1),
-            extract::<3>(reduced1),
-            extract::<0>(reduced2),
-            extract::<1>(reduced2),
-            extract::<2>(reduced2),
-            extract::<3>(reduced2),
-        ]
-    }
-}
-
-#[inline(always)]
-unsafe fn reduce96s(x_s: Vecs128) -> __m256i {
-    let (hi0, lo0_s) = x_s;
-    let lo1 = _mm256_mul_epu32(hi0, _mm256_set1_epi64x(EPSILON as i64));
-    add_no_canonicalize_64_64s(lo1, lo0_s)
-}
-
-#[inline(always)]
-unsafe fn add_no_canonicalize_64_64s(x: __m256i, y_s: __m256i) -> __m256i {
-    let res_wrapped_s = _mm256_add_epi64(x, y_s);
-    let mask = _mm256_cmpgt_epi64(y_s, res_wrapped_s);
-    let res_wrapped = _mm256_xor_si256(res_wrapped_s, _mm256_set1_epi64x(SIGN_BIT as i64));
-    let wrapback_amt = _mm256_and_si256(mask, _mm256_set1_epi64x(EPSILON as i64));
-    let res = _mm256_add_epi64(res_wrapped, wrapback_amt);
-    res
-}
-
-/// Poseidon constant layer for Crandall. Assumes that every element in round_constants is in
-/// 0..CrandallField::ORDER; when this is not true it may return garbage. It's marked unsafe for
-/// this reason.
-#[inline(always)]
-pub unsafe fn poseidon_const<const PACKED_WIDTH: usize>(
-    state: &mut [CrandallField; 4 * PACKED_WIDTH],
-    round_constants: [u64; 4 * PACKED_WIDTH],
-) {
-    let packed_state = PackedCrandallAVX2::pack_slice_mut(state);
-    for i in 0..PACKED_WIDTH {
-        let constants_ptr = (&round_constants[4 * i..4 * i + 4]).as_ptr();
-        let packed_constants = _mm256_loadu_si256(constants_ptr.cast::<__m256i>());
-        packed_state[i] = packed_state[i].add_canonical_u64(packed_constants);
-    }
-}
-
-#[inline(always)]
-pub fn poseidon_sbox<const PACKED_WIDTH: usize>(state: &mut [CrandallField; 4 * PACKED_WIDTH]) {
-    // This function is manually interleaved to maximize instruction-level parallelism.
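// NOTE (editorial sketch, not part of the deleted file): the S-box is x -> x^7,
// computed as x^3 * x^4 with two squarings and two multiplies per element:
//
//     fn sbox_model(x: CrandallField) -> CrandallField {
//         let x2 = x * x;   // x^2
//         let x3 = x * x2;  // x^3
//         let x4 = x2 * x2; // x^4
//         x3 * x4           // x^7
//     }
//
// The three separate loops below expose the independent squares and multiplies
// so operations on adjacent state elements can be in flight at once.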
-
-    let packed_state = PackedCrandallAVX2::pack_slice_mut(state);
-
-    let mut x2 = [PackedCrandallAVX2::zero(); PACKED_WIDTH];
-    for i in 0..PACKED_WIDTH {
-        x2[i] = packed_state[i].square();
-    }
-
-    let mut x3 = [PackedCrandallAVX2::zero(); PACKED_WIDTH];
-    let mut x4 = [PackedCrandallAVX2::zero(); PACKED_WIDTH];
-    for i in 0..PACKED_WIDTH {
-        x3[i] = packed_state[i] * x2[i];
-        x4[i] = x2[i].square();
-    }
-
-    for i in 0..PACKED_WIDTH {
-        packed_state[i] = x3[i] * x4[i];
-    }
-}
diff --git a/src/hash/poseidon_crandall.rs b/src/hash/poseidon_crandall.rs
index 3501c9ee..31116461 100644
--- a/src/hash/poseidon_crandall.rs
+++ b/src/hash/poseidon_crandall.rs
@@ -145,46 +145,6 @@ impl Poseidon<8> for CrandallField {
     [0x3f3fd62d28872386, 0x2be97f5416341131, 0xaaee943e6eccf7b8, 0x9b7a25991a49b57f,
      0x61e9415bfc0d135a, 0xdc5d5c2cec372bd8, 0x3fc702a71c42c8df, ],
 ];
-
-    #[cfg(all(target_arch="x86_64", target_feature="avx2"))]
-    #[inline(always)]
-    fn constant_layer(state: &mut [Self; 8], round_ctr: usize) {
-        use std::convert::TryInto;
-        use crate::hash::poseidon::ALL_ROUND_CONSTANTS;
-
-        // This assumes that every element of ALL_ROUND_CONSTANTS is in 0..CrandallField::ORDER.
-        unsafe { crate::hash::arch::x86_64::poseidon_crandall_avx2::poseidon_const::<2>(state,
-            ALL_ROUND_CONSTANTS[8 * round_ctr..8 * round_ctr + 8].try_into().unwrap()); }
-    }
-
-    #[cfg(all(target_arch="aarch64", target_feature="neon"))]
-    #[inline(always)]
-    fn constant_layer(state: &mut [Self; 8], round_ctr: usize) {
-        use std::convert::TryInto;
-        use crate::hash::poseidon::ALL_ROUND_CONSTANTS;
-
-        // This assumes that every element of ALL_ROUND_CONSTANTS is in 0..CrandallField::ORDER.
-        unsafe { crate::hash::arch::aarch64::poseidon_crandall_neon::poseidon_const::<4>(state,
-            ALL_ROUND_CONSTANTS[8 * round_ctr..8 * round_ctr + 8].try_into().unwrap()); }
-    }
-
-    #[cfg(all(target_arch="x86_64", target_feature="avx2"))]
-    #[inline(always)]
-    fn mds_layer(state_: &[CrandallField; 8]) -> [CrandallField; 8] {
-        crate::hash::arch::x86_64::poseidon_crandall_avx2::poseidon8_mds(*state_)
-    }
-
-    #[cfg(all(target_arch="aarch64", target_feature="neon"))]
-    #[inline]
-    fn mds_layer(state_: &[CrandallField; 8]) -> [CrandallField; 8] {
-        crate::hash::arch::aarch64::poseidon_crandall_neon::poseidon8_mds(*state_)
-    }
-
-    #[cfg(all(target_arch="x86_64", target_feature="avx2"))]
-    #[inline(always)]
-    fn sbox_layer(state: &mut [Self; 8]) {
-        crate::hash::arch::x86_64::poseidon_crandall_avx2::poseidon_sbox::<2>(state);
-    }
 }
 
 #[rustfmt::skip]
@@ -390,46 +350,6 @@ impl Poseidon<12> for CrandallField {
      0xda5e708c57dfe9f9, 0x2d506a5bb5b7480c, 0xf2bfc6a0100f3c6d, 0x029914d117a17af3,
      0xf2bc5f8a1eb47c5f, 0xeb159cc540fb5e78, 0x8a041eb885fb24f5, ],
 ];
-
-    #[cfg(all(target_arch="x86_64", target_feature="avx2"))]
-    #[inline(always)]
-    fn constant_layer(state: &mut [Self; 12], round_ctr: usize) {
-        use std::convert::TryInto;
-        use crate::hash::poseidon::ALL_ROUND_CONSTANTS;
-
-        // This assumes that every element of ALL_ROUND_CONSTANTS is in 0..CrandallField::ORDER.
-        unsafe { crate::hash::arch::x86_64::poseidon_crandall_avx2::poseidon_const::<3>(
-            state, ALL_ROUND_CONSTANTS[12 * round_ctr..12 * round_ctr + 12].try_into().unwrap()); }
-    }
-
-    #[cfg(all(target_arch="aarch64", target_feature="neon"))]
-    #[inline(always)]
-    fn constant_layer(state: &mut [Self; 12], round_ctr: usize) {
-        use std::convert::TryInto;
-        use crate::hash::poseidon::ALL_ROUND_CONSTANTS;
-
-        // This assumes that every element of ALL_ROUND_CONSTANTS is in 0..CrandallField::ORDER.
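// NOTE (editorial, not part of the patch): the const generic passed to
// poseidon_const is the number of packed vectors, not the state width: NEON packs
// 2 lanes per vector (poseidon_const::<6> in the call below, ::<4> for width 8),
// while AVX2 packs 4 (::<3> and ::<2> above). A hypothetical guard for the stated
// assumption:
//
//     debug_assert!(ALL_ROUND_CONSTANTS.iter().all(|&c| c < CrandallField::ORDER));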
-        unsafe { crate::hash::arch::aarch64::poseidon_crandall_neon::poseidon_const::<6>(state,
-            ALL_ROUND_CONSTANTS[12 * round_ctr..12 * round_ctr + 12].try_into().unwrap()); }
-    }
-
-    #[cfg(all(target_arch="x86_64", target_feature="avx2"))]
-    #[inline(always)]
-    fn mds_layer(state_: &[CrandallField; 12]) -> [CrandallField; 12] {
-        crate::hash::arch::x86_64::poseidon_crandall_avx2::poseidon12_mds(*state_)
-    }
-
-    #[cfg(all(target_arch="aarch64", target_feature="neon"))]
-    #[inline]
-    fn mds_layer(state_: &[CrandallField; 12]) -> [CrandallField; 12] {
-        crate::hash::arch::aarch64::poseidon_crandall_neon::poseidon12_mds(*state_)
-    }
-
-    #[cfg(all(target_arch="x86_64", target_feature="avx2"))]
-    #[inline(always)]
-    fn sbox_layer(state: &mut [Self; 12]) {
-        crate::hash::arch::x86_64::poseidon_crandall_avx2::poseidon_sbox::<3>(state);
-    }
 }
 
 #[cfg(test)]
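// NOTE (editorial, not part of the patch): with these #[cfg]-gated overrides
// removed, both widths fall back to the portable default implementations on the
// Poseidon trait. A sketch of the scalar constant layer those defaults amount to
// (hypothetical shape; the actual defaults live elsewhere in src/hash/poseidon.rs):
//
//     fn constant_layer_model(state: &mut [CrandallField], round_ctr: usize) {
//         let width = state.len();
//         for i in 0..width {
//             let c = ALL_ROUND_CONSTANTS[width * round_ctr + i];
//             state[i] = state[i] + CrandallField::from_canonical_u64(c);
//         }
//     }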