From bd3834c4035abdafeaade7db48fa2bc65f00611e Mon Sep 17 00:00:00 2001 From: Robin Salen Date: Mon, 26 Jun 2023 07:44:19 -0400 Subject: [PATCH] Silence Poseidon warnings for ARM targets --- plonky2/src/hash/poseidon_goldilocks.rs | 232 ++++++++++++------------ 1 file changed, 117 insertions(+), 115 deletions(-) diff --git a/plonky2/src/hash/poseidon_goldilocks.rs b/plonky2/src/hash/poseidon_goldilocks.rs index 0d03b3e9..c784a1db 100644 --- a/plonky2/src/hash/poseidon_goldilocks.rs +++ b/plonky2/src/hash/poseidon_goldilocks.rs @@ -4,8 +4,8 @@ //! `poseidon_constants.sage` script in the `mir-protocol/hash-constants` //! repository. +#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))] use plonky2_field::types::Field; -use unroll::unroll_for_loops; use crate::field::goldilocks_field::GoldilocksField; use crate::hash::poseidon::{Poseidon, N_PARTIAL_ROUNDS}; @@ -214,9 +214,9 @@ impl Poseidon for GoldilocksField { 0xdcedab70f40718ba, 0xe796d293a47a64cb, 0x80772dc2645b280b, ], ]; - #[cfg(target_arch="x86_64")] + #[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))] #[inline(always)] - #[unroll_for_loops] + #[unroll::unroll_for_loops] fn mds_layer(state: &[Self; 12]) -> [Self; 12] { let mut result = [GoldilocksField::ZERO; 12]; @@ -231,8 +231,8 @@ impl Poseidon for GoldilocksField { state_l[r] = (s as u32) as u64; } - let state_h = mds_multiply_freq(state_h); - let state_l = mds_multiply_freq(state_l); + let state_h = poseidon12_mds::mds_multiply_freq(state_h); + let state_l = poseidon12_mds::mds_multiply_freq(state_l); for r in 0..12 { let s = state_l[r] as u128 + ((state_h[r] as u128) << 32); @@ -307,137 +307,139 @@ impl Poseidon for GoldilocksField { // MDS layer helper methods // The following code has been adapted from winterfell/crypto/src/hash/mds/mds_f64_12x12.rs // located at https://github.com/facebook/winterfell. +#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))] +mod poseidon12_mds { + const MDS_FREQ_BLOCK_ONE: [i64; 3] = [16, 32, 16]; + const MDS_FREQ_BLOCK_TWO: [(i64, i64); 3] = [(2, -1), (-4, 1), (16, 1)]; + const MDS_FREQ_BLOCK_THREE: [i64; 3] = [-1, -8, 2]; -const MDS_FREQ_BLOCK_ONE: [i64; 3] = [16, 32, 16]; -const MDS_FREQ_BLOCK_TWO: [(i64, i64); 3] = [(2, -1), (-4, 1), (16, 1)]; -const MDS_FREQ_BLOCK_THREE: [i64; 3] = [-1, -8, 2]; + /// Split 3 x 4 FFT-based MDS vector-multiplication with the Poseidon circulant MDS matrix. + #[inline(always)] + pub(crate) fn mds_multiply_freq(state: [u64; 12]) -> [u64; 12] { + let [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] = state; -/// Split 3 x 4 FFT-based MDS vector-multiplication with the Poseidon circulant MDS matrix. -#[inline(always)] -fn mds_multiply_freq(state: [u64; 12]) -> [u64; 12] { - let [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] = state; + let (u0, u1, u2) = fft4_real([s0, s3, s6, s9]); + let (u4, u5, u6) = fft4_real([s1, s4, s7, s10]); + let (u8, u9, u10) = fft4_real([s2, s5, s8, s11]); - let (u0, u1, u2) = fft4_real([s0, s3, s6, s9]); - let (u4, u5, u6) = fft4_real([s1, s4, s7, s10]); - let (u8, u9, u10) = fft4_real([s2, s5, s8, s11]); + // This where the multiplication in frequency domain is done. More precisely, and with + // the appropriate permuations in between, the sequence of + // 3-point FFTs --> multiplication by twiddle factors --> Hadamard multiplication --> + // 3 point iFFTs --> multiplication by (inverse) twiddle factors + // is "squashed" into one step composed of the functions "block1", "block2" and "block3". + // The expressions in the aforementioned functions are the result of explicit computations + // combined with the Karatsuba trick for the multiplication of complex numbers. - // This where the multiplication in frequency domain is done. More precisely, and with - // the appropriate permuations in between, the sequence of - // 3-point FFTs --> multiplication by twiddle factors --> Hadamard multiplication --> - // 3 point iFFTs --> multiplication by (inverse) twiddle factors - // is "squashed" into one step composed of the functions "block1", "block2" and "block3". - // The expressions in the aforementioned functions are the result of explicit computations - // combined with the Karatsuba trick for the multiplication of complex numbers. + let [v0, v4, v8] = block1([u0, u4, u8], MDS_FREQ_BLOCK_ONE); + let [v1, v5, v9] = block2([u1, u5, u9], MDS_FREQ_BLOCK_TWO); + let [v2, v6, v10] = block3([u2, u6, u10], MDS_FREQ_BLOCK_THREE); + // The 4th block is not computed as it is similar to the 2nd one, up to complex conjugation. - let [v0, v4, v8] = block1([u0, u4, u8], MDS_FREQ_BLOCK_ONE); - let [v1, v5, v9] = block2([u1, u5, u9], MDS_FREQ_BLOCK_TWO); - let [v2, v6, v10] = block3([u2, u6, u10], MDS_FREQ_BLOCK_THREE); - // The 4th block is not computed as it is similar to the 2nd one, up to complex conjugation. + let [s0, s3, s6, s9] = ifft4_real_unreduced((v0, v1, v2)); + let [s1, s4, s7, s10] = ifft4_real_unreduced((v4, v5, v6)); + let [s2, s5, s8, s11] = ifft4_real_unreduced((v8, v9, v10)); - let [s0, s3, s6, s9] = ifft4_real_unreduced((v0, v1, v2)); - let [s1, s4, s7, s10] = ifft4_real_unreduced((v4, v5, v6)); - let [s2, s5, s8, s11] = ifft4_real_unreduced((v8, v9, v10)); + [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] + } - [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] -} + #[inline(always)] + fn block1(x: [i64; 3], y: [i64; 3]) -> [i64; 3] { + let [x0, x1, x2] = x; + let [y0, y1, y2] = y; + let z0 = x0 * y0 + x1 * y2 + x2 * y1; + let z1 = x0 * y1 + x1 * y0 + x2 * y2; + let z2 = x0 * y2 + x1 * y1 + x2 * y0; -/// Real 2-FFT over u64 integers. -#[inline(always)] -fn fft2_real(x: [u64; 2]) -> [i64; 2] { - [(x[0] as i64 + x[1] as i64), (x[0] as i64 - x[1] as i64)] -} + [z0, z1, z2] + } -/// Real 2-iFFT over u64 integers. -/// Division by two to complete the inverse FFT is not performed here. -#[inline(always)] -fn ifft2_real_unreduced(y: [i64; 2]) -> [u64; 2] { - [(y[0] + y[1]) as u64, (y[0] - y[1]) as u64] -} + #[inline(always)] + fn block2(x: [(i64, i64); 3], y: [(i64, i64); 3]) -> [(i64, i64); 3] { + let [(x0r, x0i), (x1r, x1i), (x2r, x2i)] = x; + let [(y0r, y0i), (y1r, y1i), (y2r, y2i)] = y; + let x0s = x0r + x0i; + let x1s = x1r + x1i; + let x2s = x2r + x2i; + let y0s = y0r + y0i; + let y1s = y1r + y1i; + let y2s = y2r + y2i; -/// Real 4-FFT over u64 integers. -#[inline(always)] -fn fft4_real(x: [u64; 4]) -> (i64, (i64, i64), i64) { - let [z0, z2] = fft2_real([x[0], x[2]]); - let [z1, z3] = fft2_real([x[1], x[3]]); - let y0 = z0 + z1; - let y1 = (z2, -z3); - let y2 = z0 - z1; - (y0, y1, y2) -} + // Compute x0​y0 ​− ix1​y2​ − ix2​y1​ using Karatsuba for complex numbers multiplication + let m0 = (x0r * y0r, x0i * y0i); + let m1 = (x1r * y2r, x1i * y2i); + let m2 = (x2r * y1r, x2i * y1i); + let z0r = (m0.0 - m0.1) + (x1s * y2s - m1.0 - m1.1) + (x2s * y1s - m2.0 - m2.1); + let z0i = (x0s * y0s - m0.0 - m0.1) + (-m1.0 + m1.1) + (-m2.0 + m2.1); + let z0 = (z0r, z0i); -/// Real 4-iFFT over u64 integers. -/// Division by four to complete the inverse FFT is not performed here. -#[inline(always)] -fn ifft4_real_unreduced(y: (i64, (i64, i64), i64)) -> [u64; 4] { - let z0 = y.0 + y.2; - let z1 = y.0 - y.2; - let z2 = y.1 .0; - let z3 = -y.1 .1; + // Compute x0​y1​ + x1​y0​ − ix2​y2 using Karatsuba for complex numbers multiplication + let m0 = (x0r * y1r, x0i * y1i); + let m1 = (x1r * y0r, x1i * y0i); + let m2 = (x2r * y2r, x2i * y2i); + let z1r = (m0.0 - m0.1) + (m1.0 - m1.1) + (x2s * y2s - m2.0 - m2.1); + let z1i = (x0s * y1s - m0.0 - m0.1) + (x1s * y0s - m1.0 - m1.1) + (-m2.0 + m2.1); + let z1 = (z1r, z1i); - let [x0, x2] = ifft2_real_unreduced([z0, z2]); - let [x1, x3] = ifft2_real_unreduced([z1, z3]); + // Compute x0​y2​ + x1​y1 ​+ x2​y0​ using Karatsuba for complex numbers multiplication + let m0 = (x0r * y2r, x0i * y2i); + let m1 = (x1r * y1r, x1i * y1i); + let m2 = (x2r * y0r, x2i * y0i); + let z2r = (m0.0 - m0.1) + (m1.0 - m1.1) + (m2.0 - m2.1); + let z2i = (x0s * y2s - m0.0 - m0.1) + (x1s * y1s - m1.0 - m1.1) + (x2s * y0s - m2.0 - m2.1); + let z2 = (z2r, z2i); - [x0, x1, x2, x3] -} + [z0, z1, z2] + } -#[inline(always)] -fn block1(x: [i64; 3], y: [i64; 3]) -> [i64; 3] { - let [x0, x1, x2] = x; - let [y0, y1, y2] = y; - let z0 = x0 * y0 + x1 * y2 + x2 * y1; - let z1 = x0 * y1 + x1 * y0 + x2 * y2; - let z2 = x0 * y2 + x1 * y1 + x2 * y0; + #[inline(always)] + fn block3(x: [i64; 3], y: [i64; 3]) -> [i64; 3] { + let [x0, x1, x2] = x; + let [y0, y1, y2] = y; + let z0 = x0 * y0 - x1 * y2 - x2 * y1; + let z1 = x0 * y1 + x1 * y0 - x2 * y2; + let z2 = x0 * y2 + x1 * y1 + x2 * y0; - [z0, z1, z2] -} + [z0, z1, z2] + } -#[inline(always)] -fn block2(x: [(i64, i64); 3], y: [(i64, i64); 3]) -> [(i64, i64); 3] { - let [(x0r, x0i), (x1r, x1i), (x2r, x2i)] = x; - let [(y0r, y0i), (y1r, y1i), (y2r, y2i)] = y; - let x0s = x0r + x0i; - let x1s = x1r + x1i; - let x2s = x2r + x2i; - let y0s = y0r + y0i; - let y1s = y1r + y1i; - let y2s = y2r + y2i; + /// Real 2-FFT over u64 integers. + #[inline(always)] + pub(crate) fn fft2_real(x: [u64; 2]) -> [i64; 2] { + [(x[0] as i64 + x[1] as i64), (x[0] as i64 - x[1] as i64)] + } - // Compute x0​y0 ​− ix1​y2​ − ix2​y1​ using Karatsuba for complex numbers multiplication - let m0 = (x0r * y0r, x0i * y0i); - let m1 = (x1r * y2r, x1i * y2i); - let m2 = (x2r * y1r, x2i * y1i); - let z0r = (m0.0 - m0.1) + (x1s * y2s - m1.0 - m1.1) + (x2s * y1s - m2.0 - m2.1); - let z0i = (x0s * y0s - m0.0 - m0.1) + (-m1.0 + m1.1) + (-m2.0 + m2.1); - let z0 = (z0r, z0i); + /// Real 2-iFFT over u64 integers. + /// Division by two to complete the inverse FFT is not performed here. + #[inline(always)] + pub(crate) fn ifft2_real_unreduced(y: [i64; 2]) -> [u64; 2] { + [(y[0] + y[1]) as u64, (y[0] - y[1]) as u64] + } - // Compute x0​y1​ + x1​y0​ − ix2​y2 using Karatsuba for complex numbers multiplication - let m0 = (x0r * y1r, x0i * y1i); - let m1 = (x1r * y0r, x1i * y0i); - let m2 = (x2r * y2r, x2i * y2i); - let z1r = (m0.0 - m0.1) + (m1.0 - m1.1) + (x2s * y2s - m2.0 - m2.1); - let z1i = (x0s * y1s - m0.0 - m0.1) + (x1s * y0s - m1.0 - m1.1) + (-m2.0 + m2.1); - let z1 = (z1r, z1i); + /// Real 4-FFT over u64 integers. + #[inline(always)] + pub(crate) fn fft4_real(x: [u64; 4]) -> (i64, (i64, i64), i64) { + let [z0, z2] = fft2_real([x[0], x[2]]); + let [z1, z3] = fft2_real([x[1], x[3]]); + let y0 = z0 + z1; + let y1 = (z2, -z3); + let y2 = z0 - z1; + (y0, y1, y2) + } - // Compute x0​y2​ + x1​y1 ​+ x2​y0​ using Karatsuba for complex numbers multiplication - let m0 = (x0r * y2r, x0i * y2i); - let m1 = (x1r * y1r, x1i * y1i); - let m2 = (x2r * y0r, x2i * y0i); - let z2r = (m0.0 - m0.1) + (m1.0 - m1.1) + (m2.0 - m2.1); - let z2i = (x0s * y2s - m0.0 - m0.1) + (x1s * y1s - m1.0 - m1.1) + (x2s * y0s - m2.0 - m2.1); - let z2 = (z2r, z2i); + /// Real 4-iFFT over u64 integers. + /// Division by four to complete the inverse FFT is not performed here. + #[inline(always)] + pub(crate) fn ifft4_real_unreduced(y: (i64, (i64, i64), i64)) -> [u64; 4] { + let z0 = y.0 + y.2; + let z1 = y.0 - y.2; + let z2 = y.1 .0; + let z3 = -y.1 .1; - [z0, z1, z2] -} + let [x0, x2] = ifft2_real_unreduced([z0, z2]); + let [x1, x3] = ifft2_real_unreduced([z1, z3]); -#[inline(always)] -fn block3(x: [i64; 3], y: [i64; 3]) -> [i64; 3] { - let [x0, x1, x2] = x; - let [y0, y1, y2] = y; - let z0 = x0 * y0 - x1 * y2 - x2 * y1; - let z1 = x0 * y1 + x1 * y0 - x2 * y2; - let z2 = x0 * y2 + x1 * y1 + x2 * y0; - - [z0, z1, z2] + [x0, x1, x2, x3] + } } #[cfg(test)]