Silence Poseidon warnings for ARM targets

2026-02-18 04:43:32 +00:00 · 2023-06-26 07:44:19 -04:00 · 2023-06-26 07:44:19 -04:00 · bd3834c403
commit bd3834c403
parent 5b8740a729
1 changed files with 117 additions and 115 deletions
--- a/plonky2/src/hash/poseidon_goldilocks.rs
+++ b/plonky2/src/hash/poseidon_goldilocks.rs
@ -4,8 +4,8 @@
 //! `poseidon_constants.sage` script in the `mir-protocol/hash-constants`
 //! repository.

+#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
 use plonky2_field::types::Field;
-use unroll::unroll_for_loops;

 use crate::field::goldilocks_field::GoldilocksField;
 use crate::hash::poseidon::{Poseidon, N_PARTIAL_ROUNDS};
@ -214,9 +214,9 @@ impl Poseidon for GoldilocksField {
         0xdcedab70f40718ba, 0xe796d293a47a64cb, 0x80772dc2645b280b, ],
    ];

-    #[cfg(target_arch="x86_64")]
+    #[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
    #[inline(always)]
-    #[unroll_for_loops]
+    #[unroll::unroll_for_loops]
    fn mds_layer(state: &[Self; 12]) -> [Self; 12] {
        let mut result = [GoldilocksField::ZERO; 12];

@ -231,8 +231,8 @@ impl Poseidon for GoldilocksField {
            state_l[r] = (s as u32) as u64;
        }

-        let state_h = mds_multiply_freq(state_h);
-        let state_l = mds_multiply_freq(state_l);
+        let state_h = poseidon12_mds::mds_multiply_freq(state_h);
+        let state_l = poseidon12_mds::mds_multiply_freq(state_l);

        for r in 0..12 {
            let s = state_l[r] as u128 + ((state_h[r] as u128) << 32);
@ -307,137 +307,139 @@ impl Poseidon for GoldilocksField {
 // MDS layer helper methods
 // The following code has been adapted from winterfell/crypto/src/hash/mds/mds_f64_12x12.rs
 // located at https://github.com/facebook/winterfell.
+#[cfg(not(all(target_arch = "aarch64", target_feature = "neon")))]
+mod poseidon12_mds {
+    const MDS_FREQ_BLOCK_ONE: [i64; 3] = [16, 32, 16];
+    const MDS_FREQ_BLOCK_TWO: [(i64, i64); 3] = [(2, -1), (-4, 1), (16, 1)];
+    const MDS_FREQ_BLOCK_THREE: [i64; 3] = [-1, -8, 2];

-const MDS_FREQ_BLOCK_ONE: [i64; 3] = [16, 32, 16];
-const MDS_FREQ_BLOCK_TWO: [(i64, i64); 3] = [(2, -1), (-4, 1), (16, 1)];
-const MDS_FREQ_BLOCK_THREE: [i64; 3] = [-1, -8, 2];
+    /// Split 3 x 4 FFT-based MDS vector-multiplication with the Poseidon circulant MDS matrix.
+    #[inline(always)]
+    pub(crate) fn mds_multiply_freq(state: [u64; 12]) -> [u64; 12] {
+        let [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] = state;

-/// Split 3 x 4 FFT-based MDS vector-multiplication with the Poseidon circulant MDS matrix.
-#[inline(always)]
-fn mds_multiply_freq(state: [u64; 12]) -> [u64; 12] {
-    let [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] = state;
+        let (u0, u1, u2) = fft4_real([s0, s3, s6, s9]);
+        let (u4, u5, u6) = fft4_real([s1, s4, s7, s10]);
+        let (u8, u9, u10) = fft4_real([s2, s5, s8, s11]);

-    let (u0, u1, u2) = fft4_real([s0, s3, s6, s9]);
-    let (u4, u5, u6) = fft4_real([s1, s4, s7, s10]);
-    let (u8, u9, u10) = fft4_real([s2, s5, s8, s11]);
+        // This where the multiplication in frequency domain is done. More precisely, and with
+        // the appropriate permuations in between, the sequence of
+        // 3-point FFTs --> multiplication by twiddle factors --> Hadamard multiplication -->
+        // 3 point iFFTs --> multiplication by (inverse) twiddle factors
+        // is "squashed" into one step composed of the functions "block1", "block2" and "block3".
+        // The expressions in the aforementioned functions are the result of explicit computations
+        // combined with the Karatsuba trick for the multiplication of complex numbers.

-    // This where the multiplication in frequency domain is done. More precisely, and with
-    // the appropriate permuations in between, the sequence of
-    // 3-point FFTs --> multiplication by twiddle factors --> Hadamard multiplication -->
-    // 3 point iFFTs --> multiplication by (inverse) twiddle factors
-    // is "squashed" into one step composed of the functions "block1", "block2" and "block3".
-    // The expressions in the aforementioned functions are the result of explicit computations
-    // combined with the Karatsuba trick for the multiplication of complex numbers.
+        let [v0, v4, v8] = block1([u0, u4, u8], MDS_FREQ_BLOCK_ONE);
+        let [v1, v5, v9] = block2([u1, u5, u9], MDS_FREQ_BLOCK_TWO);
+        let [v2, v6, v10] = block3([u2, u6, u10], MDS_FREQ_BLOCK_THREE);
+        // The 4th block is not computed as it is similar to the 2nd one, up to complex conjugation.

-    let [v0, v4, v8] = block1([u0, u4, u8], MDS_FREQ_BLOCK_ONE);
-    let [v1, v5, v9] = block2([u1, u5, u9], MDS_FREQ_BLOCK_TWO);
-    let [v2, v6, v10] = block3([u2, u6, u10], MDS_FREQ_BLOCK_THREE);
-    // The 4th block is not computed as it is similar to the 2nd one, up to complex conjugation.
+        let [s0, s3, s6, s9] = ifft4_real_unreduced((v0, v1, v2));
+        let [s1, s4, s7, s10] = ifft4_real_unreduced((v4, v5, v6));
+        let [s2, s5, s8, s11] = ifft4_real_unreduced((v8, v9, v10));

-    let [s0, s3, s6, s9] = ifft4_real_unreduced((v0, v1, v2));
-    let [s1, s4, s7, s10] = ifft4_real_unreduced((v4, v5, v6));
-    let [s2, s5, s8, s11] = ifft4_real_unreduced((v8, v9, v10));
+        [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
+    }

-    [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
-}
+    #[inline(always)]
+    fn block1(x: [i64; 3], y: [i64; 3]) -> [i64; 3] {
+        let [x0, x1, x2] = x;
+        let [y0, y1, y2] = y;
+        let z0 = x0 * y0 + x1 * y2 + x2 * y1;
+        let z1 = x0 * y1 + x1 * y0 + x2 * y2;
+        let z2 = x0 * y2 + x1 * y1 + x2 * y0;

-/// Real 2-FFT over u64 integers.
-#[inline(always)]
-fn fft2_real(x: [u64; 2]) -> [i64; 2] {
-    [(x[0] as i64 + x[1] as i64), (x[0] as i64 - x[1] as i64)]
-}
+        [z0, z1, z2]
+    }

-/// Real 2-iFFT over u64 integers.
-/// Division by two to complete the inverse FFT is not performed here.
-#[inline(always)]
-fn ifft2_real_unreduced(y: [i64; 2]) -> [u64; 2] {
-    [(y[0] + y[1]) as u64, (y[0] - y[1]) as u64]
-}
+    #[inline(always)]
+    fn block2(x: [(i64, i64); 3], y: [(i64, i64); 3]) -> [(i64, i64); 3] {
+        let [(x0r, x0i), (x1r, x1i), (x2r, x2i)] = x;
+        let [(y0r, y0i), (y1r, y1i), (y2r, y2i)] = y;
+        let x0s = x0r + x0i;
+        let x1s = x1r + x1i;
+        let x2s = x2r + x2i;
+        let y0s = y0r + y0i;
+        let y1s = y1r + y1i;
+        let y2s = y2r + y2i;

-/// Real 4-FFT over u64 integers.
-#[inline(always)]
-fn fft4_real(x: [u64; 4]) -> (i64, (i64, i64), i64) {
-    let [z0, z2] = fft2_real([x[0], x[2]]);
-    let [z1, z3] = fft2_real([x[1], x[3]]);
-    let y0 = z0 + z1;
-    let y1 = (z2, -z3);
-    let y2 = z0 - z1;
-    (y0, y1, y2)
-}
+        // Compute x0y0 − ix1y2 − ix2y1 using Karatsuba for complex numbers multiplication
+        let m0 = (x0r * y0r, x0i * y0i);
+        let m1 = (x1r * y2r, x1i * y2i);
+        let m2 = (x2r * y1r, x2i * y1i);
+        let z0r = (m0.0 - m0.1) + (x1s * y2s - m1.0 - m1.1) + (x2s * y1s - m2.0 - m2.1);
+        let z0i = (x0s * y0s - m0.0 - m0.1) + (-m1.0 + m1.1) + (-m2.0 + m2.1);
+        let z0 = (z0r, z0i);

-/// Real 4-iFFT over u64 integers.
-/// Division by four to complete the inverse FFT is not performed here.
-#[inline(always)]
-fn ifft4_real_unreduced(y: (i64, (i64, i64), i64)) -> [u64; 4] {
-    let z0 = y.0 + y.2;
-    let z1 = y.0 - y.2;
-    let z2 = y.1 .0;
-    let z3 = -y.1 .1;
+        // Compute x0y1 + x1y0 − ix2y2 using Karatsuba for complex numbers multiplication
+        let m0 = (x0r * y1r, x0i * y1i);
+        let m1 = (x1r * y0r, x1i * y0i);
+        let m2 = (x2r * y2r, x2i * y2i);
+        let z1r = (m0.0 - m0.1) + (m1.0 - m1.1) + (x2s * y2s - m2.0 - m2.1);
+        let z1i = (x0s * y1s - m0.0 - m0.1) + (x1s * y0s - m1.0 - m1.1) + (-m2.0 + m2.1);
+        let z1 = (z1r, z1i);

-    let [x0, x2] = ifft2_real_unreduced([z0, z2]);
-    let [x1, x3] = ifft2_real_unreduced([z1, z3]);
+        // Compute x0y2 + x1y1 + x2y0 using Karatsuba for complex numbers multiplication
+        let m0 = (x0r * y2r, x0i * y2i);
+        let m1 = (x1r * y1r, x1i * y1i);
+        let m2 = (x2r * y0r, x2i * y0i);
+        let z2r = (m0.0 - m0.1) + (m1.0 - m1.1) + (m2.0 - m2.1);
+        let z2i = (x0s * y2s - m0.0 - m0.1) + (x1s * y1s - m1.0 - m1.1) + (x2s * y0s - m2.0 - m2.1);
+        let z2 = (z2r, z2i);

-    [x0, x1, x2, x3]
-}
+        [z0, z1, z2]
+    }

-#[inline(always)]
-fn block1(x: [i64; 3], y: [i64; 3]) -> [i64; 3] {
-    let [x0, x1, x2] = x;
-    let [y0, y1, y2] = y;
-    let z0 = x0 * y0 + x1 * y2 + x2 * y1;
-    let z1 = x0 * y1 + x1 * y0 + x2 * y2;
-    let z2 = x0 * y2 + x1 * y1 + x2 * y0;
+    #[inline(always)]
+    fn block3(x: [i64; 3], y: [i64; 3]) -> [i64; 3] {
+        let [x0, x1, x2] = x;
+        let [y0, y1, y2] = y;
+        let z0 = x0 * y0 - x1 * y2 - x2 * y1;
+        let z1 = x0 * y1 + x1 * y0 - x2 * y2;
+        let z2 = x0 * y2 + x1 * y1 + x2 * y0;

-    [z0, z1, z2]
-}
+        [z0, z1, z2]
+    }

-#[inline(always)]
-fn block2(x: [(i64, i64); 3], y: [(i64, i64); 3]) -> [(i64, i64); 3] {
-    let [(x0r, x0i), (x1r, x1i), (x2r, x2i)] = x;
-    let [(y0r, y0i), (y1r, y1i), (y2r, y2i)] = y;
-    let x0s = x0r + x0i;
-    let x1s = x1r + x1i;
-    let x2s = x2r + x2i;
-    let y0s = y0r + y0i;
-    let y1s = y1r + y1i;
-    let y2s = y2r + y2i;
+    /// Real 2-FFT over u64 integers.
+    #[inline(always)]
+    pub(crate) fn fft2_real(x: [u64; 2]) -> [i64; 2] {
+        [(x[0] as i64 + x[1] as i64), (x[0] as i64 - x[1] as i64)]
+    }

-    // Compute x0y0 − ix1y2 − ix2y1 using Karatsuba for complex numbers multiplication
-    let m0 = (x0r * y0r, x0i * y0i);
-    let m1 = (x1r * y2r, x1i * y2i);
-    let m2 = (x2r * y1r, x2i * y1i);
-    let z0r = (m0.0 - m0.1) + (x1s * y2s - m1.0 - m1.1) + (x2s * y1s - m2.0 - m2.1);
-    let z0i = (x0s * y0s - m0.0 - m0.1) + (-m1.0 + m1.1) + (-m2.0 + m2.1);
-    let z0 = (z0r, z0i);
+    /// Real 2-iFFT over u64 integers.
+    /// Division by two to complete the inverse FFT is not performed here.
+    #[inline(always)]
+    pub(crate) fn ifft2_real_unreduced(y: [i64; 2]) -> [u64; 2] {
+        [(y[0] + y[1]) as u64, (y[0] - y[1]) as u64]
+    }

-    // Compute x0y1 + x1y0 − ix2y2 using Karatsuba for complex numbers multiplication
-    let m0 = (x0r * y1r, x0i * y1i);
-    let m1 = (x1r * y0r, x1i * y0i);
-    let m2 = (x2r * y2r, x2i * y2i);
-    let z1r = (m0.0 - m0.1) + (m1.0 - m1.1) + (x2s * y2s - m2.0 - m2.1);
-    let z1i = (x0s * y1s - m0.0 - m0.1) + (x1s * y0s - m1.0 - m1.1) + (-m2.0 + m2.1);
-    let z1 = (z1r, z1i);
+    /// Real 4-FFT over u64 integers.
+    #[inline(always)]
+    pub(crate) fn fft4_real(x: [u64; 4]) -> (i64, (i64, i64), i64) {
+        let [z0, z2] = fft2_real([x[0], x[2]]);
+        let [z1, z3] = fft2_real([x[1], x[3]]);
+        let y0 = z0 + z1;
+        let y1 = (z2, -z3);
+        let y2 = z0 - z1;
+        (y0, y1, y2)
+    }

-    // Compute x0y2 + x1y1 + x2y0 using Karatsuba for complex numbers multiplication
-    let m0 = (x0r * y2r, x0i * y2i);
-    let m1 = (x1r * y1r, x1i * y1i);
-    let m2 = (x2r * y0r, x2i * y0i);
-    let z2r = (m0.0 - m0.1) + (m1.0 - m1.1) + (m2.0 - m2.1);
-    let z2i = (x0s * y2s - m0.0 - m0.1) + (x1s * y1s - m1.0 - m1.1) + (x2s * y0s - m2.0 - m2.1);
-    let z2 = (z2r, z2i);
+    /// Real 4-iFFT over u64 integers.
+    /// Division by four to complete the inverse FFT is not performed here.
+    #[inline(always)]
+    pub(crate) fn ifft4_real_unreduced(y: (i64, (i64, i64), i64)) -> [u64; 4] {
+        let z0 = y.0 + y.2;
+        let z1 = y.0 - y.2;
+        let z2 = y.1 .0;
+        let z3 = -y.1 .1;

-    [z0, z1, z2]
-}
+        let [x0, x2] = ifft2_real_unreduced([z0, z2]);
+        let [x1, x3] = ifft2_real_unreduced([z1, z3]);

-#[inline(always)]
-fn block3(x: [i64; 3], y: [i64; 3]) -> [i64; 3] {
-    let [x0, x1, x2] = x;
-    let [y0, y1, y2] = y;
-    let z0 = x0 * y0 - x1 * y2 - x2 * y1;
-    let z1 = x0 * y1 + x1 * y0 - x2 * y2;
-    let z2 = x0 * y2 + x1 * y1 + x2 * y0;
-
-    [z0, z1, z2]
+        [x0, x1, x2, x3]
+    }
 }

 #[cfg(test)]