Merge branch 'main' into bignum-basic

2026-02-20 13:53:12 +00:00 · 2023-03-15 19:33:58 -07:00 · 2023-03-15 19:33:58 -07:00 · 1c71fb3407
commit 1c71fb3407
parent e97e818833 1576a300b8
30 changed files with 4386 additions and 880 deletions
--- a/evm/src/arithmetic/mod.rs
+++ b/evm/src/arithmetic/mod.rs
@ -1,6 +1,7 @@
 use ethereum_types::U256;
 use plonky2::field::types::PrimeField64;

+use crate::bn254_arithmetic::BN_BASE;
 use crate::util::{addmod, mulmod, submod};

 mod addcy;
@ -47,9 +48,9 @@ impl BinaryOperator {
            }
            BinaryOperator::Lt => U256::from((input0 < input1) as u8),
            BinaryOperator::Gt => U256::from((input0 > input1) as u8),
-            BinaryOperator::AddFp254 => addmod(input0, input1, BN_BASE_ORDER),
-            BinaryOperator::MulFp254 => mulmod(input0, input1, BN_BASE_ORDER),
-            BinaryOperator::SubFp254 => submod(input0, input1, BN_BASE_ORDER),
+            BinaryOperator::AddFp254 => addmod(input0, input1, BN_BASE),
+            BinaryOperator::MulFp254 => mulmod(input0, input1, BN_BASE),
+            BinaryOperator::SubFp254 => submod(input0, input1, BN_BASE),
        }
    }

@ -211,15 +212,7 @@ fn binary_op_to_rows<F: PrimeField64>(
            ternary_op_to_rows::<F>(op.row_filter(), input0, U256::zero(), input1, result)
        }
        BinaryOperator::AddFp254 | BinaryOperator::MulFp254 | BinaryOperator::SubFp254 => {
-            ternary_op_to_rows::<F>(op.row_filter(), input0, input1, BN_BASE_ORDER, result)
+            ternary_op_to_rows::<F>(op.row_filter(), input0, input1, BN_BASE, result)
        }
    }
 }
-
-/// Order of the BN254 base field.
-const BN_BASE_ORDER: U256 = U256([
-    4332616871279656263,
-    10917124144477883021,
-    13281191951274694749,
-    3486998266802970665,
-]);
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@ -0,0 +1,876 @@
+use std::mem::transmute;
+use std::ops::{Add, Div, Mul, Neg, Sub};
+
+use ethereum_types::U256;
+use rand::distributions::{Distribution, Standard};
+use rand::Rng;
+
+/// Order of the BN254 base field; every `Fp` operation in this file reduces modulo this value.
+pub const BN_BASE: U256 = U256([
+    0x3c208c16d87cfd47,
+    0x97816a916871ca8d,
+    0xb85045b68181585d,
+    0x30644e72e131a029,
+]);
+
+/// An element of the BN254 base field, stored as a `U256`.
+/// The arithmetic operators defined on `Fp` in this file reduce their results modulo `BN_BASE`.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct Fp {
+    // The stored integer value of the field element.
+    pub val: U256,
+}
+
+impl Fp {
+    /// Builds an `Fp` from a machine-sized integer.
+    /// The value is converted to `U256` and stored as-is (no reduction is applied here).
+    pub fn new(val: usize) -> Fp {
+        let val = U256::from(val);
+        Fp { val }
+    }
+}
+
+impl Distribution<Fp> for Standard {
+    /// Samples four random 64-bit limbs, assembles them into a `U256`,
+    /// and reduces the result modulo `BN_BASE`.
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp {
+        let limbs: [u64; 4] = rng.gen();
+        let val = U256(limbs) % BN_BASE;
+        Fp { val }
+    }
+}
+
+impl Add for Fp {
+    type Output = Self;
+
+    /// Adds the two stored values and reduces the sum modulo `BN_BASE`.
+    fn add(self, other: Self) -> Self {
+        let sum = self.val + other.val;
+        Fp {
+            val: sum % BN_BASE,
+        }
+    }
+}
+
+impl Neg for Fp {
+    type Output = Self;
+
+    /// Returns `BN_BASE - val`, reduced modulo `BN_BASE` so that negating zero yields zero.
+    fn neg(self) -> Self::Output {
+        let complement = BN_BASE - self.val;
+        Fp {
+            val: complement % BN_BASE,
+        }
+    }
+}
+
+impl Sub for Fp {
+    type Output = Self;
+
+    /// Subtracts `other` from `self` modulo `BN_BASE`.
+    /// `BN_BASE` is added first so the intermediate stays non-negative
+    /// for operands that are already reduced below `BN_BASE`.
+    fn sub(self, other: Self) -> Self {
+        let shifted = BN_BASE + self.val;
+        let difference = shifted - other.val;
+        Fp {
+            val: difference % BN_BASE,
+        }
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Mul for Fp {
+    type Output = Self;
+
+    /// Multiplies the two values at full width (so the product cannot overflow),
+    /// reduces modulo `BN_BASE`, and narrows the result back to a `U256`.
+    fn mul(self, other: Self) -> Self {
+        let wide_product = (self.val).full_mul(other.val);
+        let reduced = wide_product % BN_BASE;
+        // The remainder is below BN_BASE, so the narrowing conversion is expected to succeed.
+        let val = U256::try_from(reduced).unwrap();
+        Fp { val }
+    }
+}
+
+impl Fp {
+    /// Computes the inverse as `self^(BN_BASE - 2)`
+    /// (Fermat's little theorem, relying on `BN_BASE` being prime).
+    pub fn inv(self) -> Fp {
+        let exponent = BN_BASE - 2;
+        exp_fp(self, exponent)
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Div for Fp {
+    type Output = Self;
+
+    /// Division is multiplication by the inverse of the divisor.
+    fn div(self, rhs: Self) -> Self::Output {
+        let rhs_inverse = rhs.inv();
+        self * rhs_inverse
+    }
+}
+
+/// The `Fp` element with value zero.
+pub const ZERO_FP: Fp = Fp { val: U256::zero() };
+/// The `Fp` element with value one.
+pub const UNIT_FP: Fp = Fp { val: U256::one() };
+
+/// Computes `x^e` by square-and-multiply,
+/// scanning all 256 bits of `e` from least to most significant.
+fn exp_fp(x: Fp, e: U256) -> Fp {
+    // `square` holds x^(2^k) at the start of iteration k
+    let mut square = x;
+    let mut acc = Fp { val: U256::one() };
+
+    for bit_index in 0..256 {
+        if e.bit(bit_index) {
+            acc = acc * square;
+        }
+        square = square * square;
+    }
+    acc
+}
+
+/// The degree 2 field extension Fp2 is given by adjoining i, the square root of -1, to Fp.
+/// The arithmetic in this extension is standard complex arithmetic.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct Fp2 {
+    // Coefficient of 1 (the "real" part)
+    pub re: Fp,
+    // Coefficient of i (the "imaginary" part)
+    pub im: Fp,
+}
+
+/// The Fp2 element whose real and imaginary parts are both zero.
+pub const ZERO_FP2: Fp2 = Fp2 {
+    re: ZERO_FP,
+    im: ZERO_FP,
+};
+
+/// The Fp2 element with real part one and imaginary part zero.
+pub const UNIT_FP2: Fp2 = Fp2 {
+    re: UNIT_FP,
+    im: ZERO_FP,
+};
+
+impl Distribution<Fp2> for Standard {
+    /// Samples the real and imaginary parts as a pair of random `Fp` elements.
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp2 {
+        let parts: (Fp, Fp) = rng.gen();
+        Fp2 {
+            re: parts.0,
+            im: parts.1,
+        }
+    }
+}
+
+impl Add for Fp2 {
+    type Output = Self;
+
+    /// Componentwise addition of the real and imaginary parts.
+    fn add(self, other: Self) -> Self {
+        let re = self.re + other.re;
+        let im = self.im + other.im;
+        Fp2 { re, im }
+    }
+}
+
+impl Neg for Fp2 {
+    type Output = Self;
+
+    /// Negates both the real and the imaginary part.
+    fn neg(self) -> Self::Output {
+        let re = -self.re;
+        let im = -self.im;
+        Fp2 { re, im }
+    }
+}
+
+impl Sub for Fp2 {
+    type Output = Self;
+
+    /// Componentwise subtraction of the real and imaginary parts.
+    fn sub(self, other: Self) -> Self {
+        let re = self.re - other.re;
+        let im = self.im - other.im;
+        Fp2 { re, im }
+    }
+}
+
+impl Mul for Fp2 {
+    type Output = Self;
+
+    /// Complex multiplication:
+    ///     (a + bi)(c + di) = (ac - bd) + (ad + bc)i
+    fn mul(self, other: Self) -> Self {
+        let Fp2 { re: a, im: b } = self;
+        let Fp2 { re: c, im: d } = other;
+        Fp2 {
+            re: a * c - b * d,
+            im: a * d + b * c,
+        }
+    }
+}
+
+impl Fp2 {
+    /// Helper which multiplies an Fp2 element by 9 + i:
+    ///     (a + bi)(9 + i) = (9a - b) + (a + 9b)i
+    fn i9(self) -> Fp2 {
+        let nine = Fp::new(9);
+        let re = nine * self.re - self.im;
+        let im = self.re + nine * self.im;
+        Fp2 { re, im }
+    }
+
+    /// Scalar multiplies an Fp2 by an Fp, scaling both coefficients by `x`
+    pub fn scale(self, x: Fp) -> Fp2 {
+        let (re, im) = (x * self.re, x * self.im);
+        Fp2 { re, im }
+    }
+
+    /// Return the complex conjugate z' of z: Fp2
+    /// This also happens to be the frobenius map
+    ///     z -> z^p
+    /// since p == 3 mod 4 and hence
+    ///     i^p = i^3 = -i
+    fn conj(self) -> Fp2 {
+        let Fp2 { re, im } = self;
+        Fp2 { re, im: -im }
+    }
+
+    /// Return the magnitude squared of a complex number: re^2 + im^2
+    fn norm_sq(self) -> Fp {
+        let Fp2 { re, im } = self;
+        re * re + im * im
+    }
+
+    /// The inverse of z is given by z'/||z||^2 since ||z||^2 = zz'
+    pub fn inv(self) -> Fp2 {
+        let inverse_norm_sq = self.norm_sq().inv();
+        self.conj().scale(inverse_norm_sq)
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Div for Fp2 {
+    type Output = Self;
+
+    /// Division is multiplication by the inverse of the divisor.
+    fn div(self, rhs: Self) -> Self::Output {
+        let rhs_inverse = rhs.inv();
+        self * rhs_inverse
+    }
+}
+
+/// The degree 3 field extension Fp6 over Fp2 is given by adjoining t, where t^3 = 9 + i.
+/// Fp6 has basis 1, t, t^2 over Fp2.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct Fp6 {
+    // Coefficient of 1
+    pub t0: Fp2,
+    // Coefficient of t
+    pub t1: Fp2,
+    // Coefficient of t^2
+    pub t2: Fp2,
+}
+
+/// The Fp6 element whose three Fp2 coefficients are all zero.
+pub const ZERO_FP6: Fp6 = Fp6 {
+    t0: ZERO_FP2,
+    t1: ZERO_FP2,
+    t2: ZERO_FP2,
+};
+
+/// The Fp6 element whose constant coefficient is one and whose other coefficients are zero.
+pub const UNIT_FP6: Fp6 = Fp6 {
+    t0: UNIT_FP2,
+    t1: ZERO_FP2,
+    t2: ZERO_FP2,
+};
+
+impl Distribution<Fp6> for Standard {
+    /// Samples the three Fp2 coefficients as a triple of random elements.
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp6 {
+        let coeffs: (Fp2, Fp2, Fp2) = rng.gen();
+        Fp6 {
+            t0: coeffs.0,
+            t1: coeffs.1,
+            t2: coeffs.2,
+        }
+    }
+}
+
+impl Add for Fp6 {
+    type Output = Self;
+
+    /// Coefficientwise addition of the three Fp2 coefficients.
+    fn add(self, other: Self) -> Self {
+        let t0 = self.t0 + other.t0;
+        let t1 = self.t1 + other.t1;
+        let t2 = self.t2 + other.t2;
+        Fp6 { t0, t1, t2 }
+    }
+}
+
+impl Neg for Fp6 {
+    type Output = Self;
+
+    /// Negates each of the three Fp2 coefficients.
+    fn neg(self) -> Self::Output {
+        let t0 = -self.t0;
+        let t1 = -self.t1;
+        let t2 = -self.t2;
+        Fp6 { t0, t1, t2 }
+    }
+}
+
+impl Sub for Fp6 {
+    type Output = Self;
+
+    /// Coefficientwise subtraction of the three Fp2 coefficients.
+    fn sub(self, other: Self) -> Self {
+        let t0 = self.t0 - other.t0;
+        let t1 = self.t1 - other.t1;
+        let t2 = self.t2 - other.t2;
+        Fp6 { t0, t1, t2 }
+    }
+}
+
+impl Mul for Fp6 {
+    type Output = Self;
+
+    /// Polynomial multiplication in t, where the terms of degree 3 and 4
+    /// are folded back down through multiplication by 9 + i (see `i9`).
+    fn mul(self, other: Self) -> Self {
+        let (a0, a1, a2) = (self.t0, self.t1, self.t2);
+        let (b0, b1, b2) = (other.t0, other.t1, other.t2);
+        Fp6 {
+            t0: a0 * b0 + (a1 * b2 + a2 * b1).i9(),
+            t1: a0 * b1 + a1 * b0 + (a2 * b2).i9(),
+            t2: a0 * b2 + a1 * b1 + a2 * b0,
+        }
+    }
+}
+
+impl Fp6 {
+    /// This function scalar multiplies an Fp6 by an Fp2,
+    /// multiplying each of the three coefficients by `x`
+    fn scale(self, x: Fp2) -> Fp6 {
+        Fp6 {
+            t0: x * self.t0,
+            t1: x * self.t1,
+            t2: x * self.t2,
+        }
+    }
+
+    /// This function multiplies an Fp6 element by t, and hence shifts the bases,
+    /// where the t^2 coefficient picks up a factor of 9+i as the 1 coefficient of the output
+    fn sh(self) -> Fp6 {
+        Fp6 {
+            t0: self.t2.i9(),
+            t1: self.t0,
+            t2: self.t1,
+        }
+    }
+
+    /// The nth frobenius endomorphism of a p^q field is given by mapping
+    ///     x to x^(p^n)
+    /// which sends a + bt + ct^2: Fp6 to
+    ///     a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n)
+    /// The Fp2 coefficients are determined by the comment in the conj method,
+    /// while the values of
+    ///     t^(p^n) and t^(2p^n)
+    /// are precomputed in the constant arrays FROB_T1 and FROB_T2
+    ///
+    /// The index `n` is reduced modulo 6 before use.
+    pub fn frob(self, n: usize) -> Fp6 {
+        let n = n % 6;
+        let frob_t1 = FROB_T1[n];
+        let frob_t2 = FROB_T2[n];
+
+        // For odd n the Fp2 coefficients are conjugated; for even n they are left as they are
+        let (t0, t1, t2) = if n % 2 != 0 {
+            (self.t0.conj(), self.t1.conj(), self.t2.conj())
+        } else {
+            (self.t0, self.t1, self.t2)
+        };
+
+        Fp6 {
+            t0,
+            t1: frob_t1 * t1,
+            t2: frob_t2 * t2,
+        }
+    }
+
+    /// Let x_n = x^(p^n) and note that
+    ///     x_0 = x^(p^0) = x^1 = x
+    ///     (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m}
+    /// By Galois Theory, given x: Fp6, the product
+    ///     phi = x_0 * x_1 * x_2 * x_3 * x_4 * x_5
+    /// lands in Fp, and hence the inverse of x is given by
+    ///     (x_1 * x_2 * x_3 * x_4 * x_5) / phi
+    /// We can save compute by rearranging the numerator:
+    ///     (x_1 * x_3) * x_5 * (x_1 * x_3)_1
+    /// By Galois theory, the following are in Fp2 and are complex conjugates
+    ///     x_1 * x_3 * x_5,  x_0 * x_2 * x_4
+    /// and therefore
+    ///     phi = ||x_1 * x_3 * x_5||^2
+    /// and hence the inverse is given by
+    ///     ([x_1 * x_3] * x_5) * [x_1 * x_3]_1 / ||[x_1 * x_3] * x_5||^2
+    pub fn inv(self) -> Fp6 {
+        let prod_13 = self.frob(1) * self.frob(3);
+        // only the constant coefficient is kept, per the comment above
+        let prod_135 = (prod_13 * self.frob(5)).t0;
+        let phi = prod_135.norm_sq();
+        let prod_odds_over_phi = prod_135.scale(phi.inv());
+        let prod_24 = prod_13.frob(1);
+        prod_24.scale(prod_odds_over_phi)
+    }
+
+    /// Flattens the element into its six Fp values, in the order
+    ///     t0.re, t0.im, t1.re, t1.im, t2.re, t2.im
+    pub fn on_stack(self) -> Vec<U256> {
+        // The fields are read explicitly rather than reinterpreting the struct's memory:
+        // the default struct representation does not guarantee any particular field order.
+        vec![
+            self.t0.re.val,
+            self.t0.im.val,
+            self.t1.re.val,
+            self.t1.im.val,
+            self.t2.re.val,
+            self.t2.im.val,
+        ]
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Div for Fp6 {
+    type Output = Self;
+
+    /// Division is multiplication by the inverse of the divisor.
+    fn div(self, rhs: Self) -> Self::Output {
+        let rhs_inverse = rhs.inv();
+        self * rhs_inverse
+    }
+}
+
+/// The degree 2 field extension Fp12 over Fp6 is given by adjoining z, where z^2 = t.
+/// It thus has basis 1, z over Fp6
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct Fp12 {
+    // Coefficient of 1
+    pub z0: Fp6,
+    // Coefficient of z
+    pub z1: Fp6,
+}
+
+/// The Fp12 element whose constant coefficient is the Fp6 unit and whose z coefficient is zero.
+pub const UNIT_FP12: Fp12 = Fp12 {
+    z0: UNIT_FP6,
+    z1: ZERO_FP6,
+};
+
+impl Distribution<Fp12> for Standard {
+    /// Samples the two Fp6 coefficients as a pair of random elements.
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp12 {
+        let halves: (Fp6, Fp6) = rng.gen();
+        Fp12 {
+            z0: halves.0,
+            z1: halves.1,
+        }
+    }
+}
+
+impl Mul for Fp12 {
+    type Output = Self;
+
+    /// Multiplies (a0 + a1 z)(b0 + b1 z) using three Fp6 multiplications:
+    ///     a0 * b0,  a1 * b1,  (a0 + a1) * (b0 + b1)
+    /// The z^2 term is folded into the constant coefficient via `sh`, since z^2 = t.
+    fn mul(self, other: Self) -> Self {
+        let Fp12 { z0: a0, z1: a1 } = self;
+        let Fp12 { z0: b0, z1: b1 } = other;
+
+        let low = a0 * b0;
+        let high = a1 * b1;
+        let cross = (a0 + a1) * (b0 + b1);
+
+        let z0 = low + high.sh();
+        let z1 = cross - (low + high);
+        Fp12 { z0, z1 }
+    }
+}
+
+impl Fp12 {
+    /// This function scalar multiplies an Fp12 by an Fp6,
+    /// multiplying both coefficients by `x`
+    fn scale(self, x: Fp6) -> Fp12 {
+        Fp12 {
+            z0: x * self.z0,
+            z1: x * self.z1,
+        }
+    }
+
+    /// Negates the z coefficient: a + bz is sent to a - bz
+    fn conj(self) -> Fp12 {
+        Fp12 {
+            z0: self.z0,
+            z1: -self.z1,
+        }
+    }
+
+    /// The nth frobenius endomorphism of a p^q field is given by mapping
+    ///     x to x^(p^n)
+    /// which sends a + bz: Fp12 to
+    ///     a^(p^n) + b^(p^n) * z^(p^n)
+    /// where the values of z^(p^n) are precomputed in the constant array FROB_Z
+    ///
+    /// The index `n` is reduced modulo 12 before use.
+    pub fn frob(self, n: usize) -> Fp12 {
+        let n = n % 12;
+        Fp12 {
+            z0: self.z0.frob(n),
+            z1: self.z1.frob(n).scale(FROB_Z[n]),
+        }
+    }
+
+    /// By Galois Theory, given x: Fp12, the product
+    ///     phi = Prod_{i=0}^11 x_i
+    /// lands in Fp, and hence the inverse of x is given by
+    ///     (Prod_{i=1}^11 x_i) / phi
+    /// The 6th Frob map is nontrivial but leaves Fp6 fixed and hence must be the conjugate:
+    ///     x_6 = (a + bz)_6 = a - bz = x.conj()
+    /// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expressed as:
+    ///     [(prod_17) * (prod_17)_2] * (prod_17)_4 * [(prod_17) * (prod_17)_2]_1
+    /// By Galois theory, both the following are in Fp2 and are complex conjugates
+    ///     prod_odds,  prod_evens
+    /// Thus phi = ||prod_odds||^2, and hence the inverse is given by
+    ///    prod_odds * prod_evens_except_six * x.conj() / ||prod_odds||^2
+    pub fn inv(self) -> Fp12 {
+        // only the Fp6 constant coefficient of x_1 * x_7 is kept
+        let prod_17 = (self.frob(1) * self.frob(7)).z0;
+        let prod_1379 = prod_17 * prod_17.frob(2);
+        // only the Fp2 constant coefficient is kept, per the comment above
+        let prod_odds = (prod_1379 * prod_17.frob(4)).t0;
+        let phi = prod_odds.norm_sq();
+        let prod_odds_over_phi = prod_odds.scale(phi.inv());
+        let prod_evens_except_six = prod_1379.frob(1);
+        let prod_except_six = prod_evens_except_six.scale(prod_odds_over_phi);
+        self.conj().scale(prod_except_six)
+    }
+
+    /// Flattens the element into its twelve Fp values:
+    /// the six values of z0 followed by the six values of z1,
+    /// each in the order produced by `Fp6::on_stack`
+    pub fn on_stack(self) -> Vec<U256> {
+        // The halves are concatenated explicitly rather than reinterpreting the struct's memory:
+        // the default struct representation does not guarantee any particular field order.
+        let mut stack = self.z0.on_stack();
+        stack.extend(self.z1.on_stack());
+        stack
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
+impl Div for Fp12 {
+    type Output = Self;
+
+    /// Division is multiplication by the inverse of the divisor.
+    fn div(self, rhs: Self) -> Self::Output {
+        let rhs_inverse = rhs.inv();
+        self * rhs_inverse
+    }
+}
+
+const FROB_T1: [Fp2; 6] = [
+    Fp2 {
+        re: Fp { val: U256::one() },
+        im: Fp { val: U256::zero() },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0x99e39557176f553d,
+                0xb78cc310c2c3330c,
+                0x4c0bec3cf559b143,
+                0x2fb347984f7911f7,
+            ]),
+        },
+        im: Fp {
+            val: U256([
+                0x1665d51c640fcba2,
+                0x32ae2a1d0b7c9dce,
+                0x4ba4cc8bd75a0794,
+                0x16c9e55061ebae20,
+            ]),
+        },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0xe4bd44e5607cfd48,
+                0xc28f069fbb966e3d,
+                0x5e6dd9e7e0acccb0,
+                0x30644e72e131a029,
+            ]),
+        },
+        im: Fp { val: U256::zero() },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0x7b746ee87bdcfb6d,
+                0x805ffd3d5d6942d3,
+                0xbaff1c77959f25ac,
+                0x0856e078b755ef0a,
+            ]),
+        },
+        im: Fp {
+            val: U256([
+                0x380cab2baaa586de,
+                0x0fdf31bf98ff2631,
+                0xa9f30e6dec26094f,
+                0x04f1de41b3d1766f,
+            ]),
+        },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0x5763473177fffffe,
+                0xd4f263f1acdb5c4f,
+                0x59e26bcea0d48bac,
+                0x0,
+            ]),
+        },
+        im: Fp { val: U256::zero() },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0x62e913ee1dada9e4,
+                0xf71614d4b0b71f3a,
+                0x699582b87809d9ca,
+                0x28be74d4bb943f51,
+            ]),
+        },
+        im: Fp {
+            val: U256([
+                0xedae0bcec9c7aac7,
+                0x54f40eb4c3f6068d,
+                0xc2b86abcbe01477a,
+                0x14a88ae0cb747b99,
+            ]),
+        },
+    },
+];
+
+const FROB_T2: [Fp2; 6] = [
+    Fp2 {
+        re: Fp { val: U256::one() },
+        im: Fp { val: U256::zero() },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x848a1f55921ea762,
+                    0xd33365f7be94ec72,
+                    0x80f3c0b75a181e84,
+                    0x05b54f5e64eea801,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0xc13b4711cd2b8126,
+                    0x3685d2ea1bdec763,
+                    0x9f3a80b03b0b1c92,
+                    0x2c145edbe7fd8aee,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x5763473177fffffe,
+                    0xd4f263f1acdb5c4f,
+                    0x59e26bcea0d48bac,
+                    0x0,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x0e1a92bc3ccbf066,
+                    0xe633094575b06bcb,
+                    0x19bee0f7b5b2444e,
+                    0xbc58c6611c08dab,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x5fe3ed9d730c239f,
+                    0xa44a9e08737f96e5,
+                    0xfeb0f6ef0cd21d04,
+                    0x23d5e999e1910a12,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xe4bd44e5607cfd48,
+                    0xc28f069fbb966e3d,
+                    0x5e6dd9e7e0acccb0,
+                    0x30644e72e131a029,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xa97bda050992657f,
+                    0xde1afb54342c724f,
+                    0x1d9da40771b6f589,
+                    0x1ee972ae6a826a7d,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x5721e37e70c255c9,
+                    0x54326430418536d1,
+                    0xd2b513cdbb257724,
+                    0x10de546ff8d4ab51,
+                ]),
+            }
+        },
+    },
+];
+
+const FROB_Z: [Fp2; 12] = [
+    Fp2 {
+        re: { Fp { val: U256::one() } },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xd60b35dadcc9e470,
+                    0x5c521e08292f2176,
+                    0xe8b99fdd76e68b60,
+                    0x1284b71c2865a7df,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0xca5cf05f80f362ac,
+                    0x747992778eeec7e5,
+                    0xa6327cfe12150b8e,
+                    0x246996f3b4fae7e6,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xe4bd44e5607cfd49,
+                    0xc28f069fbb966e3d,
+                    0x5e6dd9e7e0acccb0,
+                    0x30644e72e131a029,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xe86f7d391ed4a67f,
+                    0x894cb38dbe55d24a,
+                    0xefe9608cd0acaa90,
+                    0x19dc81cfcc82e4bb,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x7694aa2bf4c0c101,
+                    0x7f03a5e397d439ec,
+                    0x06cbeee33576139d,
+                    0xabf8b60be77d73,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xe4bd44e5607cfd48,
+                    0xc28f069fbb966e3d,
+                    0x5e6dd9e7e0acccb0,
+                    0x30644e72e131a029,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x1264475e420ac20f,
+                    0x2cfa95859526b0d4,
+                    0x072fc0af59c61f30,
+                    0x757cab3a41d3cdc,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0xe85845e34c4a5b9c,
+                    0xa20b7dfd71573c93,
+                    0x18e9b79ba4e2606c,
+                    0xca6b035381e35b6,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x3c208c16d87cfd46,
+                    0x97816a916871ca8d,
+                    0xb85045b68181585d,
+                    0x30644e72e131a029,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x6615563bfbb318d7,
+                    0x3b2f4c893f42a916,
+                    0xcf96a5d90a9accfd,
+                    0x1ddf9756b8cbf849,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x71c39bb757899a9b,
+                    0x2307d819d98302a7,
+                    0x121dc8b86f6c4ccf,
+                    0x0bfab77f2c36b843,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x5763473177fffffe,
+                    0xd4f263f1acdb5c4f,
+                    0x59e26bcea0d48bac,
+                    0x0,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x53b10eddb9a856c8,
+                    0x0e34b703aa1bf842,
+                    0xc866e529b0d4adcd,
+                    0x1687cca314aebb6d,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0xc58be1eae3bc3c46,
+                    0x187dc4add09d90a0,
+                    0xb18456d34c0b44c0,
+                    0x2fb855bcd54a22b6,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x5763473177ffffff,
+                    0xd4f263f1acdb5c4f,
+                    0x59e26bcea0d48bac,
+                    0x0,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x29bc44b896723b38,
+                    0x6a86d50bd34b19b9,
+                    0xb120850727bb392d,
+                    0x290c83bf3d14634d,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x53c846338c32a1ab,
+                    0xf575ec93f71a8df9,
+                    0x9f668e1adc9ef7f0,
+                    0x23bd9e3da9136a73,
+                ]),
+            }
+        },
+    },
+];
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@ -0,0 +1,353 @@
+use std::ops::Add;
+
+use rand::Rng;
+
+use crate::bn254_arithmetic::{Fp, Fp12, Fp2, Fp6, UNIT_FP12, ZERO_FP, ZERO_FP2};
+
+// The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 3
+// (the constant 3 matches the twist equation below and the y^2 - 9 term in `tangent`)
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct Curve {
+    pub x: Fp,
+    pub y: Fp,
+}
+
+/// Standard addition formula for elliptic curves, restricted to the cases
+/// where neither inputs nor output would ever be the identity O. source:
+/// https://en.wikipedia.org/wiki/Elliptic_curve#Algebraic_interpretation
+impl Add for Curve {
+    type Output = Self;
+
+    /// The slope is that of the tangent line when the two points coincide,
+    /// and that of the chord through the two points otherwise.
+    fn add(self, other: Self) -> Self {
+        let slope = if self == other {
+            Fp::new(3) * self.x * self.x / (Fp::new(2) * self.y)
+        } else {
+            (other.y - self.y) / (other.x - self.x)
+        };
+        let x = slope * slope - (self.x + other.x);
+        let y = slope * (self.x - x) - self.y;
+        Curve { x, y }
+    }
+}
+
+// The twisted curve consists of pairs (x, y): (Fp2, Fp2) | y^2 = x^3 + 3/(9 + i)
+// Both coordinates are elements of the quadratic extension Fp2
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TwistedCurve {
+    pub x: Fp2,
+    pub y: Fp2,
+}
+
+/// The tate pairing takes a point each from the curve and its twist and outputs an Fp12 element.
+/// It is the miller loop output raised to the invariant exponent.
+pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
+    invariant_exponent(miller_loop(p, q))
+}
+
+/// Standard code for miller loop, can be found on page 99 at this url:
+/// https://static1.squarespace.com/static/5fdbb09f31d71c1227082339/t/5ff394720493bd28278889c6/1609798774687/PairingsForBeginners.pdf#page=107
+/// where EXP is a hardcoding of the array of Booleans that the loop traverses
+pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
+    let mut r = p;
+    let mut acc = UNIT_FP12;
+
+    for bit in EXP {
+        // doubling step: evaluate the tangent line at r before r is doubled
+        let tangent_line = tangent(r, q);
+        r = r + r;
+        acc = tangent_line * acc * acc;
+
+        if bit {
+            // addition step: evaluate the line through p and r before p is added to r
+            let cord_line = cord(p, r, q);
+            r = r + p;
+            acc = cord_line * acc;
+        }
+    }
+    acc
+}
+
+/// The sloped line function for doubling a point
+pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
+    let cx = -Fp::new(3) * p.x * p.x;
+    let cy = Fp::new(2) * p.y;
+    let constant_term = p.y * p.y - Fp::new(9);
+    let x_term = q.x.scale(cx);
+    let y_term = q.y.scale(cy);
+    sparse_embed(constant_term, x_term, y_term)
+}
+
+/// The sloped line function for adding two points
+pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
+    let cx = p2.y - p1.y;
+    let cy = p1.x - p2.x;
+    let constant_term = p1.y * p2.x - p2.y * p1.x;
+    let x_term = q.x.scale(cx);
+    let y_term = q.y.scale(cy);
+    sparse_embed(constant_term, x_term, y_term)
+}
+
+/// The tangent and cord functions output sparse Fp12 elements.
+/// This map embeds the nonzero coefficients into an Fp12:
+/// g000 becomes the real part of z0.t0, g01 becomes z0.t1, and g11 becomes z1.t1,
+/// while every other coefficient is zero.
+pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
+    Fp12 {
+        z0: Fp6 {
+            t0: Fp2 {
+                re: g000,
+                im: ZERO_FP,
+            },
+            t1: g01,
+            t2: ZERO_FP2,
+        },
+        z1: Fp6 {
+            t0: ZERO_FP2,
+            t1: g11,
+            t2: ZERO_FP2,
+        },
+    }
+}
+
+/// Generates a random sparse Fp12 element by sampling
+/// one Fp and two Fp2 values (in that order) and passing them to `sparse_embed`.
+pub fn gen_fp12_sparse<R: Rng + ?Sized>(rng: &mut R) -> Fp12 {
+    let g000: Fp = rng.gen();
+    let g01: Fp2 = rng.gen();
+    let g11: Fp2 = rng.gen();
+    sparse_embed(g000, g01, g11)
+}
+
+/// The output y of the miller loop is not an invariant,
+/// but one gets an invariant by raising y to the power
+///     (p^12 - 1)/N = (p^6 - 1)(p^2 + 1)(p^4 - p^2 + 1)/N
+/// where N is the cyclic group order of the curve.
+/// To achieve this, we first exponentiate y by p^6 - 1 via
+///     y = y_6 / y
+/// and then exponentiate the result by p^2 + 1 via
+///     y = y_2 * y
+/// We then note that (p^4 - p^2 + 1)/N can be rewritten as
+///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
+/// where 0 < a0, a1, a2 < p. Then the final power is given by
+///     y = y_3 * (y^a2)_2 * (y^-a1)_1 * (y^-a0)
+pub fn invariant_exponent(f: Fp12) -> Fp12 {
+    // exponentiate by p^6 - 1
+    let y6 = f.frob(6) / f;
+    // exponentiate by p^2 + 1
+    let y = y6.frob(2) * y6;
+    // combine the custom powers with the matching frobenius maps
+    let (y_a2, y_a1, y_a0) = get_custom_powers(y);
+    y.frob(3) * y_a2.frob(2) * y_a1.frob(1) * y_a0
+}
+
+/// We first compute together (so as to avoid repeated steps)
+///     y^a4, y^a2, y^a0
+/// where a1 is given by
+///     a1 = a4 + 2a2 - a0
+/// we then invert y^a0 and return
+///     y^a2, y^a1 = y^a4 * y^a2 * y^a2 * y^(-a0), y^(-a0)
+///
+/// Representing a4, a2, a0 in *little endian* binary, define
+///     EXPS4 = [(a4[i], a2[i], a0[i]) for i in       0..len(a4)]
+///     EXPS2 = [       (a2[i], a0[i]) for i in len(a4)..len(a2)]
+///     EXPS0 = [               a0[i]  for i in len(a2)..len(a0)]
+fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
+    // `power` is squared once per bit consumed, so it holds f^(2^k) after k bits
+    let mut power: Fp12 = f;
+    let mut acc0: Fp12 = UNIT_FP12;
+    let mut acc2: Fp12 = UNIT_FP12;
+    let mut acc4: Fp12 = UNIT_FP12;
+
+    // standard square-and-multiply exponentiation, sharing the squarings
+
+    // phase 1: all three exponents a4, a2, a0 still have bits left
+    for (bit4, bit2, bit0) in EXPS4 {
+        if bit4 {
+            acc4 = acc4 * power;
+        }
+        if bit2 {
+            acc2 = acc2 * power;
+        }
+        if bit0 {
+            acc0 = acc0 * power;
+        }
+        power = power * power;
+    }
+    // leading term of a4 is always 1
+    acc4 = acc4 * power;
+
+    // phase 2: only a2 and a0 have bits left
+    for (bit2, bit0) in EXPS2 {
+        if bit2 {
+            acc2 = acc2 * power;
+        }
+        if bit0 {
+            acc0 = acc0 * power;
+        }
+        power = power * power;
+    }
+    // leading term of a2 is always 1
+    acc2 = acc2 * power;
+
+    // phase 3: only a0 has bits left
+    for bit0 in EXPS0 {
+        if bit0 {
+            acc0 = acc0 * power;
+        }
+        power = power * power;
+    }
+    // leading term of a0 is always 1
+    acc0 = acc0 * power;
+
+    // invert y^a0 to obtain y^(-a0)
+    let acc0_inv = acc0.inv();
+
+    // return y^a2, y^a1 = y^a4 * (y^a2)^2 * y^(-a0), y^(-a0)
+    (acc2, acc4 * acc2 * acc2 * acc0_inv, acc0_inv)
+}
+
+const EXP: [bool; 253] = [
+    true, false, false, false, false, false, true, true, false, false, true, false, false, false,
+    true, false, false, true, true, true, false, false, true, true, true, false, false, true,
+    false, true, true, true, false, false, false, false, true, false, false, true, true, false,
+    false, false, true, true, false, true, false, false, false, false, false, false, false, true,
+    false, true, false, false, true, true, false, true, true, true, false, false, false, false,
+    true, false, true, false, false, false, false, false, true, false, false, false, true, false,
+    true, true, false, true, true, false, true, true, false, true, false, false, false, false,
+    false, false, true, true, false, false, false, false, false, false, true, false, true, false,
+    true, true, false, false, false, false, true, false, true, true, true, false, true, false,
+    false, true, false, true, false, false, false, false, false, true, true, false, false, true,
+    true, true, true, true, false, true, false, false, false, false, true, false, false, true,
+    false, false, false, false, true, true, true, true, false, false, true, true, false, true,
+    true, true, false, false, true, false, true, true, true, false, false, false, false, true,
+    false, false, true, false, false, false, true, false, true, false, false, false, false, true,
+    true, true, true, true, false, false, false, false, true, true, true, true, true, false, true,
+    false, true, true, false, false, true, false, false, true, true, true, true, true, true, false,
+    false, false, false, false, false, false, false, false, false, false, false, false, false,
+    false, false, false, false, false, false, false, false, false, false, false, false, false,
+    false,
+];
+
+// The following constants are defined above get_custom_powers
+
+const EXPS4: [(bool, bool, bool); 64] = [
+    (true, true, false),
+    (true, true, true),
+    (true, true, true),
+    (false, false, false),
+    (false, false, true),
+    (true, false, true),
+    (false, true, false),
+    (true, false, true),
+    (true, true, false),
+    (true, false, true),
+    (false, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (false, true, false),
+    (false, true, false),
+    (false, false, true),
+    (true, false, true),
+    (true, true, false),
+    (false, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (false, false, true),
+    (false, false, true),
+    (true, false, true),
+    (true, false, true),
+    (true, true, false),
+    (true, false, false),
+    (true, true, false),
+    (false, true, false),
+    (true, true, false),
+    (true, false, false),
+    (false, true, false),
+    (false, false, false),
+    (true, false, false),
+    (true, false, false),
+    (true, false, true),
+    (false, false, true),
+    (false, true, true),
+    (false, false, true),
+    (false, true, true),
+    (false, true, true),
+    (false, false, false),
+    (true, true, true),
+    (true, false, true),
+    (true, false, true),
+    (false, true, true),
+    (true, false, true),
+    (false, true, true),
+    (false, true, true),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, false, false),
+    (false, false, true),
+    (true, false, false),
+    (false, false, true),
+    (true, false, true),
+    (true, true, false),
+    (true, true, true),
+    (false, true, true),
+    (false, true, false),
+    (true, true, true),
+];
+
+const EXPS2: [(bool, bool); 62] = [
+    (true, false),
+    (true, true),
+    (false, false),
+    (true, false),
+    (true, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (true, false),
+    (false, true),
+    (false, true),
+    (true, true),
+    (true, true),
+    (false, false),
+    (true, true),
+    (false, false),
+    (false, false),
+    (false, true),
+    (false, true),
+    (true, true),
+    (true, true),
+    (true, true),
+    (false, true),
+    (true, true),
+    (false, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (false, false),
+    (true, true),
+    (true, true),
+    (true, false),
+    (false, false),
+    (false, true),
+    (false, false),
+    (true, true),
+    (false, true),
+    (false, false),
+    (true, false),
+    (false, true),
+    (false, true),
+    (true, false),
+    (false, true),
+    (false, false),
+    (false, false),
+    (false, false),
+    (false, true),
+    (true, false),
+    (true, true),
+    (false, true),
+    (true, true),
+    (true, false),
+    (false, true),
+    (false, false),
+    (true, false),
+    (false, true),
+    (true, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (false, true),
+    (true, true),
+];
+
+const EXPS0: [bool; 65] = [
+    false, false, true, false, false, true, true, false, true, false, true, true, true, false,
+    true, false, false, false, true, false, false, true, false, true, false, true, true, false,
+    false, false, false, false, true, false, true, false, true, true, true, false, false, true,
+    true, true, true, false, true, false, true, true, false, false, true, false, false, false,
+    true, true, true, true, false, false, true, true, false,
+];
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@ -34,12 +34,19 @@ pub(crate) fn combined_kernel() -> Kernel {
        include_str!("asm/core/terminate.asm"),
        include_str!("asm/core/transfer.asm"),
        include_str!("asm/core/util.asm"),
-        include_str!("asm/curve/bn254/curve_add.asm"),
-        include_str!("asm/curve/bn254/curve_mul.asm"),
-        include_str!("asm/curve/bn254/moddiv.asm"),
-        include_str!("asm/curve/bn254/glv.asm"),
-        include_str!("asm/curve/bn254/msm.asm"),
-        include_str!("asm/curve/bn254/precomputation.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/glv.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/invariant_exponent.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/msm.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/precomputation.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/degree_6_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/degree_12_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/util.asm"),
        include_str!("asm/curve/common.asm"),
        include_str!("asm/curve/secp256k1/curve_add.asm"),
        include_str!("asm/curve/secp256k1/ecrecover.asm"),
@ -50,9 +57,6 @@ pub(crate) fn combined_kernel() -> Kernel {
        include_str!("asm/curve/secp256k1/precomputation.asm"),
        include_str!("asm/curve/wnaf.asm"),
        include_str!("asm/exp.asm"),
-        include_str!("asm/fields/fp6_macros.asm"),
-        include_str!("asm/fields/fp6_mul.asm"),
-        include_str!("asm/fields/fp12_mul.asm"),
        include_str!("asm/halt.asm"),
        include_str!("asm/hash/blake2b/addresses.asm"),
        include_str!("asm/hash/blake2b/compression.asm"),
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@ -0,0 +1,88 @@
+/// miller_data is defined by
+/// (1) taking the binary expansion of N254, the order of the elliptic curve group
+/// (2) popping the first and last elements, then appending a 0:
+///     exp = bin(N254)[1:-1] + [0]
+/// (3) counting the lengths of runs of 1s then 0s in exp, e.g.
+///     if exp = 1100010011110, then EXP = [(2,3), (1,2), (4,1)]
+/// (4) byte encoding each pair (n,m) as follows:
+///     miller_data = [(0x20)n + m for (n,m) in EXP]
+
+/// 53 bytes in total. miller_loop starts with times = 53 and loads the byte at
+/// index times-1 on each pass, so the table is consumed from its last byte
+/// back to its first.
+global miller_data:
+    BYTES 0xdc, 0x22, 0x42, 0x21
+    BYTES 0xa1, 0xa4, 0x24, 0x21
+    BYTES 0x23, 0x22, 0x64, 0x21
+    BYTES 0x62, 0x41, 0x82, 0x24
+    BYTES 0x22, 0x24, 0xa1, 0x42
+    BYTES 0x25, 0x21, 0x22, 0x61
+    BYTES 0x21, 0x44, 0x21, 0x21
+    BYTES 0x46, 0x26, 0x41, 0x41
+    BYTES 0x41, 0x21, 0x23, 0x25
+    BYTES 0x21, 0x64, 0x41, 0x22
+    BYTES 0x21, 0x27, 0x41, 0x43
+    BYTES 0x22, 0x64, 0x21, 0x62
+    BYTES 0x62, 0x22, 0x23, 0x42
+    BYTES 0x25
+
+
+/// final_exp first computes y^a4, y^a2, y^a0
+/// representing a4, a2, a0 in *little endian* binary, define
+///     EXPS4 = [(a4[i], a2[i], a0[i]) for i in       0..len(a4)]
+///     EXPS2 = [       (a2[i], a0[i]) for i in len(a4)..len(a2)]
+///     EXPS0 = [               a0[i]  for i in len(a2)..len(a0)]
+/// power_data_n is simply a reverse-order byte encoding of EXPSn
+///     where (i,j,k) is sent to (100)i + (10)j + k
+
+/// 64 entries, loaded by power_loop_4 at index i-1 for i = 64 down to 1
+/// (i.e. from the last entry back to the first).
+/// Each entry abc is tested digit by digit: abc >= 100 multiplies y4 by the
+/// running square, the remainder >= 10 multiplies y2, and a nonzero last digit
+/// multiplies y0.
+global power_data_4:
+    BYTES 111, 010, 011, 111
+    BYTES 110, 101, 001, 100
+    BYTES 001, 100, 110, 110
+    BYTES 110, 011, 011, 101
+    BYTES 011, 101, 101, 111
+    BYTES 000, 011, 011, 001
+    BYTES 011, 001, 101, 100
+    BYTES 100, 000, 010, 100
+    BYTES 110, 010, 110, 100
+    BYTES 110, 101, 101, 001
+    BYTES 001, 110, 110, 110
+    BYTES 010, 110, 101, 001
+    BYTES 010, 010, 110, 110
+    BYTES 110, 010, 101, 110
+    BYTES 101, 010, 101, 001
+    BYTES 000, 111, 111, 110
+
+/// 62 entries, loaded by power_loop_2 at index j-1 for j = 62 down to 1
+/// (i.e. from the last entry back to the first).
+/// Each entry ab is tested digit by digit: ab >= 10 multiplies y2 by the
+/// running square, and a nonzero last digit multiplies y0.
+global power_data_2:
+    BYTES 11, 01, 11, 10
+    BYTES 11, 10, 01, 10
+    BYTES 00, 01, 10, 11
+    BYTES 01, 11, 10, 01
+    BYTES 00, 00, 00, 01
+    BYTES 10, 01, 01, 10
+    BYTES 00, 01, 11, 00
+    BYTES 01, 00, 10, 11
+    BYTES 11, 00, 11, 10
+    BYTES 11, 00, 11, 01
+    BYTES 11, 11, 11, 01
+    BYTES 01, 00, 00, 11
+    BYTES 00, 11, 11, 01
+    BYTES 01, 10, 11, 10
+    BYTES 11, 10, 10, 00
+    BYTES 11, 10
+
+/// 65 entries, loaded by power_loop_0 at index k-1 for k = 65 down to 1
+/// (i.e. from the last entry back to the first).
+/// A nonzero entry multiplies y0 by the running square.
+global power_data_0:
+    BYTES 0, 1, 1, 0
+    BYTES 0, 1, 1, 1
+    BYTES 1, 0, 0, 0
+    BYTES 1, 0, 0, 1
+    BYTES 1, 0, 1, 0
+    BYTES 1, 1, 1, 1
+    BYTES 0, 0, 1, 1
+    BYTES 1, 0, 1, 0
+    BYTES 1, 0, 0, 0
+    BYTES 0, 0, 1, 1
+    BYTES 0, 1, 0, 1
+    BYTES 0, 0, 1, 0
+    BYTES 0, 0, 1, 0
+    BYTES 1, 1, 1, 0
+    BYTES 1, 0, 1, 1
+    BYTES 0, 0, 1, 0
+    BYTES 0
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@ -1,16 +1,6 @@
-// #define N 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 // BN254 base field order
-
 // BN254 elliptic curve addition.
 // Uses the standard affine addition formula.
 global bn_add:
-    // Uncomment for test inputs.
-    // PUSH 0xdeadbeef
-    // PUSH 2
-    // PUSH 1
-    // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121
-    // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770
-    // stack: x0, y0, x1, y1, retdest
-
    // Check if points are valid BN254 points.
    DUP2
    // stack: y0, x0, y0, x1, y1, retdest
@ -46,7 +36,7 @@ global bn_add_valid_points:
    // stack: x0, y0, x0, y0, x1, y1, retdest
    %ec_isidentity
    // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest
-    %jumpi(bn_add_first_zero)
+    %jumpi(bn_add_fst_zero)
    // stack: x0, y0, x1, y1, retdest

    // Check if the second point is the identity.
@ -75,21 +65,21 @@ global bn_add_valid_points:
    // stack: y1, x0, y0, x1, y1, retdest
    DUP3
    // stack: y0, y1, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
    // stack: y0 - y1, x0, y0, x1, y1, retdest
    DUP4
    // stack: x1, y0 - y1, x0, y0, x1, y1, retdest
    DUP3
    // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
    // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest
-    %moddiv
+    %divr_fp254
    // stack: lambda, x0, y0, x1, y1, retdest
    %jump(bn_add_valid_points_with_lambda)

 // BN254 elliptic curve addition.
 // Assumption: (x0,y0) == (0,0)
-bn_add_first_zero:
+bn_add_fst_zero:
    // stack: x0, y0, x1, y1, retdest
    // Just return (x1,y1)
    %stack (x0, y0, x1, y1, retdest) -> (retdest, x1, y1)
@ -114,37 +104,33 @@ bn_add_valid_points_with_lambda:
    // stack: x0, lambda, x0, y0, x1, y1, retdest
    DUP5
    // stack: x1, x0, lambda, x0, y0, x1, y1, retdest
-    %bn_base
-    // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
+    DUP3
+    // stack: lambda, x1, x0, lambda, x0, y0, x1, y1, retdest
    DUP1
-    // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    MULMOD
+    // stack: lambda, lambda, x1, x0, lambda, x0, y0, x1, y1, retdest
+    MULFP254
    // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
    // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
    // stack: x2, lambda, x0, y0, x1, y1, retdest

    // Compute y2 = lambda*(x1 - x2) - y1
-    %bn_base
-    // stack: N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP7
-    // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    MULMOD
+    DUP1
+    // stack: x2, x2, lambda, x0, y0, x1, y1, retdest
+    DUP6
+    // stack: x1, x2, x2, lambda, x0, y0, x1, y1, retdest
+    SUBFP254
+    // stack: x1 - x2, x2, lambda, x0, y0, x1, y1, retdest
+    DUP3
+    // stack: lambda, x1 - x2, x2, lambda, x0, y0, x1, y1, retdest
+    MULFP254
    // stack: lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
    DUP7
    // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
    SWAP1
    // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
    // stack: y2, x2, lambda, x0, y0, x1, y1, retdest

    // Return x2,y2
@ -185,23 +171,19 @@ bn_add_equal_points:
    // stack: x0, y0, x1, y1, retdest

    // Compute lambda = 3/2 * x0^2 / y0
-    %bn_base
-    // stack: N, x0, y0, x1, y1, retdest
-    %bn_base
-    // stack: N, N, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: x0, N, N, x0, y0, x1, y1, retdest
    DUP1
-    // stack: x0, x0, N, N, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: x0^2, N, x0, y0, x1, y1, retdest with
+    // stack: x0, x0, y0, x1, y1, retdest
+    DUP1
+    // stack: x0, x0, x0, y0, x1, y1, retdest
+    MULFP254
+    // stack: x0^2, x0, y0, x1, y1, retdest
    PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field
-    // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest
-    MULMOD
+    // stack: 3/2, x0^2, x0, y0, x1, y1, retdest
+    MULFP254
    // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest
    DUP3
    // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest
-    %moddiv
+    %divr_fp254
    // stack: lambda, x0, y0, x1, y1, retdest
    %jump(bn_add_valid_points_with_lambda)

@ -217,79 +199,59 @@ global bn_double:
    // stack: x, y, x, y, retdest
    %jump(bn_add_equal_points)

-// Push the order of the BN254 base field.
-%macro bn_base
-    PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47
-%endmacro
-
-// Assumption: x, y < N and 2N < 2^256.
-// Note: Doesn't hold for Secp256k1 base field.
-%macro submod
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    ADD
-    // stack: N + x, y // Doesn't overflow since 2N < 2^256
-    SUB
-    // stack: N + x - y // Doesn't underflow since y < N
-    %bn_base
-    // stack: N, N + x - y
-    SWAP1
-    // stack: N + x - y, N
-    MOD
-    // stack: (N + x - y) % N = (x-y) % N
-%endmacro
-
 // Check if (x,y) is a valid curve point.
-// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack.
+// Returns (range & curve) || ident
+// where
+//     range = (x < N) & (y < N) 
+//     curve = y^2 == (x^3 + 3) 
+//     ident = (x,y) == (0,0)
 %macro bn_check
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    DUP2
-    // stack: x, N, x, y
-    LT
-    // stack: x < N, x, y
-    %bn_base
-    // stack: N, x < N, x, y
-    DUP4
-    // stack: y, N, x < N, x, y
-    LT
-    // stack: y < N, x < N, x, y
-    AND
-    // stack: (y < N) & (x < N), x, y
-    %stack (b, x, y) -> (x, x, @BN_BASE, x, @BN_BASE, @BN_BASE, x, y, b)
-    // stack: x, x, N, x, N, N, x, y, b
-    MULMOD
-    // stack: x^2 % N, x, N, N, x, y, b
-    MULMOD
-    // stack: x^3 % N, N, x, y, b
-    PUSH 3
-    // stack: 3, x^3 % N, N, x, y, b
-    ADDMOD
-    // stack: (x^3 + 3) % N, x, y, b
-    DUP3
-    // stack: y, (x^3 + 3) % N, x, y, b
-    %bn_base
-    // stack: N, y, (x^3 + 3) % N, x, y, b
-    SWAP1
-    // stack: y, N, (x^3 + 3) % N, x, y, b
+    // stack:                       x, y
    DUP1
-    // stack: y, y, N, (x^3 + 3) % N, x, y, b
-    MULMOD
-    // stack: y^2 % N, (x^3 + 3) % N, x, y, b
-    EQ
-    // stack: y^2 % N == (x^3 + 3) % N, x, y, b
+    // stack:                    x, x, y
+    PUSH @BN_BASE
+    // stack:                N , x, x, y
+    DUP1
+    // stack:             N, N , x, x, y
+    DUP5
+    // stack:         y , N, N , x, x, y
+    LT  
+    // stack:         y < N, N , x, x, y
    SWAP2
-    // stack: y, x, y^2 % N == (x^3 + 3) % N, b
-    %ec_isidentity
-    // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b
-    SWAP2
-    // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0)
+    // stack:         x , N, y < N, x, y
+    LT
+    // stack:         x < N, y < N, x, y
    AND
-    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0)
+    // stack:                range, x, y
+    SWAP2
+    // stack:                y, x, range
+    DUP2 
+    // stack:           x  , y, x, range
+    DUP1 
+    DUP1
+    MULFP254
+    MULFP254
+    // stack:           x^3, y, x, range
+    PUSH 3
+    ADDFP254
+    // stack:       3 + x^3, y, x, range
+    DUP2
+    // stack:  y  , 3 + x^3, y, x, range
+    DUP1
+    MULFP254
+    // stack:  y^2, 3 + x^3, y, x, range
+    EQ
+    // stack:         curve, y, x, range
+    SWAP2
+    // stack:         x, y, curve, range
+    %ec_isidentity
+    // stack:       ident , curve, range
+    SWAP2
+    // stack:       range , curve, ident
+    AND
+    // stack:       range & curve, ident
    OR
-    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0)
+    // stack:                   is_valid
 %endmacro

 // Return (u256::MAX, u256::MAX) which is used to indicate the input was invalid.
@ -297,9 +259,9 @@ global bn_double:
    // stack: retdest
    PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
    // stack: u256::MAX, retdest
-    PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+    DUP1
    // stack: u256::MAX, u256::MAX, retdest
    SWAP2
    // stack: retdest, u256::MAX, u256::MAX
    JUMP
-%endmacro
+%endmacro
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
@ -0,0 +1,319 @@
+/// To make the Tate pairing an invariant, the final step is to exponentiate by
+///     (p^12 - 1)/N = (p^6 - 1) * (p^2 + 1) * (p^4 - p^2 + 1)/N
+/// and thus we can exponentiate by each factor sequentially.
+///
+/// def bn254_invariant_exponent(y: Fp12):
+///     y = first_exp(y)
+///     y = second_exp(y)
+///     return final_exp(y)
+
+global bn254_invariant_exponent:
+
+/// first, exponentiate by (p^6 - 1) via
+///     def first_exp(y):
+///         return y.frob(6) / y
+    // stack:                    out, retdest  {out: y}
+    %stack (out) -> (out, 0, first_exp, out)         
+    // stack: out, 0, first_exp, out, retdest  {out: y}
+    %jump(inv_fp254_12)
+first_exp:
+    // stack:                           out, retdest  {out: y  , 0: y^-1}
+    %frob_fp254_12_6
+    // stack:                           out, retdest  {out: y_6, 0: y^-1}
+    %stack (out) -> (out, 0, out, second_exp, out)
+    // stack:  out, 0, out, second_exp, out, retdest  {out: y_6, 0: y^-1}
+    %jump(mul_fp254_12)
+
+/// second, exponentiate by (p^2 + 1) via 
+///     def second_exp(y):
+///         return y.frob(2) * y
+second_exp:
+    // stack:                              out, retdest  {out: y}
+    %stack (out) -> (out, 0, out, out, final_exp, out)
+    // stack: out, 0, out, out, final_exp, out, retdest  {out: y}
+    %frob_fp254_12_2_
+    // stack:      0, out, out, final_exp, out, retdest  {out: y, 0: y_2}
+    %jump(mul_fp254_12)
+
+/// Finally, we must exponentiate by (p^4 - p^2 + 1)/N
+/// To do so efficiently, we can express this power as
+///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
+/// and simultaneously compute y^a4, y^a2, y^a0 where
+///     a1 = a4 + 2a2 - a0
+/// We first initialize these powers as 1 and then use 
+/// binary algorithms for exponentiation.
+///
+/// def final_exp(y):
+///     y4, y2, y0 = 1, 1, 1
+///     power_loop_4()
+///     power_loop_2()
+///     power_loop_0()
+///     custom_powers()
+///     final_power()
+
+final_exp:
+    // stack:                 val, retdest
+    %stack (val) -> (val, 12, val)
+    // stack:        val, 12, val, retdest
+    %move_fp254_12
+    // stack:             12, val, retdest  {12: sqr}
+    %stack () -> (1, 1, 1)
+    // stack:    1, 1, 1, 12, val, retdest
+    %mstore_kernel_bn254_pairing(24)  
+    %mstore_kernel_bn254_pairing(36)  
+    %mstore_kernel_bn254_pairing(48)
+    // stack:             12, val, retdest  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (64, 62, 65)
+    // stack: 64, 62, 65, 12, val, retdest  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(power_loop_4)
+
+/// After computing the powers 
+///     y^a4, y^a2, y^a0
+/// we would like to transform them to
+///     y^a2, y^-a1, y^-a0
+///
+/// def custom_powers()
+///     y0 = y0^{-1}
+///     y1 = y4 * y2^2 * y0
+///     return y2, y1, y0
+///
+/// And finally, upon doing so, compute the final power
+///     y^(p^3) * (y^a2)^(p^2) * (y^-a1)^p * (y^-a0)
+///
+/// def final_power()
+///     y  = y.frob(3)
+///     y2 = y2.frob(2)
+///     y1 = y1.frob(1)
+///     return y * y2 * y1 * y0
+
+custom_powers:
+    // stack:                           val, retdest  {24: y0, 36: y2, 48: y4}
+    %stack () -> (24, 60, make_term_1)
+    // stack:      24, 60, make_term_1, val, retdest  {24: y0, 36: y2, 48: y4}
+    %jump(inv_fp254_12)
+make_term_1:
+    // stack:                           val, retdest  {36: y2, 48: y4, 60: y0^-1}
+    %stack () -> (36, 48, 48, make_term_2)
+    // stack:  36, 48, 48, make_term_2, val, retdest  {36: y2, 48: y4, 60: y0^-1}
+    %jump(mul_fp254_12)
+make_term_2:
+    // stack:                           val, retdest  {36: y2, 48: y4 * y2, 60: y0^-1}
+    %stack () -> (36, 48, 48, make_term_3)
+    // stack:  36, 48, 48, make_term_3, val, retdest  {36: y2, 48: y4 * y2, 60: y0^-1}
+    %jump(mul_fp254_12)
+make_term_3:
+    // stack:                           val, retdest  {36: y2, 48: y4 * y2^2, 60: y0^-1}
+    %stack () -> (60, 48, 48, final_power)
+    // stack:  60, 48, 48, final_power, val, retdest  {36: y2, 48: y4 * y2^2, 60: y0^-1}
+    %jump(mul_fp254_12)
+final_power:
+    // stack:                           val, retdest  {val: y  , 36:  y^a2   , 48:  y^a1   , 60: y^a0}
+    %frob_fp254_12_3
+    // stack:                           val, retdest  {val: y_3, 36:  y^a2   , 48:  y^a1   , 60: y^a0}
+    %stack () -> (36, 36)
+    %frob_fp254_12_2_
+    POP
+    // stack:                           val, retdest  {val: y_3, 36: (y^a2)_2, 48:  y^a1   , 60: y^a0}
+    PUSH 48
+    %frob_fp254_12_1
+    POP
+    // stack:                           val, retdest  {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
+    %stack (val) -> (36, val, val, penult_mul, val)
+    // stack: 36, val, val, penult_mul, val, retdest  {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
+    %jump(mul_fp254_12)
+penult_mul:
+    // stack:                           val, retdest  {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
+    %stack (val) -> (48, val, val, final_mul, val)
+    // stack:  48, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
+    %jump(mul_fp254_12)
+final_mul: 
+    // stack:                           val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0}
+    %stack (val) -> (60, val, val)
+    // stack:                  60, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0}
+    %jump(mul_fp254_12)
+
+
+/// def power_loop_4():
+///     for i in range(64):
+///         abc = load(i, power_data_4)
+///         if a:
+///             y4 *= acc
+///         if b:
+///             y2 *= acc
+///         if c:
+///             y0 *= acc
+///         acc = square_fp254_12(acc)
+///     y4 *= acc
+///
+/// def power_loop_2():
+///     for i in range(62):
+///        ab = load(i, power_data_2)
+///        if a:
+///            y2 *= acc
+///        if b:
+///            y0 *= acc
+///        acc = square_fp254_12(acc)
+///     y2 *= acc
+///
+/// def power_loop_0():
+///     for i in range(65):
+///         a = load(i, power_data_0)
+///         if a:
+///             y0 *= acc
+///         acc = square_fp254_12(acc)
+///     y0 *= acc
+
+power_loop_4:
+    // stack:                                   i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    ISZERO
+    // stack:                           break?, i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_4_end)
+    // stack:                                   i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %sub_const(1)
+    // stack:                                   i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    %mload_kernel_code(power_data_4)
+    // stack:                              abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    %lt_const(100)
+    // stack:                       skip?, abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_4_b)
+    // stack:                              abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %sub_const(100)
+    // stack:                               bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (48, 48, power_loop_4_b)
+    // stack:      48, 48, power_loop_4_b,  bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP8
+    // stack: sqr, 48, 48, power_loop_4_b,  bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)
+power_loop_4_b:
+    // stack:                             bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    %lt_const(10)
+    // stack:                      skip?, bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_4_c)
+    // stack:                             bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %sub_const(10)
+    // stack:                              c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (36, 36, power_loop_4_c)
+    // stack:      36, 36, power_loop_4_c, c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP8
+    // stack: sqr, 36, 36, power_loop_4_c, c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)
+power_loop_4_c:
+    // stack:                            c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    ISZERO
+    // stack:                        skip?, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_4_sq)
+    // stack:                               i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (24, 24, power_loop_4_sq)
+    // stack:      24, 24, power_loop_4_sq, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP7
+    // stack: sqr, 24, 24, power_loop_4_sq, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)
+power_loop_4_sq:
+    // stack:                         i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    PUSH power_loop_4  
+    // stack:           power_loop_4, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP5  
+    DUP1
+    // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(square_fp254_12)
+power_loop_4_end:
+    // stack:                         0, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    POP  
+    // stack:                            j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (48, 48, power_loop_2) 
+    // stack:      48, 48, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP6
+    // stack: sqr, 48, 48, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)
+
+power_loop_2:
+    // stack:                                   j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    ISZERO
+    // stack:                         break?, j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_2_end)
+    // stack:                                 j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %sub_const(1)
+    // stack:                                 j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    %mload_kernel_code(power_data_2)
+    // stack:                             ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    %lt_const(10)
+    // stack:                      skip?, ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_2_b)
+    // stack:                             ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %sub_const(10)
+    // stack:                              b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (36, 36, power_loop_2_b) 
+    // stack:      36, 36, power_loop_2_b, b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP7
+    // stack: sqr, 36, 36, power_loop_2_b, b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)
+power_loop_2_b:
+    // stack:                            b, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    ISZERO
+    // stack:                        skip?, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_2_sq)
+    // stack:                               j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (24, 24, power_loop_2_sq) 
+    // stack:      24, 24, power_loop_2_sq, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP6
+    // stack: sqr, 24, 24, power_loop_2_sq, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)
+power_loop_2_sq:
+    // stack:                         j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    PUSH power_loop_2  
+    // stack:           power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP4  
+    DUP1
+    // stack: sqr, sqr, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(square_fp254_12)
+power_loop_2_end:
+    // stack:                         0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    POP  
+    // stack:                            k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (36, 36, power_loop_0)
+    // stack:      36, 36, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP5
+    // stack: sqr, 36, 36, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)
+
+power_loop_0:
+    // stack:                               k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    ISZERO
+    // stack:                       break?, k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_0_end)
+    // stack:                               k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %sub_const(1)
+    // stack:                               k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP1  
+    %mload_kernel_code(power_data_0)
+    // stack:                            a, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    ISZERO
+    // stack:                        skip?, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jumpi(power_loop_0_sq)
+    // stack:                               k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (24, 24, power_loop_0_sq)  
+    // stack:      24, 24, power_loop_0_sq, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP5
+    // stack: sqr, 24, 24, power_loop_0_sq, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)
+power_loop_0_sq:
+    // stack:                         k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    PUSH power_loop_0  
+    // stack:           power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    DUP3  
+    DUP1
+    // stack: sqr, sqr, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(square_fp254_12)
+power_loop_0_end:
+    // stack:                       0, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack (i, sqr) -> (24, sqr, 24, custom_powers)
+    // stack:   24, sqr, 24, custom_powers  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %jump(mul_fp254_12)    
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/msm.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/msm.asm
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/precomputation.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/precomputation.asm
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@ -0,0 +1,292 @@
+/// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
+///     out = miller_loop(P, Q)
+///     return bn254_invariant_exponent(out)
+global bn254_tate:
+    // Runs bn254_miller on (inp, out) with bn254_invariant_exponent pushed as
+    // its return address, so that routine is entered next with (out, retdest)
+    // left on the stack.
+    // stack:                                inp, out, retdest
+    %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out)
+    // stack: inp, out, bn254_invariant_exponent, out, retdest
+    %jump(bn254_miller)
+
+/// def miller(P, Q):
+///     miller_init()
+///     miller_loop()
+///
+/// def miller_init():
+///     out = 1
+///     O = P
+///     times = 53
+///
+/// def miller_loop():
+///     while times:
+///         0xnm = load(miller_data)
+///         while 0xnm >= 0x20:
+///             miller_one()
+///         while 0xnm:
+///             miller_zero()
+///         times -= 1
+///
+/// def miller_one():
+///     0xnm -= 0x20
+///     mul_tangent()
+///     mul_cord()
+///
+/// def miller_zero():
+///     0xnm -= 1
+///     mul_tangent()
+
+global bn254_miller:
+    // Entry point of the Miller loop.
+    // Writes 1 through %mstore_kernel_bn254_pairing using `out` as the offset
+    // (assumes the macro takes offset on top, value below -- TODO confirm),
+    // then loads six words from ptr: P (2 words) followed by Q (4 words).
+    // stack:         ptr, out, retdest
+    %stack (ptr, out) -> (out, 1, ptr, out)
+    // stack: out, 1, ptr, out, retdest
+    %mstore_kernel_bn254_pairing
+    // stack:         ptr, out, retdest
+    %load_fp254_6
+    // stack:        P, Q, out, retdest
+    // duplicate P as the running point O and seed the counter times = 53
+    %stack (P: 2) -> (0, 53, P, P)
+    // stack: 0, 53, O, P, Q, out, retdest
+    // the head 0 lets miller_loop start with POP
+miller_loop:
+    // Reached with an exhausted counter (0) on top: either the seed 0 pushed
+    // above or the m == 0 left by miller_zero. It is discarded here.
+    POP
+    // stack:          times  , O, P, Q, out, retdest
+    DUP1  
+    ISZERO
+    // stack:  break?, times  , O, P, Q, out, retdest
+    %jumpi(miller_return)
+    // stack:          times  , O, P, Q, out, retdest
+    %sub_const(1)
+    // stack:          times-1, O, P, Q, out, retdest
+    DUP1
+    // stack: times-1, times-1, O, P, Q, out, retdest
+    // presumably reads the code byte at miller_data + (times-1) -- TODO confirm
+    %mload_kernel_code(miller_data)
+    // stack:    0xnm, times-1, O, P, Q, out, retdest
+    %jump(miller_one)
+miller_return:
+    // Loop finished: drop times, O, P, Q and out, then return to the caller.
+    // stack: times, O, P, Q, out, retdest
+    %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest)
+    // stack:                      retdest
+    JUMP 
+
+miller_one:
+    // While 0xnm >= 0x20: subtract 0x20 and run mul_tangent with mul_cord as
+    // its return address; mul_cord ends by jumping back here.
+    // Once 0xnm < 0x20, control moves on to miller_zero.
+    // stack:               0xnm, times, O, P, Q, out, retdest
+    DUP1  
+    %lt_const(0x20) 
+    // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
+    %jumpi(miller_zero)
+    // stack:               0xnm, times, O, P, Q, out, retdest
+    %sub_const(0x20)
+    // stack:           0x{n-1}m, times, O, P, Q, out, retdest
+    PUSH mul_cord
+    // stack: mul_cord, 0x{n-1}m, times, O, P, Q, out, retdest
+    %jump(mul_tangent)
+
+miller_zero:
+    // While m != 0: decrement m and run mul_tangent, which returns here.
+    // When m == 0, jump to miller_loop; its leading POP discards the 0.
+    // stack:              m  , times, O, P, Q, out, retdest
+    DUP1  
+    ISZERO
+    // stack:       skip?, m  , times, O, P, Q, out, retdest
+    %jumpi(miller_loop)
+    // stack:              m  , times, O, P, Q, out, retdest
+    %sub_const(1)
+    // stack:              m-1, times, O, P, Q, out, retdest
+    PUSH miller_zero
+    // stack: miller_zero, m-1, times, O, P, Q, out, retdest
+    %jump(mul_tangent)
+
+
+/// def mul_tangent()
+///     out = square_fp254_12(out)
+///     line = tangent(O, Q)
+///     out = mul_fp254_12_sparse(out, line)
+///     O += O
+
+mul_tangent:
+    // Three chained calls: square_fp254_12(out -> out), then
+    // mul_fp254_12_sparse(out, 0 -> out) using the line that %tangent stored
+    // at pairing-memory offset 0, then bn_double(O). The doubled point
+    // replaces O in place before jumping to the caller-supplied retdest.
+    // Here O and P are 2 words each, Q is 4 words; the trailing retdest of the
+    // whole Miller loop sits below `out` and is omitted from the comments.
+    // stack:                                              retdest, 0xnm, times, O, P, Q, out
+    PUSH mul_tangent_2  
+    DUP13  
+    PUSH mul_tangent_1
+    // stack:           mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %stack (mul_tangent_1, out) -> (out, out, mul_tangent_1, out)
+    // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %jump(square_fp254_12)
+mul_tangent_1:
+    // stack:         out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // four DUP13s copy the 4 words of Q, last word first
+    DUP13
+    DUP13
+    DUP13
+    DUP13
+    // stack:      Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // two DUP11s copy the 2 words of O
+    DUP11  
+    DUP11
+    // stack:   O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %tangent
+    // stack:         out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {0: line}
+    %stack (out) -> (out, 0, out)
+    // stack: out, 0, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {0: line}
+    %jump(mul_fp254_12_sparse)
+mul_tangent_2:
+    // stack:                  retdest, 0xnm, times,   O, P, Q, out  {0: line}
+    PUSH after_double
+    // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {0: line}
+    DUP6  
+    DUP6
+    // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {0: line}
+    %jump(bn_double)
+after_double:
+    // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {0: line}
+    // each SWAP5/POP pair moves one word of 2*O into the slot of the old O
+    SWAP5
+    POP
+    SWAP5
+    POP
+    // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {0: line}
+    JUMP
+
+/// def mul_cord()
+///     line = cord(P, O, Q)
+///     out = mul_fp254_12_sparse(out, line)
+///     O += P
+
+mul_cord:
+    // Entered as the return target of mul_tangent (see miller_one).
+    // Stores the line from %cord at pairing-memory offset 0, runs
+    // mul_fp254_12_sparse(out, 0 -> out), replaces O with O + P via
+    // bn_add_valid_points, and finally jumps back to miller_one.
+    // stack:                          0xnm, times, O, P, Q, out
+    PUSH mul_cord_1
+    // stack:              mul_cord_1, 0xnm, times, O, P, Q, out
+    // four DUP11s copy the 4 words of Q
+    DUP11  
+    DUP11  
+    DUP11  
+    DUP11
+    // stack:           Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP9  
+    DUP9
+    // stack:        O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP13  
+    DUP13
+    // stack:     P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    %cord 
+    // stack:              mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
+    DUP12
+    // stack:         out, mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
+    %stack (out) -> (out, 0, out)
+    // stack: out, 0, out, mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
+    %jump(mul_fp254_12_sparse)
+mul_cord_1:
+    // stack:                   0xnm, times, O  , P, Q, out
+    PUSH after_add
+    // stack:        after_add, 0xnm, times, O  , P, Q, out
+    // four DUP7s copy P then O (2 words each)
+    DUP7  
+    DUP7  
+    DUP7  
+    DUP7
+    // stack: O , P, after_add, 0xnm, times, O  , P, Q, out
+    %jump(bn_add_valid_points)
+after_add:
+    // stack:            O + P, 0xnm, times, O  , P, Q, out
+    // each SWAP4/POP pair moves one word of O + P into the slot of the old O
+    SWAP4
+    POP
+    SWAP4
+    POP
+    // stack:                   0xnm, times, O+P, P, Q, out
+    %jump(miller_one)
+
+
+/// def tangent(px, py, qx, qy):
+///     return sparse_store(
+///         py**2 - 9, 
+///         (-3px**2) * qx, 
+///         (2py)     * qy,
+///     )
+
+%macro tangent
+    // Consumes all six inputs and writes five words via
+    // %mstore_kernel_bn254_pairing(offset):
+    //     0: py^2 - 9
+    //     2: (-3px^2) * qx      3: (-3px^2) * qx_
+    //     8: (2py)    * qy      9: (2py)    * qy_
+    // The stack comments assume SUBFP254 computes (top - second).
+    // stack:                px, py, qx, qx_,  qy, qy_
+    %stack (px, py) -> (py, py , 9, px, py)
+    // stack:    py, py , 9, px, py, qx, qx_,  qy, qy_
+    MULFP254
+    // stack:      py^2 , 9, px, py, qx, qx_,  qy, qy_
+    SUBFP254
+    // stack:      py^2 - 9, px, py, qx, qx_,  qy, qy_
+    %mstore_kernel_bn254_pairing(0)
+    // stack:                px, py, qx, qx_,  qy, qy_
+    DUP1  
+    MULFP254
+    // stack:              px^2, py, qx, qx_,  qy, qy_
+    PUSH 3  
+    MULFP254
+    // stack:            3*px^2, py, qx, qx_,  qy, qy_
+    // negate by computing 0 - 3*px^2
+    PUSH 0  
+    SUBFP254
+    // stack:           -3*px^2, py, qx, qx_,  qy, qy_
+    SWAP2
+    // stack:            qx, py, -3px^2, qx_,  qy, qy_
+    DUP3  
+    MULFP254
+    // stack:   (-3*px^2)qx, py, -3px^2, qx_,  qy, qy_ 
+    %mstore_kernel_bn254_pairing(2)
+    // stack:                py, -3px^2, qx_,  qy, qy_ 
+    PUSH 2  
+    MULFP254
+    // stack:               2py, -3px^2, qx_,  qy, qy_ 
+    SWAP3 
+    // stack:                qy, -3px^2, qx_, 2py, qy_ 
+    DUP4  
+    MULFP254
+    // stack:           (2py)qy, -3px^2, qx_, 2py, qy_ 
+    %mstore_kernel_bn254_pairing(8)
+    // stack:                    -3px^2, qx_, 2py, qy_ 
+    MULFP254
+    // stack:                   (-3px^2)*qx_, 2py, qy_ 
+    %mstore_kernel_bn254_pairing(3)
+    // stack:                                 2py, qy_ 
+    MULFP254
+    // stack:                                (2py)*qy_ 
+    %mstore_kernel_bn254_pairing(9)
+%endmacro
+
+/// def cord(p1x, p1y, p2x, p2y, qx, qy):
+///     return sparse_store(
+///         p1y*p2x - p2y*p1x, 
+///         (p2y - p1y) * qx, 
+///         (p1x - p2x) * qy,
+///     )
+
+%macro cord
+    // Consumes all eight inputs and writes five words via
+    // %mstore_kernel_bn254_pairing(offset):
+    //     0: p1y*p2x - p2y*p1x
+    //     2: (p2y - p1y) * qx     3: (p2y - p1y) * qx_
+    //     8: (p1x - p2x) * qy     9: (p1x - p2x) * qy_
+    // The stack comments assume SUBFP254 computes (top - second).
+    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    DUP1  
+    DUP5  
+    MULFP254
+    // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    DUP3  
+    DUP5  
+    MULFP254
+    // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    SUBFP254
+    // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    %mstore_kernel_bn254_pairing(0)
+    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    SWAP3
+    // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:                    p2y - p1y, p2x , p1x, qx, qx_, qy, qy_
+    SWAP2
+    // stack:                    p1x , p2x, p2y - p1y, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:                    p1x - p2x, p2y - p1y, qx, qx_, qy, qy_
+    SWAP4
+    // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
+    DUP5
+    MULFP254
+    // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
+    %mstore_kernel_bn254_pairing(8)
+    // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
+    SWAP1
+    // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
+    DUP2
+    MULFP254
+    // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
+    %mstore_kernel_bn254_pairing(2)
+    // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
+    MULFP254
+    // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
+    %mstore_kernel_bn254_pairing(3)
+    // stack:                                            p1x - p2x, qy_
+    MULFP254
+    // stack:                                           (p1x - p2x)*qy_
+    %mstore_kernel_bn254_pairing(9)
+%endmacro
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@ -0,0 +1,378 @@
+///////////////////////////////////////
+///// GENERAL FP12 MULTIPLICATION /////
+///////////////////////////////////////
+
+/// cost: 1063
+
+/// fp254_6 functions:
+///  fn    | num | ops | cost
+///  -------------------------
+///  load  |   8 |  40 |  320
+///  store |   5 |  40 |  200
+///  dup   |   5 |   6 |   30
+///  swap  |   4 |  16 |   64
+///  add   |   4 |  16 |   64
+///  subr  |   1 |  17 |   17
+///  mul   |   3 | 157 |  471
+///  i9    |   1 |   9 |    9
+///
+/// lone stack operations:
+///  op    | num 
+///  ------------
+///  ADD   |   3
+///  SWAP  |   2
+///  DUP   |   6
+///  PUSH  |   6
+///  POP   |   2
+///  JUMP  |   6
+///
+/// TOTAL: 1201
+
+/// inputs:
+///     F = f + f'z
+///     G = g + g'z
+///
+/// output:
+///     H = h + h'z = FG
+///
+///     h  = fg + sh(f'g')
+///     h' = (f+f')(g+g') - fg - f'g'
+///
+/// memory pointers [ind' = ind+6]
+///     {inA: f, inA': f', inB: g, inB': g', out: h, out': h'}
+///
+/// f, f', g, g' consist of six elements on the stack
+
+global mul_fp254_12:
+    // Entry stack: inA, inB, out, retdest. The retdest below `out` is omitted
+    // from the stack comments; it is what the final JUMP consumes.
+    // Makes three calls to mul_fp254_6 (f'g', fg, (f+f')(g+g')) and keeps
+    // intermediates at fixed scratch offsets 84, 90 and 96.
+    // Writes h at out and h' at out + 6.
+    // stack:                                   inA, inB, out 
+    DUP1  
+    %add_const(6) 
+    // stack:                             inA', inA, inB, out 
+    %load_fp254_6
+    // stack:                               f', inA, inB, out 
+    DUP8  
+    %add_const(6)
+    // stack:                         inB', f', inA, inB, out 
+    %load_fp254_6
+    // stack:                           g', f', inA, inB, out 
+    PUSH mul_fp254_12_1
+    // stack:           mul_fp254_12_1, g', f', inA, inB, out 
+    %dup_fp254_6_7
+    // stack:       f', mul_fp254_12_1, g', f', inA, inB, out 
+    %dup_fp254_6_7
+    // stack:   g', f', mul_fp254_12_1, g', f', inA, inB, out 
+    %jump(mul_fp254_6)
+mul_fp254_12_1:
+    // stack:                   f'g', g'  , f', inA, inB, out 
+    %dup_fp254_6_0
+    // stack:             f'g', f'g', g'  , f', inA, inB, out 
+    %store_fp254_6_sh(84)                                    
+    // stack:                   f'g', g'  , f', inA, inB, out  {84: sh(f'g')}
+    %store_fp254_6(90)
+    // stack:                         g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    DUP13
+    // stack:                    inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    DUP15  
+    // stack:               inB, inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %load_fp254_6
+    // stack:                g , inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %stack (f: 6, x, g: 6) -> (g, x, f)
+    // stack:                g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %dup_fp254_6_7
+    // stack:              g,g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %add_fp254_6
+    // stack:              g+g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %stack (f: 6, x, g: 6) -> (g, x, f)
+    // stack:                 g, inA, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    PUSH mul_fp254_12_2
+    // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // SWAP7 trades the return label with the inA copy sitting below g
+    SWAP7
+    // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %load_fp254_6
+    // stack:   f, g, mul_fp254_12_2, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %jump(mul_fp254_6)
+mul_fp254_12_2:    
+    // stack:                     fg, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %store_fp254_6(96)
+    // stack:                         g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %stack (x: 6, y: 6) -> (y, x)
+    // stack:                         f', g+g', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    PUSH mul_fp254_12_3
+    // stack:         mul_fp254_12_3, f', g+g', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // SWAP13 trades the return label with the remaining inA
+    SWAP13
+    // stack:         inA, f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %load_fp254_6
+    // stack:            f,f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %add_fp254_6
+    // stack:            f+f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %jump(mul_fp254_6)
+mul_fp254_12_3:
+    // stack:                          (f+f')(g+g'), inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %load_fp254_6(96)
+    // stack:                      fg, (f+f')(g+g'), inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %stack (x: 6, y: 6) -> (y, x)
+    // stack:                      (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %dup_fp254_6_6
+    // stack:                  fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %load_fp254_6(90)
+    // stack:             f'g',fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %add_fp254_6
+    // stack:             f'g'+fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %subr_fp254_6
+    // stack:          (f+f')(g+g') - (f'g'+fg), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}   
+    DUP14  
+    %add_const(6) 
+    // stack:    out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}   
+    %store_fp254_6
+    // stack:                                    fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %load_fp254_6(84)
+    // stack:                         sh(f'g') , fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %add_fp254_6
+    // stack:                         sh(f'g') + fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    DUP8
+    // stack:                    out, sh(f'g') + fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %store_fp254_6
+    // stack:                                        inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // drop inB and out, then return to retdest
+    %pop2  
+    JUMP
+
+
+//////////////////////////////////////
+///// SPARSE FP12 MULTIPLICATION /////
+//////////////////////////////////////
+
+/// cost: 645
+
+/// fp254_6 functions:
+///  fn      | num | ops | cost
+///  ---------------------------
+///  load    |   2 |  40 |   80
+///  store   |   2 |  40 |   80
+///  dup     |   4 |   6 |   24
+///  swap    |   4 |  16 |   64
+///  add     |   4 |  16 |   64
+///  mul_fp254_  |   2 |  21 |   42
+///  mul_fp254_2 |   4 |  59 |  236
+///
+/// lone stack operations:
+///  op    | num 
+///  ------------
+///  ADD   |   6
+///  DUP   |   9
+///  PUSH  |   6
+///  POP   |   5
+///
+/// TOTAL: 618
+
+/// input:
+///     F = f + f'z
+///     G = g0 + (G1)t + (G2)tz
+///
+/// output:
+///     H = h + h'z = FG
+///       = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
+///     
+///     h  = g0 * f  + G1 * sh(f ) + G2 * sh2(f') 
+///     h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
+///
+/// memory pointers [ind' = ind+6, inB1 = inB + 2, inB2 = inB + 8]
+///     { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
+///
+/// f, f' consist of six elements; G1, G2 consist of two elements; and g0 of one element 
+
+global mul_fp254_12_sparse:
+    // Entry stack: inA, inB, out, retdest. The retdest below `out` is omitted
+    // from the stack comments; it is what the final JUMP consumes.
+    // The sparse operand is read from three places: g0 at inB (one word),
+    // G1 at inB + 2 (two words) and G2 at inB + 8 (two words).
+    // Writes h at out and h' at out + 6. Straight-line code: no sub-calls.
+    // stack:                                                                    inA, inB, out
+    DUP1  
+    %add_const(6)
+    // stack:                                                              inA', inA, inB, out
+    %load_fp254_6
+    // stack:                                                                f', inA, inB, out
+    DUP8 
+    // stack:                                                           inB, f', inA, inB, out
+    DUP8
+    // stack:                                                      inA, inB, f', inA, inB, out
+    %load_fp254_6
+    // stack:                                                        f, inB, f', inA, inB, out
+    // from here on, copies of f, f', inB and out are staged for the six terms
+    DUP16
+    // stack:                                                   out, f, inB, f', inA, inB, out
+    %dup_fp254_6_8 
+    // stack:                                               f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack:                                          inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp254_6_8
+    // stack:                                       f, inB, f', out, f, inB, f', inA, inB, out
+    DUP7
+    // stack:                                  inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp254_6_8
+    // stack:                              f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp254_6_7
+    // stack:                           f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    DUP13 
+    // stack:                      inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mload_kernel_bn254_pairing
+    // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %scale_re_fp254_6
+    // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %stack (x: 6, y: 6) -> (y, x)
+    // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    DUP13
+    %add_const(8)
+    // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %load_fp254_2
+    // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %scale_fp254_6_sh2
+    // stack:           G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %add_fp254_6
+    // stack:           G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %stack (f: 6, x, g: 6) -> (g, x, f)
+    // stack:          f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    DUP7  %add_const(2)
+    // stack: inB1,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %load_fp254_2
+    // stack:  G1 ,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %scale_fp254_6_sh
+    // stack:  G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    // the _hole variant presumably adds across the single word (inB) sitting
+    // between the two operands and drops it -- inferred from the stack comments
+    %add_fp254_6_hole
+    // stack:      G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %store_fp254_6
+    // stack:                                          inB, f', out, f, inB, f', inA, inB, out
+    %mload_kernel_bn254_pairing
+    // stack:                                          g0 , f', out, f, inB, f', inA, inB, out
+    %scale_re_fp254_6
+    // stack:                                          g0 * f', out, f, inB, f', inA, inB, out
+    %stack (f: 6, x, g: 6) -> (g, x, f)
+    // stack:                                        f  , out, g0 * f', inB, f', inA, inB, out
+    DUP14
+    %add_const(8)
+    // stack:                               inB2,    f  , out, g0 * f', inB, f', inA, inB, out
+    %load_fp254_2
+    // stack:                                G2 ,    f  , out, g0 * f', inB, f', inA, inB, out
+    %scale_fp254_6_sh
+    // stack:                                G2 * sh(f) , out, g0 * f', inB, f', inA, inB, out
+    %add_fp254_6_hole
+    // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
+    %stack (f: 6, x, g: 6) -> (g, x, f)
+    // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    DUP7
+    %add_const(2)
+    // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %load_fp254_2
+    // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %scale_fp254_6_sh
+    // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %add_fp254_6_hole
+    // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
+    DUP9
+    %add_const(6)
+    // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
+    %store_fp254_6
+    // stack:                                                                    inA, inB, out
+    // drop inA, inB and out, then return to retdest
+    %pop3
+    JUMP
+
+
+/////////////////////////
+///// FP12 SQUARING /////
+/////////////////////////
+
+/// cost: 646
+
+/// fp254_6 functions:
+///  fn    | num | ops | cost
+///  -------------------------
+///  load  |   2 |  40 |   80
+///  store |   2 |  40 |   80
+///  dup   |   2 |   6 |   12
+///  swap  |   2 |  16 |   32
+///  add   |   1 |  16 |   16
+///  mul   |   1 | 157 |  157
+///  sq    |   2 | 101 |  202
+///  dbl   |   1 |  13 |   13
+///
+/// lone stack operations:
+///  op    | num 
+///  ------------
+///  ADD   |   3
+///  SWAP  |   4
+///  DUP   |   5
+///  PUSH  |   6
+///  POP   |   3
+///  JUMP  |   4
+///
+/// TOTAL: 
+
+/// input:
+///     F = f + f'z
+///
+/// output:
+///     H = h + h'z = FF
+///
+///     h  = ff + sh(f'f')
+///     h' = 2ff'
+///
+/// memory pointers [ind' = ind+6]
+///     {inp: f, inp': f', out: h, out': h'}
+///
+/// f, f' consist of six elements on the stack
+
+global square_fp254_12:
+    // Entry stack: inp, out, retdest. The retdest below `out` is omitted from
+    // the stack comments; it is what the final JUMP consumes.
+    // Pre-arranges the three return labels on the stack, then chains
+    // mul_fp254_6 (f'f) and two calls to square_fp254_6 (f'f', then ff).
+    // Writes h at out and h' at out + 6.
+    // stack:                                                                               inp, out
+    DUP1
+    // stack:                                                                          inp, inp, out
+    %load_fp254_6 
+    // stack:                                                                            f, inp, out
+    PUSH square_fp254_12_3
+    // stack:                                                         square_fp254_12_3, f, inp, out
+    SWAP7
+    // stack:                                                         inp, f, square_fp254_12_3, out
+    PUSH square_fp254_12_2
+    // stack:                                      square_fp254_12_2, inp, f, square_fp254_12_3, out 
+    %dup_fp254_6_2
+    // stack:                                  f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    DUP16
+    %add_const(6)
+    // stack:                            out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    PUSH square_fp254_12_1
+    // stack:         square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    DUP10
+    %add_const(6)
+    // stack:   inp', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %load_fp254_6
+    // stack:     f', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %stack (f: 6, x: 2, g: 6) -> (g, x, f)
+    // stack:     f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %dup_fp254_6_8
+    // stack: f', f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %jump(mul_fp254_6)
+square_fp254_12_1:
+    // stack:                       f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    DUP7
+    // stack:                 out', f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    // presumably stores 2 * f'f at out' (h' = 2ff' per the header) -- TODO confirm
+    %store_fp254_6_double
+    // stack:                            out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    POP
+    // stack:                                  f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %jump(square_fp254_6)
+square_fp254_12_2:
+    // stack:                                                   f'f', inp, f, square_fp254_12_3, out
+    %sh_fp254_6
+    // stack:                                               sh(f'f'), inp, f, square_fp254_12_3, out
+    %stack (f: 6, x, g: 6) -> (g, x, f)
+    // stack:                                               f, inp, sh(f'f'), square_fp254_12_3, out
+    // the SWAP6/SWAP13/SWAP6 triple exchanges inp with the return label
+    SWAP6
+    SWAP13
+    SWAP6
+    // stack:                                               f, square_fp254_12_3, sh(f'f'), inp, out
+    %jump(square_fp254_6)
+square_fp254_12_3:
+    // stack:                                                                ff , sh(f'f'), inp, out
+    %add_fp254_6
+    // stack:                                                                ff + sh(f'f'), inp, out
+    DUP8
+    // stack:                                                           out, ff + sh(f'f'), inp, out
+    %store_fp254_6
+    // stack:                                                                               inp, out
+    // drop inp and out, then return to retdest
+    %pop2
+    JUMP
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_6_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_6_mul.asm
@ -1,3 +1,7 @@
+//////////////////////////////////////
+///// GENERAL FP6 MULTIPLICATION /////
+//////////////////////////////////////
+
 /// inputs:
 ///     C = C0 + C1t + C2t^2 
 ///       = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2
@ -54,9 +58,8 @@
 /// e2  = c0d2  + c1d1  + c2d0  - (c0_d2_ + c1_d1_ + c2_d0_)
 /// e2_ = c0d2_ + c0_d2 + c1d1_ +  c1_d1  + c2d0_  + c2_d0

-
 // cost: 157
-global mul_fp6:
+global mul_fp254_6:
    // e2
    // make c0_d2_ + c1_d1_ + c2_d0_
    DUP8
@ -256,3 +259,177 @@ global mul_fp6:

    // stack: retdest, e0, e0_, e1, e1_, e2, e2_
    JUMP
+
+
+////////////////////////
+///// FP6 SQUARING /////
+////////////////////////
+
+/// inputs:
+///     C = C0 + C1t + C2t^2 
+///       = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2
+///
+/// output:
+///     E = E0 + E1t + E2t^2 = C^2
+///       = (e0 + e0_i) + (e1 + e1_i)t + (e2 + e2_i)t^2
+///
+/// initial stack: c0, c0_, c1, c1_, c2, c2_, retdest
+/// final   stack: e0, e0_, e1, e1_, e2, e2_
+
+/// computations:
+///
+/// E0 = C0C0 + i9(2C1C2) = (c0+c0_i)^2 + i9(2(c1+c1_i)(c2+c2_i))
+///    = (c0^2 - c0_^2) + (2c0c0_)i + i9[2(c1c2 - c1_c2_) + 2(c1_c2 + c1c2_)i]
+///
+/// E1 = 2*C0C1 + i9(C2C2) = 2(c0+c0_i)(c1+c1_i) + i9((c2+c2_i)(c2+c2_i))
+///    = 2(c0c1 - c0_c1_) + 2(c0c1_ + c0_c1)i + i9[(c2^2 - c2_^2) + (2c2c2_)i]
+///
+/// E2 = 2*C0C2 + C1C1
+///    = 2(c0c2 - c0_c2_) + 2(c0c2_ + c0_c2)i + (c1^2 - c1_^2) + (2c1c1_)i
+///
+/// e0  = (c0^2 - c0_^2) + x0
+/// e0_ = 2c0c0_ + x0_
+///     where x0_, x0 = %i9 2(c1c2 - c1_c2_), 2(c1_c2 + c1c2_)
+///
+/// e1  = 2(c0c1 - c0_c1_) + x1
+/// e1_ = 2(c0c1_ + c0_c1) + x1_
+///     where x1_, x1 = %i9 c2^2 - c2_^2, 2c2c2_
+///
+/// e2  = 2(c0c2 - c0_c2_) + (c1^2 - c1_^2)
+/// e2_ = 2(c0c2_ + c0_c2) + 2c1c1_
+
+// cost: 101
+global square_fp254_6:
+    // Entry stack: c0, c0_, c1, c1_, c2, c2_, retdest.
+    // Each of the three sections below computes one output pair and swaps it
+    // into stack slots that are no longer needed; the closing JUMP consumes
+    // retdest with the six results beneath it.
+    // The stack comments assume SUBFP254 computes (top - second).
+
+    /// e0  = (c0^2 - c0_^2) + x0
+    /// e0_ = 2c0c0_ + x0_
+    ///     where x0_, x0 = %i9 2(c1c2 - c1_c2_), 2(c1_c2 + c1c2_)
+    DUP6
+    DUP4
+    MULFP254
+    DUP6
+    DUP6
+    MULFP254
+    ADDFP254
+    PUSH 2
+    MULFP254
+    // stack: 2(c1_c2 + c1c2_)
+    DUP7
+    DUP6
+    MULFP254
+    DUP7
+    DUP6
+    MULFP254
+    SUBFP254
+    PUSH 2
+    MULFP254
+    // stack: 2(c1c2 - c1_c2_), 2(c1_c2 + c1c2_)
+    %i9
+    // stack:          x0_, x0
+    DUP3
+    DUP5
+    MULFP254
+    PUSH 2
+    MULFP254
+    // stack:  2c0c0_, x0_, x0
+    ADDFP254
+    // stack:          e0_, x0
+    SWAP4
+    SWAP1
+    // stack:               x0
+    DUP4
+    DUP1
+    MULFP254
+    DUP4
+    DUP1
+    MULFP254
+    SUBFP254
+    // stack: c0^2 - c0_^2, x0
+    ADDFP254
+    // stack:               e0
+    SWAP3
+
+    /// e1  = 2(c0c1  - c0_c1_) + x1
+    /// e1_ = 2(c0c1_ + c0_c1 ) + x1_
+    ///     where x1_, x1 = %i9 c2^2 - c2_^2, 2c2c2_
+    DUP7
+    DUP9
+    MULFP254
+    PUSH 2
+    MULFP254
+    DUP9
+    DUP1
+    MULFP254
+    DUP9
+    DUP1
+    MULFP254
+    SUBFP254
+    %i9
+    // stack:                    x1_, x1
+    DUP4
+    DUP4
+    MULFP254
+    DUP9
+    DUP7
+    MULFP254
+    ADDFP254
+    PUSH 2
+    MULFP254
+    // stack:  2(c0c1_ + c0_c1), x1_, x1
+    ADDFP254
+    // stack:                    e1_, x1
+    SWAP8
+    SWAP1
+    // stack:                         x1
+    DUP8
+    DUP4
+    MULFP254
+    DUP5
+    DUP7
+    MULFP254
+    SUBFP254
+    PUSH 2
+    MULFP254
+    // stack:      2(c0c1  - c0_c1_), x1
+    ADDFP254
+    SWAP7
+
+    /// e2  = 2(c0c2 - c0_c2_) + (c1^2 - c1_^2)
+    /// e2_ = 2(c0c2_ + c0_c2 + c1c1_)
+    DUP1
+    DUP1
+    MULFP254
+    DUP5
+    DUP1
+    MULFP254
+    SUBFP254
+    DUP11
+    DUP5
+    MULFP254
+    DUP4
+    DUP8
+    MULFP254
+    SUBFP254
+    PUSH 2
+    MULFP254
+    ADDFP254
+    // stack: e2
+    SWAP10
+    // stack: c2_, c1_, c2, c0_, c1, c0
+    // from here the remaining inputs are consumed rather than duplicated
+    SWAP4
+    MULFP254
+    // stack:   c1c1_, c2, c0_, c2_, c0
+    SWAP2
+    MULFP254
+    // stack:    c0_c2 , c1c1_, c2_, c0
+    ADDFP254
+    // stack:    c0_c2 + c1c1_, c2_, c0
+    SWAP2
+    MULFP254
+    // stack:     c0c2_ , c0_c2 + c1c1_
+    ADDFP254
+    // stack:     c0c2_ + c0_c2 + c1c1_
+    PUSH 2
+    MULFP254
+    // stack:                       e2_
+    SWAP6
+
+    // stack: retdest, e0, e0_, e1, e1_, e2, e2_
+    JUMP
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@ -0,0 +1,272 @@
+// frob_fp12 tests
+
+global test_frob_fp254_12_1:
+    // stack:         ptr
+    %frob_fp254_12_1
+    // stack:         ptr
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_12_2:
+    // stack:         ptr 
+    DUP1
+    // stack:    ptr, ptr
+    %frob_fp254_12_2_
+    // stack:         ptr
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_12_3:
+    // stack:         ptr
+    %frob_fp254_12_3
+    // stack:         ptr
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_12_6:
+    // stack:         ptr
+    %frob_fp254_12_6
+    // stack:         ptr
+    %jump(0xdeadbeef)
+
+
+/// def frob_fp254_12_n(f, f'):
+///     g  =             frob_fp254_6(n, f )
+///     g' = FROB_z[n] * frob_fp254_6(n, f')
+///     return g, g'
+
+%macro frob_fp254_12_1
+    // stack:           ptr
+    DUP1
+    // stack:      ptr, ptr 
+    %load_fp254_6
+    // stack:        f, ptr
+    %frob_fp254_6_1
+    // stack:        g, ptr
+    DUP7
+    // stack:   ptr, g, ptr
+    %store_fp254_6
+    // stack:           ptr
+    DUP1  %add_const(6)
+    // stack:     ptr', ptr
+    %load_fp254_6
+    // stack:       f', ptr
+    %frobz_1
+    // stack:       g', ptr
+    DUP7  %add_const(6)
+    // stack: ptr', g', ptr
+    %store_fp254_6
+    // stack:           ptr
+%endmacro 
+
+// Note: this is the only one with distinct input and output pointers
+%macro frob_fp254_12_2_
+    // stack:           ptr , out
+    DUP1
+    // stack:      ptr, ptr , out
+    %load_fp254_6
+    // stack:        f, ptr , out
+    %frob_fp254_6_2
+    // stack:        g, ptr , out
+    DUP8
+    // stack:   out, g, ptr , out
+    %store_fp254_6 
+    // stack:           ptr , out
+    %add_const(6)
+    // stack:           ptr', out
+    %load_fp254_6
+    // stack:             f', out
+    %frobz_2
+    // stack:             g', out
+    DUP7  %add_const(6)
+    // stack:       out', g', out
+    %store_fp254_6
+    // stack:                 out
+%endmacro 
+
+%macro frob_fp254_12_3
+    // stack:           ptr
+    DUP1
+    // stack:      ptr, ptr 
+    %load_fp254_6
+    // stack:        f, ptr
+    %frob_fp254_6_3
+    // stack:        g, ptr
+    DUP7
+    // stack:   ptr, g, ptr
+    %store_fp254_6
+    // stack:           ptr
+    DUP1  %add_const(6)
+    // stack:     ptr', ptr
+    %load_fp254_6
+    // stack:       f', ptr
+    %frobz_3
+    // stack:       g', ptr
+    DUP7  %add_const(6)
+    // stack: ptr', g', ptr
+    %store_fp254_6
+    // stack:           ptr
+%endmacro
+
+%macro frob_fp254_12_6
+    // stack:           ptr
+    DUP1  %add_const(6)
+    // stack:     ptr', ptr
+    %load_fp254_6
+    // stack:       f', ptr
+    %frobz_6
+    // stack:       g', ptr
+    DUP7  %add_const(6)
+    // stack: ptr', g', ptr
+    %store_fp254_6
+    // stack:           ptr
+%endmacro
+
+// frob_fp6 tests
+
+global test_frob_fp254_6_1:
+    // stack:         C0, C1, C2
+    %frob_fp254_6_1
+    // stack:         D0, D1, D2
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_6_2:
+    // stack:         C0, C1, C2
+    %frob_fp254_6_2
+    // stack:         D0, D1, D2
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_6_3:
+    // stack:         C0, C1, C2
+    %frob_fp254_6_3
+    // stack:         D0, D1, D2
+    %jump(0xdeadbeef)
+
+
+/// let Z` denote the complex conjugate of Z
+
+/// def frob_fp254_6_n(C0, C1, C2):
+///     if n%2:
+///         D0, D1, D2 = C0`, FROB_T1[n] * C1`, FROB_T2[n] * C2`
+///     else: 
+///         D0, D1, D2 = C0 , FROB_T1[n] * C1 , FROB_T2[n] * C2
+///     return D0, D1, D2 
+
+%macro frob_fp254_6_1
+    // stack: C0 , C1 , C2
+    %conj_fp254_2
+    // stack: D0 , C1 , C2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
+    // stack: C2 , C1 , D0
+    %conj_fp254_2
+    // stack: C2`, C1 , D0
+    %frobt2_1
+    // stack: D2 , C1 , D0
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
+    // stack: D0 , C1 , D2
+    %stack (x: 2, y: 2) -> (y, x)
+    // stack: C1 , D0 , D2
+    %conj_fp254_2
+    // stack: C1`, D0 , D2
+    %frobt1_1
+    // stack: D1 , D0 , D2
+    %stack (x: 2, y: 2) -> (y, x)
+    // stack: D0 , D1 , D2
+%endmacro
+
+%macro frob_fp254_6_2
+    // stack: C0, C1, C2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
+    // stack: C2, C1, C0
+    %frobt2_2
+    // stack: D2, C1, C0
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
+    // stack: C0, C1, D2
+    %stack (x: 2, y: 2) -> (y, x)
+    // stack: C1, C0, D2
+    %frobt1_2
+    // stack: D1, C0, D2
+    %stack (x: 2, y: 2) -> (y, x)
+    // stack: D0, D1, D2
+%endmacro
+
+%macro frob_fp254_6_3
+    // stack: C0 , C1 , C2
+    %conj_fp254_2
+    // stack: D0 , C1 , C2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
+    // stack: C2 , C1 , D0
+    %conj_fp254_2
+    // stack: C2`, C1 , D0
+    %frobt2_3
+    // stack: D2 , C1 , D0
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
+    // stack: D0 , C1 , D2
+    %stack (x: 2, y: 2) -> (y, x)
+    // stack: C1 , D0 , D2
+    %conj_fp254_2
+    // stack: C1`, D0 , D2
+    %frobt1_3
+    // stack: D1 , D0 , D2
+    %stack (x: 2, y: 2) -> (y, x)
+    // stack: D0 , D1 , D2
+%endmacro
+
+
+%macro frobz_1
+    %frob_fp254_6_1
+    PUSH 0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac
+    PUSH 0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470
+    %scale_fp254_6
+%endmacro
+
+%macro frobz_2
+    %frob_fp254_6_2
+    PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49
+    %scale_re_fp254_6
+%endmacro
+
+%macro frobz_3
+    %frob_fp254_6_3
+    PUSH 0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101
+    PUSH 0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f
+    %scale_fp254_6
+%endmacro
+
+%macro frobz_6
+    PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46
+    %scale_re_fp254_6
+%endmacro
+
+
+%macro frobt1_1
+    PUSH 0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2
+    PUSH 0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d
+    %mul_fp254_2
+%endmacro
+
+%macro frobt2_1
+    PUSH 0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126
+    PUSH 0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762
+    %mul_fp254_2
+%endmacro
+
+%macro frobt1_2
+    PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48
+    %scale_fp254_2
+%endmacro
+
+%macro frobt2_2
+    PUSH 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe
+    %scale_fp254_2
+%endmacro
+
+
+%macro frobt1_3
+    PUSH 0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de
+    PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d
+    %mul_fp254_2
+%endmacro
+
+%macro frobt2_3
+    PUSH 0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f
+    PUSH 0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066
+    %mul_fp254_2
+%endmacro
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@ -0,0 +1,60 @@
+// Returns reverse order division y/x, modulo N
+%macro divr_fp254
+    // stack: x   , y
+    %inv_fp254
+    // stack: x^-1, y
+    MULFP254
+%endmacro
+
+// Non-deterministically provide the inverse x^-1 of x modulo N.
+// If x === 0 mod N, this function panics.
+// Although the official prover provides the unique inverse <N
+// this macro only checks that x * x^-1 === 1 mod N
+%macro inv_fp254
+    // stack:        x
+    PROVER_INPUT(ff::bn254_base::inverse)
+    // stack: x^-1 , x
+    SWAP1  
+    DUP2
+    // stack: x^-1 , x, x^-1
+    MULFP254
+    // stack: x^-1 * x, x^-1
+    %assert_eq_const(1)
+    // stack:           x^-1
+%endmacro
+
+
+global inv_fp254_12:
+    // stack:                         inp, out, retdest
+    %prover_inv_fp254_12
+    // stack:                   f^-1, inp, out, retdest
+    DUP14
+    // stack:              out, f^-1, inp, out, retdest
+    %store_fp254_12
+    // stack:                         inp, out, retdest
+    %stack (inp, out) -> (inp, out, 72, check_inv_fp254_12)
+    // stack: inp, out, 72, check_inv_fp254_12, retdest 
+    %jump(mul_fp254_12)
+check_inv_fp254_12:
+    // stack:        retdest
+    PUSH 72  
+    %load_fp254_12
+    // stack: unit?, retdest
+    %assert_eq_unit_fp254_12
+    // stack:        retdest
+    JUMP
+
+%macro prover_inv_fp254_12
+    PROVER_INPUT(ffe::bn254_base::component_11)
+    PROVER_INPUT(ffe::bn254_base::component_10)
+    PROVER_INPUT(ffe::bn254_base::component_9)
+    PROVER_INPUT(ffe::bn254_base::component_8)
+    PROVER_INPUT(ffe::bn254_base::component_7)
+    PROVER_INPUT(ffe::bn254_base::component_6)
+    PROVER_INPUT(ffe::bn254_base::component_5)
+    PROVER_INPUT(ffe::bn254_base::component_4)
+    PROVER_INPUT(ffe::bn254_base::component_3)
+    PROVER_INPUT(ffe::bn254_base::component_2)
+    PROVER_INPUT(ffe::bn254_base::component_1)
+    PROVER_INPUT(ffe::bn254_base::component_0)
+%endmacro
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
--- a/evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm
@ -1,39 +0,0 @@
-/// Division modulo 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47, the BN254 base field order
-/// To replace with more efficient method using non-determinism later.
-
-// Returns y * (x^-1) where the inverse is taken modulo N
-%macro moddiv
-    // stack: x, y
-    %inverse
-    // stack: x^-1, y
-    %mulmodn
-%endmacro
-
-%macro mulmodn
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    SWAP2
-    // stack: y, x, N
-    MULMOD
-%endmacro
-
-%macro squaremodn
-    // stack: x
-    DUP1
-    // stack: x, x
-    %mulmodn
-%endmacro
-
-// Non-deterministically provide the inverse modulo N.
-%macro inverse
-    // stack: x
-    PROVER_INPUT(ff::bn254_base::inverse)
-    // stack: x^-1, x
-    %stack (inv, x) -> (inv, x, @BN_BASE, inv)
-    // stack: x^-1, x, N, x^-1
-    MULMOD
-    // stack: x^-1 * x, x^-1
-    %assert_eq_const(1)
-    // stack: x^-1
-%endmacro
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@ -1,166 +0,0 @@
-/// Note: uncomment this to test
-
-/// global test_mul_Fp12:
-///     // stack:      f, in0 , f', g, in1 , g', in1, out, in0,       out
-///     DUP7
-///     // stack: in0, f, in0 , f', g, in1 , g', in1, out, in0,       out
-///     %store_fp6
-///     // stack:         in0 , f', g, in1 , g', in1, out, in0,       out
-///     %add_const(6)
-///     // stack:         in0', f', g, in1 , g', in1, out, in0,       out
-///     %store_fp6
-///     // stack:                   g, in1 , g', in1, out, in0,       out
-///     DUP7
-///     // stack:              in1, g, in1 , g', in1, out, in0,       out
-///     %store_fp6
-///     // stack:                      in1 , g', in1, out, in0,       out
-///     %add_const(6)
-///     // stack:                      in1', g', in1, out, in0,       out
-///     %store_fp6
-///     // stack:                                in1, out, in0,       out
-///     PUSH ret_stack
-///     // stack:                     ret_stack, in1, out, in0,       out
-///     SWAP3
-///     // stack:                           in0, in1, out, ret_stack, out
-///     %jump(mul_Fp12)
-/// ret_stack:
-///     // stack:          out
-///     DUP1  %add_const(6)
-///     // stack:    out', out
-///     %load_fp6
-///     // stack:      h', out
-///     DUP7
-///     // stack: out, h', out
-///     %load_fp6
-///     // stack:   h, h', out
-///     %jump(0xdeadbeef)
-
-
-/// fp6 functions:
-///  fn    | num | ops | cost
-///  -------------------------
-///  load  |   8 |  40 |  320
-///  store |   5 |  40 |  200
-///  dup   |   5 |   6 |   30
-///  swap  |   4 |  16 |   64
-///  add   |   4 |  16 |   64
-///  subr  |   1 |  17 |   17
-///  mul   |   3 | 157 |  471
-///  i9    |   1 |   9 |    9
-///
-/// lone stack operations:
-///  op    | num 
-///  ------------
-///  ADD   |   3
-///  SWAP  |   2
-///  DUP   |   6
-///  PUSH  |   6
-///  POP   |   2
-///  JUMP  |   1
-///
-/// TOTAL: 1196
-
-/// inputs:
-///     F = f + f'z
-///     G = g + g'z
-///
-/// output:
-///     H = h + h'z = FG
-///
-///     h  = fg + sh(f'g')
-///     h' = (f+f')(g+g') - fg - f'g'
-///
-/// memory pointers [ind' = ind+6]
-///     {in0: f, in0: f', in1: g, in1':g', out: h, out': h'}
-///
-/// f, f', g, g' consist of six elements on the stack
-
-global mul_Fp12:
-    // stack:                                in0, in1, out 
-    DUP1  %add_const(6) 
-    // stack:                          in0', in0, in1, out 
-    %load_fp6
-    // stack:                            f', in0, in1, out 
-    DUP8  %add_const(6)
-    // stack:                      in1', f', in0, in1, out 
-    %load_fp6
-    // stack:                        g', f', in0, in1, out 
-    PUSH ret_1
-    // stack:                 ret_1, g', f', in0, in1, out 
-    %dup_fp6_7
-    // stack:             f', ret_1, g', f', in0, in1, out 
-    %dup_fp6_7
-    // stack:         g', f', ret_1, g', f', in0, in1, out 
-    %jump(mul_fp6)
-ret_1:
-    // stack:                f'g', g'  , f', in0, in1, out 
-    %dup_fp6_0
-    // stack:          f'g', f'g', g'  , f', in0, in1, out 
-    %store_fp6_sh(100)                                    
-    // stack:                f'g', g'  , f', in0, in1, out  {100: sh(f'g')}
-    %store_fp6(106)
-    // stack:                      g'  , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    DUP13
-    // stack:                 in0, g'  , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    DUP15  
-    // stack:            in1, in0, g'  , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %load_fp6
-    // stack:             g , in0, g'  , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %swap_fp6_hole
-    // stack:             g', in0, g   , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %dup_fp6_7
-    // stack:           g,g', in0, g   , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %add_fp6
-    // stack:           g+g', in0, g   , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %swap_fp6_hole
-    // stack:              g, in0, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    PUSH ret_2
-    // stack:       ret_2, g, in0, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    SWAP7
-    // stack:       in0, g, ret_2, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %load_fp6
-    // stack:         f, g, ret_2, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %jump(mul_fp6)
-ret_2:    
-    // stack:                  fg, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %store_fp6(112)
-    // stack:                      g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %swap_fp6
-    // stack:                      f', g+g', in0, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    PUSH ret_3
-    // stack:               ret_3, f', g+g', in0, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    SWAP13
-    // stack:               in0, f', g+g', ret_3, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6
-    // stack:                  f,f', g+g', ret_3, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %add_fp6
-    // stack:                  f+f', g+g', ret_3, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %jump(mul_fp6)
-ret_3:
-    // stack:                       (f+f')(g+g'), in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(112)
-    // stack:                   fg, (f+f')(g+g'), in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %swap_fp6
-    // stack:                   (f+f')(g+g'), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %dup_fp6_6
-    // stack:               fg, (f+f')(g+g'), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(106)
-    // stack:          f'g',fg, (f+f')(g+g'), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %add_fp6
-    // stack:          f'g'+fg, (f+f')(g+g'), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %subr_fp6
-    // stack:       (f+f')(g+g') - (f'g'+fg), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}   
-    DUP14  %add_const(6) 
-    // stack: out', (f+f')(g+g') - (f'g'+fg), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}   
-    %store_fp6
-    // stack:                                 fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(100)
-    // stack:                      sh(f'g') , fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %add_fp6
-    // stack:                      sh(f'g') + fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    DUP8
-    // stack:                 out, sh(f'g') + fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %store_fp6
-    // stack:                                     in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %pop2  
-    JUMP
--- a/evm/src/cpu/kernel/asm/fields/fp6_macros.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp6_macros.asm
@ -1,314 +0,0 @@
-// cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro load_fp6
-    // stack: ptr
-    DUP1  %add_const(4)
-    // stack:                   ind4, ptr
-    %mload_kernel_general
-    // stack:                     x4, ptr
-    DUP2  %add_const(3)
-    // stack:               ind3, x4, ptr
-    %mload_kernel_general
-    // stack:                 x3, x4, ptr
-    DUP3  %add_const(2)
-    // stack:           ind2, x3, x4, ptr
-    %mload_kernel_general
-    // stack:             x2, x3, x4, ptr
-    DUP4  %add_const(1)
-    // stack:       ind1, x2, x3, x4, ptr
-    %mload_kernel_general
-    // stack:         x1, x2, x3, x4, ptr
-    DUP5  %add_const(5)
-    // stack:   ind5, x1, x2, x3, x4, ptr
-    %mload_kernel_general
-    // stack:     x5, x1, x2, x3, x4, ptr
-    SWAP5
-    // stack:   ind0, x1, x2, x3, x4, x5
-    %mload_kernel_general
-    // stack:     x0, x1, x2, x3, x4, x5
-%endmacro
-
-// cost: 6 loads + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro load_fp6(ptr)
-    // stack:
-    PUSH $ptr  %add_const(5)
-    // stack:                     ind5
-    %mload_kernel_general
-    // stack:                       x5
-    PUSH $ptr  %add_const(4)
-    // stack:                 ind4, x5
-    %mload_kernel_general
-    // stack:                   x4, x5
-    PUSH $ptr  %add_const(3)
-    // stack:             ind3, x4, x5
-    %mload_kernel_general
-    // stack:               x3, x4, x5
-    PUSH $ptr  %add_const(2)
-    // stack:         ind2, x3, x4, x5
-    %mload_kernel_general
-    // stack:           x2, x3, x4, x5
-    PUSH $ptr  %add_const(1)
-    // stack:     ind1, x2, x3, x4, x5
-    %mload_kernel_general
-    // stack:       x1, x2, x3, x4, x5
-    PUSH $ptr
-    // stack: ind0, x1, x2, x3, x4, x5
-    %mload_kernel_general
-    // stack:   x0, x1, x2, x3, x4, x5
-%endmacro
-
-// cost: 6 stores + 6 swaps/dups + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro store_fp6
-    // stack:      ptr, x0, x1, x2, x3, x4 , x5
-    SWAP5
-    // stack:       x4, x0, x1, x2, x3, ptr, x5
-    DUP6  %add_const(4)
-    // stack: ind4, x4, x0, x1, x2, x3, ptr, x5
-    %mstore_kernel_general
-    // stack:           x0, x1, x2, x3, ptr, x5
-    DUP5
-    // stack:     ind0, x0, x1, x2, x3, ptr, x5
-    %mstore_kernel_general
-    // stack:               x1, x2, x3, ptr, x5
-    DUP4  %add_const(1)
-    // stack:         ind1, x1, x2, x3, ptr, x5
-    %mstore_kernel_general
-    // stack:                   x2, x3, ptr, x5
-    DUP3  %add_const(2)
-    // stack:             ind2, x2, x3, ptr, x5
-    %mstore_kernel_general
-    // stack:                       x3, ptr, x5
-    DUP2  %add_const(3)
-    // stack:                 ind3, x3, ptr, x5
-    %mstore_kernel_general
-    // stack:                           ptr, x5
-    %add_const(5)
-    // stack:                          ind5, x5
-    %mstore_kernel_general
-    // stack:
-%endmacro
-
-// cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro store_fp6(ptr)
-    // stack:       x0, x1, x2, x3, x4, x5
-    PUSH $ptr
-    // stack: ind0, x0, x1, x2, x3, x4, x5
-    %mstore_kernel_general
-    // stack:           x1, x2, x3, x4, x5
-    PUSH $ptr  %add_const(1)
-    // stack:     ind1, x1, x2, x3, x4, x5
-    %mstore_kernel_general
-    // stack:               x2, x3, x4, x5
-    PUSH $ptr  %add_const(2)
-    // stack:         ind2, x2, x3, x4, x5
-    %mstore_kernel_general
-    // stack:                   x3, x4, x5
-    PUSH $ptr  %add_const(3)
-    // stack:             ind3, x3, x4, x5
-    %mstore_kernel_general
-    // stack:                       x4, x5
-    PUSH $ptr  %add_const(4)
-    // stack:                 ind4, x4, x5
-    %mstore_kernel_general
-    // stack:                           x5
-    PUSH $ptr  %add_const(5)
-    // stack:                     ind5, x5
-    %mstore_kernel_general
-    // stack:
-%endmacro
-
-// cost: store (40) + i9 (9) = 49
-%macro store_fp6_sh(ptr)
-    // stack:       x0, x1, x2, x3, x4, x5
-    PUSH $ptr  %add_const(2)
-    // stack: ind2, x0, x1, x2, x3, x4, x5
-    %mstore_kernel_general
-    // stack:           x1, x2, x3, x4, x5
-    PUSH $ptr  %add_const(3)
-    // stack:     ind3, x1, x2, x3, x4, x5
-    %mstore_kernel_general
-    // stack:               x2, x3, x4, x5
-    PUSH $ptr  %add_const(4)
-    // stack:         ind4, x2, x3, x4, x5
-    %mstore_kernel_general
-    // stack:                   x3, x4, x5
-    PUSH $ptr  %add_const(5)
-    // stack:             ind5, x3, x4, x5
-    %mstore_kernel_general
-    // stack:                       x4, x5
-    %i9
-    // stack:                       y5, y4
-    PUSH $ptr  %add_const(1)
-    // stack:                 ind1, y5, y4
-    %mstore_kernel_general
-    // stack:                           y4
-    PUSH $ptr
-    // stack:                     ind0, y4
-    %mstore_kernel_general
-    // stack:
-%endmacro
-
-// cost: 9; note this returns y, x for the output x + yi
-%macro i9
-    // stack:          a , b
-    DUP2
-    // stack:      b,  a,  b
-    DUP2
-    // stack:  a , b,  a , b
-    PUSH 9  MULFP254
-    // stack: 9a , b,  a , b
-    SUBFP254
-    // stack: 9a - b,  a , b
-    SWAP2 
-    // stack:  b , a, 9a - b
-    PUSH 9  MULFP254
-    // stack  9b , a, 9a - b
-    ADDFP254
-    // stack: 9b + a, 9a - b 
-%endmacro
-
-// cost: 6
-%macro dup_fp6_0
-    // stack:       f: 6
-    DUP6
-    DUP6
-    DUP6
-    DUP6
-    DUP6
-    DUP6
-    // stack: f: 6, g: 6
-%endmacro 
-
-// cost: 6
-%macro dup_fp6_6
-    // stack:       f: 6, g: 6
-    DUP12
-    DUP12
-    DUP12
-    DUP12
-    DUP12
-    DUP12
-    // stack: g: 6, f: 6, g: 6
-%endmacro
-
-// cost: 6
-%macro dup_fp6_7
-    // stack:       f: 6, g: 6
-    DUP13
-    DUP13
-    DUP13
-    DUP13
-    DUP13
-    DUP13
-    // stack: g: 6, f: 6, g: 6
-%endmacro
-
-// cost: 16
-%macro swap_fp6
-    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
-    SWAP6
-    // stack: g0, f1, f2, f3, f4, f5, f0, g1, g2, g3, g4, g5
-    SWAP1
-    SWAP7
-    SWAP1
-    // stack: g0, g1, f2, f3, f4, f5, f0, f1, g2, g3, g4, g5
-    SWAP2
-    SWAP8
-    SWAP2
-    // stack: g0, g1, g2, f3, f4, f5, f0, f1, f2, g3, g4, g5
-    SWAP3
-    SWAP9
-    SWAP3    
-    // stack: g0, g1, g2, g3, f4, f5, f0, f1, f2, f3, g4, g5
-    SWAP4
-    SWAP10
-    SWAP4
-    // stack: g0, g1, g2, g3, g4, f5, f0, f1, f2, f3, f4, g5
-    SWAP5
-    SWAP11
-    SWAP5
-    // stack: g0, g1, g2, g3, g4, g5, f0, f1, f2, f3, f4, f5
-%endmacro
-
-// cost: 16
-// swap two fp6 elements with a stack term separating them
-//    (f: 6, x, g: 6) -> (g: 6, x, f: 6)
-%macro swap_fp6_hole
-    // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
-    SWAP7
-    // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
-    SWAP1
-    SWAP8
-    SWAP1
-    // stack: g0, g1, f2, f3, f4, f5, X, f0, f1, g2, g3, g4, g5
-    SWAP2
-    SWAP9
-    SWAP2
-    // stack: g0, g1, g2, f3, f4, f5, X, f0, f1, f2, g3, g4, g5
-    SWAP3
-    SWAP10
-    SWAP3    
-    // stack: g0, g1, g2, g3, f4, f5, X, f0, f1, f2, f3, g4, g5
-    SWAP4
-    SWAP11
-    SWAP4
-    // stack: g0, g1, g2, g3, g4, f5, X, f0, f1, f2, f3, f4, g5
-    SWAP5
-    SWAP12
-    SWAP5
-    // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
-%endmacro
-
-// cost: 16
-%macro add_fp6
-    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
-    SWAP7
-    ADDFP254
-    SWAP6
-    // stack: f0,     f2, f3, f4, f5, g0, h1, g2, g3, g4, g5 
-    SWAP7
-    ADDFP254
-    SWAP6
-    // stack: f0,         f3, f4, f5, g0, h1, h2, g3, g4, g5 
-    SWAP7
-    ADDFP254
-    SWAP6
-    // stack: f0,             f4, f5, g0, h1, h2, h3, g4, g5
-    SWAP7
-    ADDFP254
-    SWAP6
-    // stack: f0,                 f5, g0, h1, h2, h3, h4, g5
-    SWAP7
-    ADDFP254
-    SWAP6
-    // stack: f0,                     g0, h1, h2, h3, h4, h5
-    ADDFP254
-    // stack:                         h0, h1, h2, h3, h4, h5
-%endmacro
-
-// *reversed argument subtraction* cost: 17
-%macro subr_fp6
-    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
-    SWAP7
-    SUBFP254
-    SWAP6
-    // stack: f0,     f2, f3, f4, f5, g0, h1, g2, g3, g4, g5 
-    SWAP7
-    SUBFP254
-    SWAP6
-    // stack: f0,         f3, f4, f5, g0, h1, h2, g3, g4, g5 
-    SWAP7
-    SUBFP254
-    SWAP6
-    // stack: f0,             f4, f5, g0, h1, h2, h3, g4, g5
-    SWAP7
-    SUBFP254
-    SWAP6
-    // stack: f0,                 f5, g0, h1, h2, h3, h4, g5
-    SWAP7
-    SUBFP254
-    SWAP6
-    // stack: f0,                     g0, h1, h2, h3, h4, h5
-    SWAP1
-    SUBFP254
-    // stack:                         h0, h1, h2, h3, h4, h5
-%endmacro
--- a/evm/src/cpu/kernel/asm/hash/ripemd/box.asm
+++ b/evm/src/cpu/kernel/asm/hash/ripemd/box.asm
@ -72,7 +72,7 @@ post_rol:


 %macro get_round
-    // stack: sides, rounds
+    // stack:        sides , rounds
    %mul_const(5)  
    PUSH 10  
    SUB  
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@ -297,30 +297,30 @@

 // given u32 bytestring abcd return dcba
 %macro reverse_bytes_u32
-    // stack:                   abcd
+    // stack:              abcd
    DUP1
    PUSH 28
    BYTE
-    // stack:                a, abcd
+    // stack:           a, abcd
    DUP2
    PUSH 29
    BYTE
    %shl_const(8)
-    // stack:            b0, a, abcd 
+    // stack:       b0, a, abcd 
    DUP3
    PUSH 30
    BYTE
    %shl_const(16)
-    // stack:       c00, b0, a, abcd
+    // stack:  c00, b0, a, abcd
    SWAP3
    PUSH 31
    BYTE
    %shl_const(24)
-    // stack:       d000, b0, a, c00
+    // stack:  d000, b0, a, c00
    ADD // OR
    ADD // OR
    ADD // OR
-    // stack:                   dcba
+    // stack:              dcba
 %endmacro

 %macro reverse_bytes_u64
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@ -1,12 +1,14 @@
 //! An EVM interpreter for testing and debugging purposes.

 use std::collections::HashMap;
+use std::ops::Range;

 use anyhow::{anyhow, bail, ensure};
 use ethereum_types::{U256, U512};
 use keccak_hash::keccak;
 use plonky2::field::goldilocks_field::GoldilocksField;

+use crate::bn254_arithmetic::BN_BASE;
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::constants::context_metadata::ContextMetadata;
 use crate::cpu::kernel::constants::global_metadata::GlobalMetadata;
@ -23,14 +25,6 @@ type F = GoldilocksField;
 /// Halt interpreter execution whenever a jump to this offset is done.
 const DEFAULT_HALT_OFFSET: usize = 0xdeadbeef;

-/// Order of the BN254 base field.
-const BN_BASE: U256 = U256([
-    4332616871279656263,
-    10917124144477883021,
-    13281191951274694749,
-    3486998266802970665,
-]);
-
 impl MemoryState {
    pub(crate) fn mload_general(&self, context: usize, segment: Segment, offset: usize) -> U256 {
        self.get(MemoryAddress::new(context, segment, offset))
@ -267,6 +261,18 @@ impl<'a> Interpreter<'a> {
            .content
    }

+    pub fn extract_kernel_memory(self, segment: Segment, range: Range<usize>) -> Vec<U256> {
+        let mut output: Vec<U256> = vec![];
+        for i in range {
+            let term = self
+                .generation_state
+                .memory
+                .get(MemoryAddress::new(0, segment, i));
+            output.push(term);
+        }
+        output
+    }
+
    pub(crate) fn push(&mut self, x: U256) {
        self.stack_mut().push(x);
        self.generation_state.registers.stack_len += 1;
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@ -0,0 +1,319 @@
+use anyhow::Result;
+use ethereum_types::U256;
+use rand::Rng;
+
+use crate::bn254_arithmetic::{Fp, Fp12, Fp2, Fp6};
+use crate::bn254_pairing::{
+    gen_fp12_sparse, invariant_exponent, miller_loop, tate, Curve, TwistedCurve,
+};
+use crate::cpu::kernel::interpreter::{
+    run_interpreter_with_memory, Interpreter, InterpreterMemoryInitialization,
+};
+use crate::memory::segments::Segment::BnPairing;
+
+/// Returns a copy of the interpreter's stack with the element order reversed.
+fn extract_stack(interpreter: Interpreter<'static>) -> Vec<U256> {
+    let mut reversed: Vec<U256> = interpreter.stack().to_vec();
+    reversed.reverse();
+    reversed
+}
+
+/// Builds the interpreter setup for an `Fp6` multiplication-style kernel routine.
+///
+/// The initial stack is `f`, followed by `g` only when `label` is `"mul_fp254_6"`,
+/// followed by the `0xdeadbeef` word. No memory is preloaded.
+fn setup_mul_fp6_test(f: Fp6, g: Fp6, label: &str) -> InterpreterMemoryInitialization {
+    // Only the two-operand routine consumes `g`; any other label gets `f` alone.
+    let second_operand = if label == "mul_fp254_6" {
+        g.on_stack()
+    } else {
+        Vec::new()
+    };
+    let return_marker = vec![U256::from(0xdeadbeefu32)];
+    InterpreterMemoryInitialization {
+        label: label.to_owned(),
+        stack: [f.on_stack(), second_operand, return_marker].concat(),
+        segment: BnPairing,
+        memory: Vec::new(),
+    }
+}
+
+/// Checks the `mul_fp254_6` and `square_fp254_6` kernel routines against native `Fp6`
+/// multiplication on random inputs.
+#[test]
+fn test_mul_fp6() -> Result<()> {
+    let mut rng = rand::thread_rng();
+    let f: Fp6 = rng.gen();
+    let g: Fp6 = rng.gen();
+
+    let product_setup = setup_mul_fp6_test(f, g, "mul_fp254_6");
+    let square_setup = setup_mul_fp6_test(f, f, "square_fp254_6");
+
+    let product_run = run_interpreter_with_memory(product_setup).unwrap();
+    let square_run = run_interpreter_with_memory(square_setup).unwrap();
+
+    assert_eq!(extract_stack(product_run), (f * g).on_stack());
+    assert_eq!(extract_stack(square_run), (f * f).on_stack());
+
+    Ok(())
+}
+
+/// Builds the interpreter setup for an `Fp12` multiplication-style kernel routine.
+///
+/// `f` is preloaded at offset 200 and `g` at offset 212 of the `BnPairing` segment.
+/// The initial stack is `[200, 212, out, 0xdeadbeef]`, with the leading `200` omitted
+/// when `label` is `"square_fp254_12"`.
+fn setup_mul_fp12_test(
+    out: usize,
+    f: Fp12,
+    g: Fp12,
+    label: &str,
+) -> InterpreterMemoryInitialization {
+    const IN0: usize = 200;
+    const IN1: usize = 212;
+
+    let mut stack = Vec::with_capacity(4);
+    // The squaring routine takes a single input pointer, so the first one is left out.
+    if label != "square_fp254_12" {
+        stack.push(U256::from(IN0));
+    }
+    stack.extend([IN1, out].map(U256::from));
+    stack.push(U256::from(0xdeadbeefu32));
+
+    InterpreterMemoryInitialization {
+        label: label.to_owned(),
+        stack,
+        segment: BnPairing,
+        memory: vec![(IN0, f.on_stack()), (IN1, g.on_stack())],
+    }
+}
+
+/// Checks the `mul_fp254_12`, `mul_fp254_12_sparse` and `square_fp254_12` kernel routines
+/// against native `Fp12` multiplication on random inputs.
+#[test]
+fn test_mul_fp12() -> Result<()> {
+    let out: usize = 224;
+
+    let mut rng = rand::thread_rng();
+    let f: Fp12 = rng.gen();
+    let g: Fp12 = rng.gen();
+    let h: Fp12 = gen_fp12_sparse(&mut rng);
+
+    let normal_run =
+        run_interpreter_with_memory(setup_mul_fp12_test(out, f, g, "mul_fp254_12")).unwrap();
+    let sparse_run =
+        run_interpreter_with_memory(setup_mul_fp12_test(out, f, h, "mul_fp254_12_sparse"))
+            .unwrap();
+    let square_run =
+        run_interpreter_with_memory(setup_mul_fp12_test(out, f, f, "square_fp254_12")).unwrap();
+
+    // Each routine writes its twelve result words starting at `out`.
+    let result_words = out..out + 12;
+    let got_normal = normal_run.extract_kernel_memory(BnPairing, result_words.clone());
+    let got_sparse = sparse_run.extract_kernel_memory(BnPairing, result_words.clone());
+    let got_square = square_run.extract_kernel_memory(BnPairing, result_words);
+
+    assert_eq!(got_normal, (f * g).on_stack());
+    assert_eq!(got_sparse, (f * h).on_stack());
+    assert_eq!(got_square, (f * f).on_stack());
+
+    Ok(())
+}
+
+/// Builds the interpreter setup for the `test_frob_fp254_6_<n>` kernel label, with `f`
+/// as the entire initial stack and no preloaded memory.
+fn setup_frob_fp6_test(f: Fp6, n: usize) -> InterpreterMemoryInitialization {
+    let label = format!("test_frob_fp254_6_{}", n);
+    InterpreterMemoryInitialization {
+        label,
+        stack: f.on_stack(),
+        segment: BnPairing,
+        memory: Vec::new(),
+    }
+}
+
+/// Checks the `Fp6` Frobenius kernel routines for `n = 1, 2, 3` against the native
+/// `Fp6::frob` on a random input.
+#[test]
+fn test_frob_fp6() -> Result<()> {
+    let f: Fp6 = rand::thread_rng().gen();
+    for n in 1..=3 {
+        let finished = run_interpreter_with_memory(setup_frob_fp6_test(f, n)).unwrap();
+        assert_eq!(extract_stack(finished), f.frob(n).on_stack());
+    }
+    Ok(())
+}
+
+/// Builds the interpreter setup for the `test_frob_fp254_12_<n>` kernel label: `f` is
+/// preloaded at `ptr` in the `BnPairing` segment and `ptr` is the only stack item.
+fn setup_frob_fp12_test(ptr: usize, f: Fp12, n: usize) -> InterpreterMemoryInitialization {
+    let label = format!("test_frob_fp254_12_{}", n);
+    let stack = vec![U256::from(ptr)];
+    let memory = vec![(ptr, f.on_stack())];
+    InterpreterMemoryInitialization {
+        label,
+        stack,
+        segment: BnPairing,
+        memory,
+    }
+}
+
+/// Checks the `Fp12` Frobenius kernel routines for `n = 1, 2, 3, 6` against the native
+/// `Fp12::frob` on a random input; the result is read back from where the input was stored.
+#[test]
+fn test_frob_fp12() -> Result<()> {
+    const PTR: usize = 200;
+    let f: Fp12 = rand::thread_rng().gen();
+    for n in [1, 2, 3, 6] {
+        let finished = run_interpreter_with_memory(setup_frob_fp12_test(PTR, f, n)).unwrap();
+        assert_eq!(
+            finished.extract_kernel_memory(BnPairing, PTR..PTR + 12),
+            f.frob(n).on_stack()
+        );
+    }
+    Ok(())
+}
+
+/// Checks the `inv_fp254_12` kernel routine against native `Fp12` inversion on a random
+/// input stored at offset 200, reading the result back from offset 212.
+#[test]
+fn test_inv_fp12() -> Result<()> {
+    const PTR: usize = 200;
+    const INV: usize = 212;
+
+    let f: Fp12 = rand::thread_rng().gen();
+
+    let finished = run_interpreter_with_memory(InterpreterMemoryInitialization {
+        label: "inv_fp254_12".to_string(),
+        stack: vec![U256::from(PTR), U256::from(INV), U256::from(0xdeadbeefu32)],
+        segment: BnPairing,
+        memory: vec![(PTR, f.on_stack())],
+    })
+    .unwrap();
+
+    assert_eq!(
+        finished.extract_kernel_memory(BnPairing, INV..INV + 12),
+        f.inv().on_stack()
+    );
+
+    Ok(())
+}
+
+/// Checks the `bn254_invariant_exponent` kernel routine against the native
+/// `invariant_exponent` on a random input; the result is read back from the input location.
+#[test]
+fn test_invariant_exponent() -> Result<()> {
+    const PTR: usize = 200;
+
+    let f: Fp12 = rand::thread_rng().gen();
+
+    let finished = run_interpreter_with_memory(InterpreterMemoryInitialization {
+        label: "bn254_invariant_exponent".to_string(),
+        stack: vec![U256::from(PTR), U256::from(0xdeadbeefu32)],
+        segment: BnPairing,
+        memory: vec![(PTR, f.on_stack())],
+    })
+    .unwrap();
+
+    assert_eq!(
+        finished.extract_kernel_memory(BnPairing, PTR..PTR + 12),
+        invariant_exponent(f).on_stack()
+    );
+
+    Ok(())
+}
+
+/// Generator `(1, 2)` of the curve, which is cyclic.
+pub const CURVE_GENERATOR: Curve = Curve {
+    x: Fp { val: U256::one() },
+    y: Fp {
+        val: U256([2, 0, 0, 0]),
+    },
+};
+
+/// Generator `(x, y)` of the twisted curve, which is cyclic.
+///
+/// Each coordinate is an `Fp2` element with `re` and `im` parts; every `U256` literal
+/// below lists its four 64-bit limbs least-significant first.
+pub const TWISTED_GENERATOR: TwistedCurve = {
+    TwistedCurve {
+        x: Fp2 {
+            re: Fp {
+                val: U256([
+                    0x46debd5cd992f6ed,
+                    0x674322d4f75edadd,
+                    0x426a00665e5c4479,
+                    0x1800deef121f1e76,
+                ]),
+            },
+            im: Fp {
+                val: U256([
+                    0x97e485b7aef312c2,
+                    0xf1aa493335a9e712,
+                    0x7260bfb731fb5d25,
+                    0x198e9393920d483a,
+                ]),
+            },
+        },
+        y: Fp2 {
+            re: Fp {
+                val: U256([
+                    0x4ce6cc0166fa7daa,
+                    0xe3d1e7690c43d37b,
+                    0x4aab71808dcb408f,
+                    0x12c85ea5db8c6deb,
+                ]),
+            },
+            im: Fp {
+                val: U256([
+                    0x55acdadcd122975b,
+                    0xbc4b313370b38ef3,
+                    0xec9e99ad690c3395,
+                    0x090689d0585ff075,
+                ]),
+            },
+        },
+    }
+};
+
+/// Checks the `bn254_miller` kernel routine against the native `miller_loop`, using the
+/// curve and twisted-curve generators as inputs.
+#[test]
+fn test_miller() -> Result<()> {
+    const PTR: usize = 200;
+    const OUT: usize = 206;
+
+    // Six input words: the curve point's (x, y), then the twisted point's x and y,
+    // each as (re, im).
+    let (p, q) = (CURVE_GENERATOR, TWISTED_GENERATOR);
+    let inputs = vec![p.x.val, p.y.val, q.x.re.val, q.x.im.val, q.y.re.val, q.y.im.val];
+
+    let finished = run_interpreter_with_memory(InterpreterMemoryInitialization {
+        label: "bn254_miller".to_string(),
+        stack: vec![U256::from(PTR), U256::from(OUT), U256::from(0xdeadbeefu32)],
+        segment: BnPairing,
+        memory: vec![(PTR, inputs)],
+    })
+    .unwrap();
+
+    assert_eq!(
+        finished.extract_kernel_memory(BnPairing, OUT..OUT + 12),
+        miller_loop(p, q).on_stack()
+    );
+
+    Ok(())
+}
+
+/// Checks the `bn254_tate` kernel routine against the native `tate`, using the curve and
+/// twisted-curve generators as inputs.
+#[test]
+fn test_tate() -> Result<()> {
+    const PTR: usize = 200;
+    const OUT: usize = 206;
+
+    // Six input words: the curve point's (x, y), then the twisted point's x and y,
+    // each as (re, im).
+    let (p, q) = (CURVE_GENERATOR, TWISTED_GENERATOR);
+    let inputs = vec![p.x.val, p.y.val, q.x.re.val, q.x.im.val, q.y.re.val, q.y.im.val];
+
+    let finished = run_interpreter_with_memory(InterpreterMemoryInitialization {
+        label: "bn254_tate".to_string(),
+        stack: vec![U256::from(PTR), U256::from(OUT), U256::from(0xdeadbeefu32)],
+        segment: BnPairing,
+        memory: vec![(PTR, inputs)],
+    })
+    .unwrap();
+
+    assert_eq!(
+        finished.extract_kernel_memory(BnPairing, OUT..OUT + 12),
+        tate(p, q).on_stack()
+    );
+
+    Ok(())
+}
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@ -1,203 +0,0 @@
-use anyhow::Result;
-use ethereum_types::U256;
-use rand::{thread_rng, Rng};
-
-use crate::cpu::kernel::aggregator::KERNEL;
-use crate::cpu::kernel::interpreter::run_interpreter;
-
-// TODO: 107 is hardcoded as a dummy prime for testing
-// should be changed to the proper implementation prime
-// once the run_{add, mul, sub}fp254 fns are implemented
-const P254: u32 = 107;
-
-fn add_fp(x: u32, y: u32) -> u32 {
-    (x + y) % P254
-}
-
-fn add3_fp(x: u32, y: u32, z: u32) -> u32 {
-    (x + y + z) % P254
-}
-
-fn mul_fp(x: u32, y: u32) -> u32 {
-    (x * y) % P254
-}
-
-fn sub_fp(x: u32, y: u32) -> u32 {
-    (P254 + x - y) % P254
-}
-
-fn add_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [add_fp(a, b), add_fp(a_, b_)]
-}
-
-fn add3_fp2(a: [u32; 2], b: [u32; 2], c: [u32; 2]) -> [u32; 2] {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    let [c, c_] = c;
-    [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
-}
-
-// fn sub_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
-//     let [a, a_] = a;
-//     let [b, b_] = b;
-//     [sub_fp(a, b), sub_fp(a_, b_)]
-// }
-
-fn mul_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [
-        sub_fp(mul_fp(a, b), mul_fp(a_, b_)),
-        add_fp(mul_fp(a, b_), mul_fp(a_, b)),
-    ]
-}
-
-fn i9(a: [u32; 2]) -> [u32; 2] {
-    let [a, a_] = a;
-    [sub_fp(mul_fp(9, a), a_), add_fp(a, mul_fp(9, a_))]
-}
-
-// fn add_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
-//     let [c0, c1, c2] = c;
-//     let [d0, d1, d2] = d;
-
-//     let e0 = add_fp2(c0, d0);
-//     let e1 = add_fp2(c1, d1);
-//     let e2 = add_fp2(c2, d2);
-//     [e0, e1, e2]
-// }
-
-// fn sub_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
-//     let [c0, c1, c2] = c;
-//     let [d0, d1, d2] = d;
-
-//     let e0 = sub_fp2(c0, d0);
-//     let e1 = sub_fp2(c1, d1);
-//     let e2 = sub_fp2(c2, d2);
-//     [e0, e1, e2]
-// }
-
-fn mul_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
-
-    let c0d0 = mul_fp2(c0, d0);
-    let c0d1 = mul_fp2(c0, d1);
-    let c0d2 = mul_fp2(c0, d2);
-    let c1d0 = mul_fp2(c1, d0);
-    let c1d1 = mul_fp2(c1, d1);
-    let c1d2 = mul_fp2(c1, d2);
-    let c2d0 = mul_fp2(c2, d0);
-    let c2d1 = mul_fp2(c2, d1);
-    let c2d2 = mul_fp2(c2, d2);
-    let cd12 = add_fp2(c1d2, c2d1);
-
-    [
-        add_fp2(c0d0, i9(cd12)),
-        add3_fp2(c0d1, c1d0, i9(c2d2)),
-        add3_fp2(c0d2, c1d1, c2d0),
-    ]
-}
-
-// fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
-//     let [c0, c1, c2] = c;
-//     [i9(c2), c0, c1]
-// }
-
-// fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] {
-//     let [f0, f1] = f;
-//     let [g0, g1] = g;
-
-//     let h0 = mul_fp6(f0, g0);
-//     let h1 = mul_fp6(f1, g1);
-//     let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
-//     [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
-// }
-
-fn gen_fp6() -> [[u32; 2]; 3] {
-    let mut rng = thread_rng();
-    [
-        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
-        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
-        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
-    ]
-}
-
-fn as_stack(xs: Vec<u32>) -> Vec<U256> {
-    xs.iter().map(|&x| U256::from(x)).rev().collect()
-}
-
-#[test]
-#[ignore]
-fn test_fp6() -> Result<()> {
-    let c = gen_fp6();
-    let d = gen_fp6();
-
-    let mut input: Vec<u32> = [c, d].into_iter().flatten().flatten().collect();
-    input.push(0xdeadbeef);
-
-    let initial_offset = KERNEL.global_labels["mul_fp6"];
-    let initial_stack: Vec<U256> = as_stack(input);
-    let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
-        .stack()
-        .to_vec();
-
-    let output: Vec<u32> = mul_fp6(c, d).into_iter().flatten().collect();
-    let expected = as_stack(output);
-
-    assert_eq!(final_stack, expected);
-
-    Ok(())
-}
-
-// fn make_initial_stack(
-//     f0: [[u32; 2]; 3],
-//     f1: [[u32; 2]; 3],
-//     g0: [[u32; 2]; 3],
-//     g1: [[u32; 2]; 3],
-// ) -> Vec<U256> {
-//     // stack: in0, f, in0', f', in1, g, in1', g', in1, out, in0, out
-//     let f0: Vec<u32> = f0.into_iter().flatten().collect();
-//     let f1: Vec<u32> = f1.into_iter().flatten().collect();
-//     let g0: Vec<u32> = g0.into_iter().flatten().collect();
-//     let g1: Vec<u32> = g1.into_iter().flatten().collect();
-
-//     let mut input = f0;
-//     input.extend(vec![0]);
-//     input.extend(f1);
-//     input.extend(g0);
-//     input.extend(vec![12]);
-//     input.extend(g1);
-//     input.extend(vec![12, 24, 0, 24]);
-
-//     as_stack(input)
-// }
-
-// #[test]
-// fn test_fp12() -> Result<()> {
-//     let f0 = gen_fp6();
-//     let f1 = gen_fp6();
-//     let g0 = gen_fp6();
-//     let g1 = gen_fp6();
-
-//     let kernel = combined_kernel();
-//     let initial_offset = kernel.global_labels["test_mul_Fp12"];
-//     let initial_stack: Vec<U256> = make_initial_stack(f0, f1, g0, g1);
-//     let final_stack: Vec<U256> = run_with_kernel(&kernel, initial_offset, initial_stack)?
-//         .stack()
-//         .to_vec();
-
-//     let mut output: Vec<u32> = mul_fp12([f0, f1], [g0, g1])
-//         .into_iter()
-//         .flatten()
-//         .flatten()
-//         .collect();
-//     output.extend(vec![24]);
-//     let expected = as_stack(output);
-
-//     assert_eq!(final_stack, expected);
-
-//     Ok(())
-// }
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@ -1,10 +1,10 @@
 mod account_code;
 mod balance;
 mod bignum;
+mod bn254;
 mod core;
 mod ecc;
 mod exp;
-mod fields;
 mod hash;
 mod mpt;
 mod packing;
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@ -1,15 +1,18 @@
+use std::mem::transmute;
 use std::str::FromStr;

 use anyhow::{bail, Error};
 use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;

+use crate::bn254_arithmetic::Fp12;
 use crate::generation::prover_input::EvmField::{
    Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
 use crate::generation::prover_input::FieldOp::{Inverse, Sqrt};
 use crate::generation::state::GenerationState;
-use crate::witness::util::stack_peek;
+use crate::memory::segments::Segment::BnPairing;
+use crate::witness::util::{kernel_peek, stack_peek};

 /// Prover input function represented as a scoped function name.
 /// Example: `PROVER_INPUT(ff::bn254_base::inverse)` is represented as `ProverInputFn([ff, bn254_base, inverse])`.
@ -27,6 +30,7 @@ impl<F: Field> GenerationState<F> {
        match input_fn.0[0].as_str() {
            "end_of_txns" => self.run_end_of_txns(),
            "ff" => self.run_ff(input_fn),
+            "ffe" => self.run_ffe(input_fn),
            "mpt" => self.run_mpt(),
            "rlp" => self.run_rlp(),
            "account_code" => self.run_account_code(input_fn),
@ -52,6 +56,31 @@ impl<F: Field> GenerationState<F> {
        field.op(op, x)
    }

+    /// Finite field extension operations.
+    ///
+    /// The field is parsed from `input_fn.0[1]`, and the coefficient index `n` from the
+    /// text following the first `_` in `input_fn.0[2]`. A pointer is read from the stack
+    /// at depth `11 - n`, twelve consecutive words are loaded from the `BnPairing` segment
+    /// starting at that pointer, and coefficient `n` of the inverse of the element they
+    /// encode is returned.
+    ///
+    /// NOTE(review): the `11 - n` depth presumably accounts for results of earlier calls
+    /// already sitting on the stack — confirm against the calling kernel code.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the field name is not recognised, if the index cannot be parsed or is
+    /// not below 12, or if the stack holds `11 - n` items or fewer. Fields other than
+    /// `Bn254Base` are not implemented yet and hit `todo!()`.
+    fn run_ffe(&self, input_fn: &ProverInputFn) -> U256 {
+        let field = EvmField::from_str(input_fn.0[1].as_str()).unwrap();
+        let n = input_fn.0[2]
+            .as_str()
+            .split('_')
+            .nth(1)
+            .unwrap()
+            .parse::<usize>()
+            .unwrap();
+        // Fail with a clear message instead of letting `11 - n` underflow (or wrap in
+        // release builds and surface later as a misleading "Empty stack" panic).
+        assert!(
+            n < 12,
+            "ffe coefficient index {} out of range (expected 0..12)",
+            n
+        );
+        let ptr = stack_peek(self, 11 - n).expect("Empty stack").as_usize();
+
+        let f: [U256; 12] = match field {
+            Bn254Base => std::array::from_fn(|i| kernel_peek(self, BnPairing, ptr + i)),
+            _ => todo!(),
+        };
+        field.field_extension_inverse(n, f)
+    }
+
    /// MPT data.
    fn run_mpt(&mut self) -> U256 {
        self.mpt_prover_inputs
@ -176,6 +205,12 @@ impl EvmField {
        );
        modexp(x, q, n)
    }
+
+    /// Returns coefficient `n` of the inverse of the `Fp12` element encoded by `f`.
+    ///
+    /// The twelve words of `f` are reinterpreted as an `Fp12`, inverted, and the result
+    /// is reinterpreted back as twelve words. `self` is not consulted, so the result is
+    /// the same whichever field variant this is called on.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `n >= 12`.
+    fn field_extension_inverse(&self, n: usize, f: [U256; 12]) -> U256 {
+        // NOTE(review): both transmutes assume `Fp12` has exactly the in-memory layout of
+        // `[U256; 12]`, coefficients in the same order — confirm against `Fp12`'s definition.
+        let f: Fp12 = unsafe { transmute(f) };
+        let f_inv: [U256; 12] = unsafe { transmute(f.inv()) };
+        f_inv[n]
+    }
 }

 fn modexp(x: U256, e: U256, n: U256) -> U256 {
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@ -8,6 +8,8 @@

 pub mod all_stark;
 pub mod arithmetic;
+pub mod bn254_arithmetic;
+pub mod bn254_pairing;
 pub mod config;
 pub mod constraint_consumer;
 pub mod cpu;
--- a/evm/src/memory/segments.rs
+++ b/evm/src/memory/segments.rs
@ -43,10 +43,11 @@ pub enum Segment {
    BnWnafA = 19,
    BnWnafB = 20,
    BnTableQ = 21,
+    BnPairing = 22,
 }

 impl Segment {
-    pub(crate) const COUNT: usize = 22;
+    pub(crate) const COUNT: usize = 23;

    pub(crate) fn all() -> [Self; Self::COUNT] {
        [
@ -72,6 +73,7 @@ impl Segment {
            Self::BnWnafA,
            Self::BnWnafB,
            Self::BnTableQ,
+            Self::BnPairing,
        ]
    }

@ -100,6 +102,7 @@ impl Segment {
            Segment::BnWnafA => "SEGMENT_KERNEL_BN_WNAF_A",
            Segment::BnWnafB => "SEGMENT_KERNEL_BN_WNAF_B",
            Segment::BnTableQ => "SEGMENT_KERNEL_BN_TABLE_Q",
+            Segment::BnPairing => "SEGMENT_KERNEL_BN_PAIRING",
        }
    }

@ -128,6 +131,7 @@ impl Segment {
            Segment::BnWnafA => 8,
            Segment::BnWnafB => 8,
            Segment::BnTableQ => 256,
+            Segment::BnPairing => 256,
        }
    }
 }
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@ -27,7 +27,7 @@ fn to_bits_le<F: Field>(n: u8) -> [F; 8] {
    res
 }

-/// Peak at the stack item `i`th from the top. If `i=0` this gives the tip.
+/// Peek at the stack item `i`th from the top. If `i=0` this gives the tip.
 pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Option<U256> {
    if i >= state.registers.stack_len {
        return None;
@ -39,6 +39,17 @@ pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Opti
    )))
 }

+/// Peek at the word at `virt` in `segment` of the current context (`state.registers.context`).
+pub(crate) fn kernel_peek<F: Field>(
+    state: &GenerationState<F>,
+    segment: Segment,
+    virt: usize,
+) -> U256 {
+    state
+        .memory
+        .get(MemoryAddress::new(state.registers.context, segment, virt))
+}
+
 pub(crate) fn mem_read_with_log<F: Field>(
    channel: MemoryChannel,
    address: MemoryAddress,