diff --git a/evm/src/arithmetic/mod.rs b/evm/src/arithmetic/mod.rs index 6ba8ed12..8d9592b7 100644 --- a/evm/src/arithmetic/mod.rs +++ b/evm/src/arithmetic/mod.rs @@ -1,6 +1,7 @@ use ethereum_types::U256; use plonky2::field::types::PrimeField64; +use crate::bn254_arithmetic::BN_BASE; use crate::util::{addmod, mulmod, submod}; mod addcy; @@ -47,9 +48,9 @@ impl BinaryOperator { } BinaryOperator::Lt => U256::from((input0 < input1) as u8), BinaryOperator::Gt => U256::from((input0 > input1) as u8), - BinaryOperator::AddFp254 => addmod(input0, input1, BN_BASE_ORDER), - BinaryOperator::MulFp254 => mulmod(input0, input1, BN_BASE_ORDER), - BinaryOperator::SubFp254 => submod(input0, input1, BN_BASE_ORDER), + BinaryOperator::AddFp254 => addmod(input0, input1, BN_BASE), + BinaryOperator::MulFp254 => mulmod(input0, input1, BN_BASE), + BinaryOperator::SubFp254 => submod(input0, input1, BN_BASE), } } @@ -211,15 +212,7 @@ fn binary_op_to_rows( ternary_op_to_rows::(op.row_filter(), input0, U256::zero(), input1, result) } BinaryOperator::AddFp254 | BinaryOperator::MulFp254 | BinaryOperator::SubFp254 => { - ternary_op_to_rows::(op.row_filter(), input0, input1, BN_BASE_ORDER, result) + ternary_op_to_rows::(op.row_filter(), input0, input1, BN_BASE, result) } } } - -/// Order of the BN254 base field. 
-const BN_BASE_ORDER: U256 = U256([ - 4332616871279656263, - 10917124144477883021, - 13281191951274694749, - 3486998266802970665, -]); diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs new file mode 100644 index 00000000..c2f1e3d4 --- /dev/null +++ b/evm/src/bn254_arithmetic.rs @@ -0,0 +1,876 @@ +use std::mem::transmute; +use std::ops::{Add, Div, Mul, Neg, Sub}; + +use ethereum_types::U256; +use rand::distributions::{Distribution, Standard}; +use rand::Rng; + +pub const BN_BASE: U256 = U256([ + 0x3c208c16d87cfd47, + 0x97816a916871ca8d, + 0xb85045b68181585d, + 0x30644e72e131a029, +]); + +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct Fp { + pub val: U256, +} + +impl Fp { + pub fn new(val: usize) -> Fp { + Fp { + val: U256::from(val), + } + } +} + +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Fp { + let xs = rng.gen::<[u64; 4]>(); + Fp { + val: U256(xs) % BN_BASE, + } + } +} + +impl Add for Fp { + type Output = Self; + + fn add(self, other: Self) -> Self { + Fp { + val: (self.val + other.val) % BN_BASE, + } + } +} + +impl Neg for Fp { + type Output = Self; + + fn neg(self) -> Self::Output { + Fp { + val: (BN_BASE - self.val) % BN_BASE, + } + } +} + +impl Sub for Fp { + type Output = Self; + + fn sub(self, other: Self) -> Self { + Fp { + val: (BN_BASE + self.val - other.val) % BN_BASE, + } + } +} + +#[allow(clippy::suspicious_arithmetic_impl)] +impl Mul for Fp { + type Output = Self; + + fn mul(self, other: Self) -> Self { + Fp { + val: U256::try_from((self.val).full_mul(other.val) % BN_BASE).unwrap(), + } + } +} + +impl Fp { + pub fn inv(self) -> Fp { + exp_fp(self, BN_BASE - 2) + } +} + +#[allow(clippy::suspicious_arithmetic_impl)] +impl Div for Fp { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + self * rhs.inv() + } +} + +pub const ZERO_FP: Fp = Fp { val: U256::zero() }; +pub const UNIT_FP: Fp = Fp { val: U256::one() }; + +fn exp_fp(x: Fp, e: U256) -> Fp { + let mut current = x; + let mut 
product = Fp { val: U256::one() }; + + for j in 0..256 { + if e.bit(j) { + product = product * current; + } + current = current * current; + } + product +} + +/// The degree 2 field extension Fp2 is given by adjoining i, the square root of -1, to Fp +/// The arithmetic in this extension is standard complex arithmetic +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct Fp2 { + pub re: Fp, + pub im: Fp, +} + +pub const ZERO_FP2: Fp2 = Fp2 { + re: ZERO_FP, + im: ZERO_FP, +}; + +pub const UNIT_FP2: Fp2 = Fp2 { + re: UNIT_FP, + im: ZERO_FP, +}; + +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Fp2 { + let (re, im) = rng.gen::<(Fp, Fp)>(); + Fp2 { re, im } + } +} + +impl Add for Fp2 { + type Output = Self; + + fn add(self, other: Self) -> Self { + Fp2 { + re: self.re + other.re, + im: self.im + other.im, + } + } +} + +impl Neg for Fp2 { + type Output = Self; + + fn neg(self) -> Self::Output { + Fp2 { + re: -self.re, + im: -self.im, + } + } +} + +impl Sub for Fp2 { + type Output = Self; + + fn sub(self, other: Self) -> Self { + Fp2 { + re: self.re - other.re, + im: self.im - other.im, + } + } +} + +impl Mul for Fp2 { + type Output = Self; + + fn mul(self, other: Self) -> Self { + Fp2 { + re: self.re * other.re - self.im * other.im, + im: self.re * other.im + self.im * other.re, + } + } +} + +impl Fp2 { + // We preemptively define a helper function which multiplies an Fp2 element by 9 + i + fn i9(self) -> Fp2 { + let nine = Fp::new(9); + Fp2 { + re: nine * self.re - self.im, + im: self.re + nine * self.im, + } + } + + // This function scalar multiplies an Fp2 by an Fp + pub fn scale(self, x: Fp) -> Fp2 { + Fp2 { + re: x * self.re, + im: x * self.im, + } + } + + /// Return the complex conjugate z' of z: Fp2 + /// This also happens to be the frobenius map + /// z -> z^p + /// since p == 3 mod 4 and hence + /// i^p = i^3 = -i + fn conj(self) -> Fp2 { + Fp2 { + re: self.re, + im: -self.im, + } + } + + // Return the magnitude squared of a complex number + 
fn norm_sq(self) -> Fp { + self.re * self.re + self.im * self.im + } + + /// The inverse of z is given by z'/||z||^2 since ||z||^2 = zz' + pub fn inv(self) -> Fp2 { + let norm_sq = self.norm_sq(); + self.conj().scale(norm_sq.inv()) + } +} + +#[allow(clippy::suspicious_arithmetic_impl)] +impl Div for Fp2 { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + self * rhs.inv() + } +} + +/// The degree 3 field extension Fp6 over Fp2 is given by adjoining t, where t^3 = 9 + i +// Fp6 has basis 1, t, t^2 over Fp2 +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct Fp6 { + pub t0: Fp2, + pub t1: Fp2, + pub t2: Fp2, +} + +pub const ZERO_FP6: Fp6 = Fp6 { + t0: ZERO_FP2, + t1: ZERO_FP2, + t2: ZERO_FP2, +}; + +pub const UNIT_FP6: Fp6 = Fp6 { + t0: UNIT_FP2, + t1: ZERO_FP2, + t2: ZERO_FP2, +}; + +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Fp6 { + let (t0, t1, t2) = rng.gen::<(Fp2, Fp2, Fp2)>(); + Fp6 { t0, t1, t2 } + } +} + +impl Add for Fp6 { + type Output = Self; + + fn add(self, other: Self) -> Self { + Fp6 { + t0: self.t0 + other.t0, + t1: self.t1 + other.t1, + t2: self.t2 + other.t2, + } + } +} + +impl Neg for Fp6 { + type Output = Self; + + fn neg(self) -> Self::Output { + Fp6 { + t0: -self.t0, + t1: -self.t1, + t2: -self.t2, + } + } +} + +impl Sub for Fp6 { + type Output = Self; + + fn sub(self, other: Self) -> Self { + Fp6 { + t0: self.t0 - other.t0, + t1: self.t1 - other.t1, + t2: self.t2 - other.t2, + } + } +} + +impl Mul for Fp6 { + type Output = Self; + + fn mul(self, other: Self) -> Self { + Fp6 { + t0: self.t0 * other.t0 + (self.t1 * other.t2 + self.t2 * other.t1).i9(), + t1: self.t0 * other.t1 + self.t1 * other.t0 + (self.t2 * other.t2).i9(), + t2: self.t0 * other.t2 + self.t1 * other.t1 + self.t2 * other.t0, + } + } +} + +impl Fp6 { + // This function scalar multiplies an Fp6 by an Fp2 + fn scale(self, x: Fp2) -> Fp6 { + Fp6 { + t0: x * self.t0, + t1: x * self.t1, + t2: x * self.t2, + } + } + + /// This function 
multiplies an Fp6 element by t, and hence shifts the bases, + /// where the t^2 coefficient picks up a factor of 9+i as the 1 coefficient of the output + fn sh(self) -> Fp6 { + Fp6 { + t0: self.t2.i9(), + t1: self.t0, + t2: self.t1, + } + } + + /// The nth frobenius endomorphism of a p^q field is given by mapping + /// x to x^(p^n) + /// which sends a + bt + ct^2: Fp6 to + /// a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n) + /// The Fp2 coefficients are determined by the comment in the conj method, + /// while the values of + /// t^(p^n) and t^(2p^n) + /// are precomputed in the constant arrays FROB_T1 and FROB_T2 + pub fn frob(self, n: usize) -> Fp6 { + let n = n % 6; + let frob_t1 = FROB_T1[n]; + let frob_t2 = FROB_T2[n]; + + if n % 2 != 0 { + Fp6 { + t0: self.t0.conj(), + t1: frob_t1 * self.t1.conj(), + t2: frob_t2 * self.t2.conj(), + } + } else { + Fp6 { + t0: self.t0, + t1: frob_t1 * self.t1, + t2: frob_t2 * self.t2, + } + } + } + + /// Let x_n = x^(p^n) and note that + /// x_0 = x^(p^0) = x^1 = x + /// (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m} + /// By Galois Theory, given x: Fp6, the product + /// phi = x_0 * x_1 * x_2 * x_3 * x_4 * x_5 + /// lands in Fp, and hence the inverse of x is given by + /// (x_1 * x_2 * x_3 * x_4 * x_5) / phi + /// We can save compute by rearranging the numerator: + /// (x_1 * x_3) * x_5 * (x_1 * x_3)_1 + /// By Galois theory, the following are in Fp2 and are complex conjugates + /// x_1 * x_3 * x_5, x_0 * x_2 * x_4 + /// and therefore + /// phi = ||x_1 * x_3 * x_5||^2 + /// and hence the inverse is given by + /// ([x_1 * x_3] * x_5) * [x_1 * x_3]_1 / ||[x_1 * x_3] * x_5||^2 + pub fn inv(self) -> Fp6 { + let prod_13 = self.frob(1) * self.frob(3); + let prod_135 = (prod_13 * self.frob(5)).t0; + let phi = prod_135.norm_sq(); + let prod_odds_over_phi = prod_135.scale(phi.inv()); + let prod_24 = prod_13.frob(1); + prod_24.scale(prod_odds_over_phi) + } + + pub fn on_stack(self) -> Vec { + let f: [U256; 6] = 
unsafe { transmute(self) }; + f.into_iter().collect() + } +} + +#[allow(clippy::suspicious_arithmetic_impl)] +impl Div for Fp6 { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + self * rhs.inv() + } +} + +/// The degree 2 field extension Fp12 over Fp6 is given by adjoining z, where z^2 = t. +/// It thus has basis 1, z over Fp6 +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct Fp12 { + pub z0: Fp6, + pub z1: Fp6, +} + +pub const UNIT_FP12: Fp12 = Fp12 { + z0: UNIT_FP6, + z1: ZERO_FP6, +}; + +impl Distribution for Standard { + fn sample(&self, rng: &mut R) -> Fp12 { + let (z0, z1) = rng.gen::<(Fp6, Fp6)>(); + Fp12 { z0, z1 } + } +} + +impl Mul for Fp12 { + type Output = Self; + + fn mul(self, other: Self) -> Self { + let h0 = self.z0 * other.z0; + let h1 = self.z1 * other.z1; + let h01 = (self.z0 + self.z1) * (other.z0 + other.z1); + Fp12 { + z0: h0 + h1.sh(), + z1: h01 - (h0 + h1), + } + } +} + +impl Fp12 { + // This function scalar multiplies an Fp12 by an Fp6 + fn scale(self, x: Fp6) -> Fp12 { + Fp12 { + z0: x * self.z0, + z1: x * self.z1, + } + } + + fn conj(self) -> Fp12 { + Fp12 { + z0: self.z0, + z1: -self.z1, + } + } + /// The nth frobenius endomorphism of a p^q field is given by mapping + /// x to x^(p^n) + /// which sends a + bz: Fp12 to + /// a^(p^n) + b^(p^n) * z^(p^n) + /// where the values of z^(p^n) are precomputed in the constant array FROB_Z + pub fn frob(self, n: usize) -> Fp12 { + let n = n % 12; + Fp12 { + z0: self.z0.frob(n), + z1: self.z1.frob(n).scale(FROB_Z[n]), + } + } + + /// By Galois Theory, given x: Fp12, the product + /// phi = Prod_{i=0}^11 x_i + /// lands in Fp, and hence the inverse of x is given by + /// (Prod_{i=1}^11 x_i) / phi + /// The 6th Frob map is nontrivial but leaves Fp6 fixed and hence must be the conjugate: + /// x_6 = (a + bz)_6 = a - bz = x.conj() + /// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expressed as: + /// [(prod_17) * (prod_17)_2] * (prod_17)_4 * 
[(prod_17) * (prod_17)_2]_1 + /// By Galois theory, both the following are in Fp2 and are complex conjugates + /// prod_odds, prod_evens + /// Thus phi = ||prod_odds||^2, and hence the inverse is given by + /// prod_odds * prod_evens_except_six * x.conj() / ||prod_odds||^2 + pub fn inv(self) -> Fp12 { + let prod_17 = (self.frob(1) * self.frob(7)).z0; + let prod_1379 = prod_17 * prod_17.frob(2); + let prod_odds = (prod_1379 * prod_17.frob(4)).t0; + let phi = prod_odds.norm_sq(); + let prod_odds_over_phi = prod_odds.scale(phi.inv()); + let prod_evens_except_six = prod_1379.frob(1); + let prod_except_six = prod_evens_except_six.scale(prod_odds_over_phi); + self.conj().scale(prod_except_six) + } + + pub fn on_stack(self) -> Vec { + let f: [U256; 12] = unsafe { transmute(self) }; + f.into_iter().collect() + } +} + +#[allow(clippy::suspicious_arithmetic_impl)] +impl Div for Fp12 { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + self * rhs.inv() + } +} + +const FROB_T1: [Fp2; 6] = [ + Fp2 { + re: Fp { val: U256::one() }, + im: Fp { val: U256::zero() }, + }, + Fp2 { + re: Fp { + val: U256([ + 0x99e39557176f553d, + 0xb78cc310c2c3330c, + 0x4c0bec3cf559b143, + 0x2fb347984f7911f7, + ]), + }, + im: Fp { + val: U256([ + 0x1665d51c640fcba2, + 0x32ae2a1d0b7c9dce, + 0x4ba4cc8bd75a0794, + 0x16c9e55061ebae20, + ]), + }, + }, + Fp2 { + re: Fp { + val: U256([ + 0xe4bd44e5607cfd48, + 0xc28f069fbb966e3d, + 0x5e6dd9e7e0acccb0, + 0x30644e72e131a029, + ]), + }, + im: Fp { val: U256::zero() }, + }, + Fp2 { + re: Fp { + val: U256([ + 0x7b746ee87bdcfb6d, + 0x805ffd3d5d6942d3, + 0xbaff1c77959f25ac, + 0x0856e078b755ef0a, + ]), + }, + im: Fp { + val: U256([ + 0x380cab2baaa586de, + 0x0fdf31bf98ff2631, + 0xa9f30e6dec26094f, + 0x04f1de41b3d1766f, + ]), + }, + }, + Fp2 { + re: Fp { + val: U256([ + 0x5763473177fffffe, + 0xd4f263f1acdb5c4f, + 0x59e26bcea0d48bac, + 0x0, + ]), + }, + im: Fp { val: U256::zero() }, + }, + Fp2 { + re: Fp { + val: U256([ + 0x62e913ee1dada9e4, + 
0xf71614d4b0b71f3a, + 0x699582b87809d9ca, + 0x28be74d4bb943f51, + ]), + }, + im: Fp { + val: U256([ + 0xedae0bcec9c7aac7, + 0x54f40eb4c3f6068d, + 0xc2b86abcbe01477a, + 0x14a88ae0cb747b99, + ]), + }, + }, +]; + +const FROB_T2: [Fp2; 6] = [ + Fp2 { + re: Fp { val: U256::one() }, + im: Fp { val: U256::zero() }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x848a1f55921ea762, + 0xd33365f7be94ec72, + 0x80f3c0b75a181e84, + 0x05b54f5e64eea801, + ]), + } + }, + im: { + Fp { + val: U256([ + 0xc13b4711cd2b8126, + 0x3685d2ea1bdec763, + 0x9f3a80b03b0b1c92, + 0x2c145edbe7fd8aee, + ]), + } + }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x5763473177fffffe, + 0xd4f263f1acdb5c4f, + 0x59e26bcea0d48bac, + 0x0, + ]), + } + }, + im: { Fp { val: U256::zero() } }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x0e1a92bc3ccbf066, + 0xe633094575b06bcb, + 0x19bee0f7b5b2444e, + 0xbc58c6611c08dab, + ]), + } + }, + im: { + Fp { + val: U256([ + 0x5fe3ed9d730c239f, + 0xa44a9e08737f96e5, + 0xfeb0f6ef0cd21d04, + 0x23d5e999e1910a12, + ]), + } + }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0xe4bd44e5607cfd48, + 0xc28f069fbb966e3d, + 0x5e6dd9e7e0acccb0, + 0x30644e72e131a029, + ]), + } + }, + im: { Fp { val: U256::zero() } }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0xa97bda050992657f, + 0xde1afb54342c724f, + 0x1d9da40771b6f589, + 0x1ee972ae6a826a7d, + ]), + } + }, + im: { + Fp { + val: U256([ + 0x5721e37e70c255c9, + 0x54326430418536d1, + 0xd2b513cdbb257724, + 0x10de546ff8d4ab51, + ]), + } + }, + }, +]; + +const FROB_Z: [Fp2; 12] = [ + Fp2 { + re: { Fp { val: U256::one() } }, + im: { Fp { val: U256::zero() } }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0xd60b35dadcc9e470, + 0x5c521e08292f2176, + 0xe8b99fdd76e68b60, + 0x1284b71c2865a7df, + ]), + } + }, + im: { + Fp { + val: U256([ + 0xca5cf05f80f362ac, + 0x747992778eeec7e5, + 0xa6327cfe12150b8e, + 0x246996f3b4fae7e6, + ]), + } + }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0xe4bd44e5607cfd49, + 0xc28f069fbb966e3d, + 0x5e6dd9e7e0acccb0, + 
0x30644e72e131a029, + ]), + } + }, + im: { Fp { val: U256::zero() } }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0xe86f7d391ed4a67f, + 0x894cb38dbe55d24a, + 0xefe9608cd0acaa90, + 0x19dc81cfcc82e4bb, + ]), + } + }, + im: { + Fp { + val: U256([ + 0x7694aa2bf4c0c101, + 0x7f03a5e397d439ec, + 0x06cbeee33576139d, + 0xabf8b60be77d73, + ]), + } + }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0xe4bd44e5607cfd48, + 0xc28f069fbb966e3d, + 0x5e6dd9e7e0acccb0, + 0x30644e72e131a029, + ]), + } + }, + im: { Fp { val: U256::zero() } }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x1264475e420ac20f, + 0x2cfa95859526b0d4, + 0x072fc0af59c61f30, + 0x757cab3a41d3cdc, + ]), + } + }, + im: { + Fp { + val: U256([ + 0xe85845e34c4a5b9c, + 0xa20b7dfd71573c93, + 0x18e9b79ba4e2606c, + 0xca6b035381e35b6, + ]), + } + }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x3c208c16d87cfd46, + 0x97816a916871ca8d, + 0xb85045b68181585d, + 0x30644e72e131a029, + ]), + } + }, + im: { Fp { val: U256::zero() } }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x6615563bfbb318d7, + 0x3b2f4c893f42a916, + 0xcf96a5d90a9accfd, + 0x1ddf9756b8cbf849, + ]), + } + }, + im: { + Fp { + val: U256([ + 0x71c39bb757899a9b, + 0x2307d819d98302a7, + 0x121dc8b86f6c4ccf, + 0x0bfab77f2c36b843, + ]), + } + }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x5763473177fffffe, + 0xd4f263f1acdb5c4f, + 0x59e26bcea0d48bac, + 0x0, + ]), + } + }, + im: { Fp { val: U256::zero() } }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x53b10eddb9a856c8, + 0x0e34b703aa1bf842, + 0xc866e529b0d4adcd, + 0x1687cca314aebb6d, + ]), + } + }, + im: { + Fp { + val: U256([ + 0xc58be1eae3bc3c46, + 0x187dc4add09d90a0, + 0xb18456d34c0b44c0, + 0x2fb855bcd54a22b6, + ]), + } + }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x5763473177ffffff, + 0xd4f263f1acdb5c4f, + 0x59e26bcea0d48bac, + 0x0, + ]), + } + }, + im: { Fp { val: U256::zero() } }, + }, + Fp2 { + re: { + Fp { + val: U256([ + 0x29bc44b896723b38, + 0x6a86d50bd34b19b9, + 0xb120850727bb392d, + 0x290c83bf3d14634d, 
+ ]), + } + }, + im: { + Fp { + val: U256([ + 0x53c846338c32a1ab, + 0xf575ec93f71a8df9, + 0x9f668e1adc9ef7f0, + 0x23bd9e3da9136a73, + ]), + } + }, + }, +]; diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs new file mode 100644 index 00000000..bf5db74a --- /dev/null +++ b/evm/src/bn254_pairing.rs @@ -0,0 +1,353 @@ +use std::ops::Add; + +use rand::Rng; + +use crate::bn254_arithmetic::{Fp, Fp12, Fp2, Fp6, UNIT_FP12, ZERO_FP, ZERO_FP2}; + +// The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2 +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct Curve { + pub x: Fp, + pub y: Fp, +} + +/// Standard addition formula for elliptic curves, restricted to the cases +/// where neither inputs nor output would ever be the identity O. source: +/// https://en.wikipedia.org/wiki/Elliptic_curve#Algebraic_interpretation +impl Add for Curve { + type Output = Self; + + fn add(self, other: Self) -> Self { + let m = if self == other { + Fp::new(3) * self.x * self.x / (Fp::new(2) * self.y) + } else { + (other.y - self.y) / (other.x - self.x) + }; + let x = m * m - (self.x + other.x); + Curve { + x, + y: m * (self.x - x) - self.y, + } + } +} + +// The twisted curve consists of pairs (x, y): (Fp2, Fp2) | y^2 = x^3 + 3/(9 + i) +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct TwistedCurve { + pub x: Fp2, + pub y: Fp2, +} + +// The tate pairing takes a point each from the curve and its twist and outputs an Fp12 element +pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 { + let miller_output = miller_loop(p, q); + invariant_exponent(miller_output) +} + +/// Standard code for miller loop, can be found on page 99 at this url: +/// https://static1.squarespace.com/static/5fdbb09f31d71c1227082339/t/5ff394720493bd28278889c6/1609798774687/PairingsForBeginners.pdf#page=107 +/// where EXP is a hardcoding of the array of Booleans that the loop traverses +pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 { + let mut r = p; + let mut acc = UNIT_FP12; + let mut line; + + for i 
in EXP { + line = tangent(r, q); + r = r + r; + acc = line * acc * acc; + if i { + line = cord(p, r, q); + r = r + p; + acc = line * acc; + } + } + acc +} + +/// The sloped line function for doubling a point +pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 { + let cx = -Fp::new(3) * p.x * p.x; + let cy = Fp::new(2) * p.y; + sparse_embed(p.y * p.y - Fp::new(9), q.x.scale(cx), q.y.scale(cy)) +} + +/// The sloped line function for adding two points +pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 { + let cx = p2.y - p1.y; + let cy = p1.x - p2.x; + sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x.scale(cx), q.y.scale(cy)) +} + +/// The tangent and cord functions output sparse Fp12 elements. +/// This map embeds the nonzero coefficients into an Fp12. +pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 { + let g0 = Fp6 { + t0: Fp2 { + re: g000, + im: ZERO_FP, + }, + t1: g01, + t2: ZERO_FP2, + }; + + let g1 = Fp6 { + t0: ZERO_FP2, + t1: g11, + t2: ZERO_FP2, + }; + + Fp12 { z0: g0, z1: g1 } +} + +pub fn gen_fp12_sparse(rng: &mut R) -> Fp12 { + sparse_embed(rng.gen::(), rng.gen::(), rng.gen::()) +} + +/// The output y of the miller loop is not an invariant, +/// but one gets an invariant by raising y to the power +/// (p^12 - 1)/N = (p^6 - 1)(p^2 + 1)(p^4 - p^2 + 1)/N +/// where N is the cyclic group order of the curve. +/// To achieve this, we first exponentiate y by p^6 - 1 via +/// y = y_6 / y +/// and then exponentiate the result by p^2 + 1 via +/// y = y_2 * y +/// We then note that (p^4 - p^2 + 1)/N can be rewritten as +/// (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0 +/// where 0 < a0, a1, a2 < p. 
Then the final power is given by +/// y = y_3 * (y^a2)_2 * (y^-a1)_1 * (y^-a0) +pub fn invariant_exponent(f: Fp12) -> Fp12 { + let mut y = f.frob(6) / f; + y = y.frob(2) * y; + let (y_a2, y_a1, y_a0) = get_custom_powers(y); + y.frob(3) * y_a2.frob(2) * y_a1.frob(1) * y_a0 +} + +/// We first compute, in a single pass (so as to avoid repeated steps), +/// y^a4, y^a2, y^a0 +/// where a1 is given by +/// a1 = a4 + 2a2 - a0 +/// we then invert y^a0 and return +/// y^a2, y^a1 = y^a4 * y^a2 * y^a2 * y^(-a0), y^(-a0) +/// +/// Representing a4, a2, a0 in *little endian* binary, define +/// EXPS4 = [(a4[i], a2[i], a0[i]) for i in 0..len(a4)] +/// EXPS2 = [ (a2[i], a0[i]) for i in len(a4)..len(a2)] +/// EXPS0 = [ a0[i] for i in len(a2)..len(a0)] +fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) { + let mut sq: Fp12 = f; + let mut y0: Fp12 = UNIT_FP12; + let mut y2: Fp12 = UNIT_FP12; + let mut y4: Fp12 = UNIT_FP12; + + // proceed via standard squaring algorithm for exponentiation + + // must keep multiplying all three values: a4, a2, a0 + for (a, b, c) in EXPS4 { + if a { + y4 = y4 * sq; + } + if b { + y2 = y2 * sq; + } + if c { + y0 = y0 * sq; + } + sq = sq * sq; + } + // leading term of a4 is always 1 + y4 = y4 * sq; + + // must keep multiplying remaining two values: a2, a0 + for (a, b) in EXPS2 { + if a { + y2 = y2 * sq; + } + if b { + y0 = y0 * sq; + } + sq = sq * sq; + } + // leading term of a2 is always 1 + y2 = y2 * sq; + + // must keep multiplying final remaining value: a0 + for a in EXPS0 { + if a { + y0 = y0 * sq; + } + sq = sq * sq; + } + // leading term of a0 is always 1 + y0 = y0 * sq; + + // invert y0 to compute y^(-a0) + let y0_inv = y0.inv(); + + // return y^a2 = y2, y^a1 = y4 * y2^2 * y^(-a0), y^(-a0) + (y2, y4 * y2 * y2 * y0_inv, y0_inv) +} + +const EXP: [bool; 253] = [ + true, false, false, false, false, false, true, true, false, false, true, false, false, false, + true, false, false, true, true, true, false, false, true, true, true, false, false, true, + false, 
true, true, true, false, false, false, false, true, false, false, true, true, false, + false, false, true, true, false, true, false, false, false, false, false, false, false, true, + false, true, false, false, true, true, false, true, true, true, false, false, false, false, + true, false, true, false, false, false, false, false, true, false, false, false, true, false, + true, true, false, true, true, false, true, true, false, true, false, false, false, false, + false, false, true, true, false, false, false, false, false, false, true, false, true, false, + true, true, false, false, false, false, true, false, true, true, true, false, true, false, + false, true, false, true, false, false, false, false, false, true, true, false, false, true, + true, true, true, true, false, true, false, false, false, false, true, false, false, true, + false, false, false, false, true, true, true, true, false, false, true, true, false, true, + true, true, false, false, true, false, true, true, true, false, false, false, false, true, + false, false, true, false, false, false, true, false, true, false, false, false, false, true, + true, true, true, true, false, false, false, false, true, true, true, true, true, false, true, + false, true, true, false, false, true, false, false, true, true, true, true, true, true, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, + false, +]; + +// The following constants are defined above get_custom_powers + +const EXPS4: [(bool, bool, bool); 64] = [ + (true, true, false), + (true, true, true), + (true, true, true), + (false, false, false), + (false, false, true), + (true, false, true), + (false, true, false), + (true, false, true), + (true, true, false), + (true, false, true), + (false, true, false), + (true, true, false), + (true, true, false), + (true, true, false), + (false, true, false), + (false, true, false), 
+ (false, false, true), + (true, false, true), + (true, true, false), + (false, true, false), + (true, true, false), + (true, true, false), + (true, true, false), + (false, false, true), + (false, false, true), + (true, false, true), + (true, false, true), + (true, true, false), + (true, false, false), + (true, true, false), + (false, true, false), + (true, true, false), + (true, false, false), + (false, true, false), + (false, false, false), + (true, false, false), + (true, false, false), + (true, false, true), + (false, false, true), + (false, true, true), + (false, false, true), + (false, true, true), + (false, true, true), + (false, false, false), + (true, true, true), + (true, false, true), + (true, false, true), + (false, true, true), + (true, false, true), + (false, true, true), + (false, true, true), + (true, true, false), + (true, true, false), + (true, true, false), + (true, false, false), + (false, false, true), + (true, false, false), + (false, false, true), + (true, false, true), + (true, true, false), + (true, true, true), + (false, true, true), + (false, true, false), + (true, true, true), +]; + +const EXPS2: [(bool, bool); 62] = [ + (true, false), + (true, true), + (false, false), + (true, false), + (true, false), + (true, true), + (true, false), + (true, true), + (true, false), + (false, true), + (false, true), + (true, true), + (true, true), + (false, false), + (true, true), + (false, false), + (false, false), + (false, true), + (false, true), + (true, true), + (true, true), + (true, true), + (false, true), + (true, true), + (false, false), + (true, true), + (true, false), + (true, true), + (false, false), + (true, true), + (true, true), + (true, false), + (false, false), + (false, true), + (false, false), + (true, true), + (false, true), + (false, false), + (true, false), + (false, true), + (false, true), + (true, false), + (false, true), + (false, false), + (false, false), + (false, false), + (false, true), + (true, false), + (true, true), + 
(false, true), + (true, true), + (true, false), + (false, true), + (false, false), + (true, false), + (false, true), + (true, false), + (true, true), + (true, false), + (true, true), + (false, true), + (true, true), +]; + +const EXPS0: [bool; 65] = [ + false, false, true, false, false, true, true, false, true, false, true, true, true, false, + true, false, false, false, true, false, false, true, false, true, false, true, true, false, + false, false, false, false, true, false, true, false, true, true, true, false, false, true, + true, true, true, false, true, false, true, true, false, false, true, false, false, false, + true, true, true, true, false, false, true, true, false, +]; diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index e204f26f..80dd9392 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -27,12 +27,19 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/core/terminate.asm"), include_str!("asm/core/transfer.asm"), include_str!("asm/core/util.asm"), - include_str!("asm/curve/bn254/curve_add.asm"), - include_str!("asm/curve/bn254/curve_mul.asm"), - include_str!("asm/curve/bn254/moddiv.asm"), - include_str!("asm/curve/bn254/glv.asm"), - include_str!("asm/curve/bn254/msm.asm"), - include_str!("asm/curve/bn254/precomputation.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/glv.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/invariant_exponent.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/msm.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/precomputation.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"), + include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"), + 
include_str!("asm/curve/bn254/field_arithmetic/degree_6_mul.asm"), + include_str!("asm/curve/bn254/field_arithmetic/degree_12_mul.asm"), + include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"), + include_str!("asm/curve/bn254/field_arithmetic/util.asm"), include_str!("asm/curve/common.asm"), include_str!("asm/curve/secp256k1/curve_add.asm"), include_str!("asm/curve/secp256k1/ecrecover.asm"), @@ -43,9 +50,6 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/curve/secp256k1/precomputation.asm"), include_str!("asm/curve/wnaf.asm"), include_str!("asm/exp.asm"), - include_str!("asm/fields/fp6_macros.asm"), - include_str!("asm/fields/fp6_mul.asm"), - include_str!("asm/fields/fp12_mul.asm"), include_str!("asm/halt.asm"), include_str!("asm/hash/blake2b/addresses.asm"), include_str!("asm/hash/blake2b/compression.asm"), diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm new file mode 100644 index 00000000..20882c05 --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm @@ -0,0 +1,88 @@ +/// miller_data is defined by +/// (1) taking the binary expansion of N254, the order of the elliptic curve group +/// (2) popping the first and last elements, then appending a 0: +/// exp = bin(N254)[1:-1] + [0] +/// (3) counting the lengths of runs of 1s then 0s in exp, e.g. 
+/// if exp = 1100010011110, then EXP = [(2,3), (1,2), (4,1)] +/// (4) byte encoding each pair (n,m) as follows: +/// miller_data = [(0x20)n + m for (n,m) in EXP] + +global miller_data: + BYTES 0xdc, 0x22, 0x42, 0x21 + BYTES 0xa1, 0xa4, 0x24, 0x21 + BYTES 0x23, 0x22, 0x64, 0x21 + BYTES 0x62, 0x41, 0x82, 0x24 + BYTES 0x22, 0x24, 0xa1, 0x42 + BYTES 0x25, 0x21, 0x22, 0x61 + BYTES 0x21, 0x44, 0x21, 0x21 + BYTES 0x46, 0x26, 0x41, 0x41 + BYTES 0x41, 0x21, 0x23, 0x25 + BYTES 0x21, 0x64, 0x41, 0x22 + BYTES 0x21, 0x27, 0x41, 0x43 + BYTES 0x22, 0x64, 0x21, 0x62 + BYTES 0x62, 0x22, 0x23, 0x42 + BYTES 0x25 + + +/// final_exp first computes y^a4, y^a2, y^a0 +/// representing a4, a2, a0 in *little endian* binary, define +/// EXPS4 = [(a4[i], a2[i], a0[i]) for i in 0..len(a4)] +/// EXPS2 = [ (a2[i], a0[i]) for i in len(a4)..len(a2)] +/// EXPS0 = [ a0[i] for i in len(a2)..len(a0)] +/// power_data_n is simply a reverse-order byte encoding of EXPSn +/// where (i,j,k) is sent to (100)i + (10)j + k + +global power_data_4: + BYTES 111, 010, 011, 111 + BYTES 110, 101, 001, 100 + BYTES 001, 100, 110, 110 + BYTES 110, 011, 011, 101 + BYTES 011, 101, 101, 111 + BYTES 000, 011, 011, 001 + BYTES 011, 001, 101, 100 + BYTES 100, 000, 010, 100 + BYTES 110, 010, 110, 100 + BYTES 110, 101, 101, 001 + BYTES 001, 110, 110, 110 + BYTES 010, 110, 101, 001 + BYTES 010, 010, 110, 110 + BYTES 110, 010, 101, 110 + BYTES 101, 010, 101, 001 + BYTES 000, 111, 111, 110 + +global power_data_2: + BYTES 11, 01, 11, 10 + BYTES 11, 10, 01, 10 + BYTES 00, 01, 10, 11 + BYTES 01, 11, 10, 01 + BYTES 00, 00, 00, 01 + BYTES 10, 01, 01, 10 + BYTES 00, 01, 11, 00 + BYTES 01, 00, 10, 11 + BYTES 11, 00, 11, 10 + BYTES 11, 00, 11, 01 + BYTES 11, 11, 11, 01 + BYTES 01, 00, 00, 11 + BYTES 00, 11, 11, 01 + BYTES 01, 10, 11, 10 + BYTES 11, 10, 10, 00 + BYTES 11, 10 + +global power_data_0: + BYTES 0, 1, 1, 0 + BYTES 0, 1, 1, 1 + BYTES 1, 0, 0, 0 + BYTES 1, 0, 0, 1 + BYTES 1, 0, 1, 0 + BYTES 1, 1, 1, 1 + BYTES 0, 0, 1, 1 + BYTES 
1, 0, 1, 0 + BYTES 1, 0, 0, 0 + BYTES 0, 0, 1, 1 + BYTES 0, 1, 0, 1 + BYTES 0, 0, 1, 0 + BYTES 0, 0, 1, 0 + BYTES 1, 1, 1, 0 + BYTES 1, 0, 1, 1 + BYTES 0, 0, 1, 0 + BYTES 0 diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm similarity index 63% rename from evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm index 3e917120..aacb7d3a 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm @@ -1,16 +1,6 @@ -// #define N 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 // BN254 base field order - // BN254 elliptic curve addition. // Uses the standard affine addition formula. global bn_add: - // Uncomment for test inputs. - // PUSH 0xdeadbeef - // PUSH 2 - // PUSH 1 - // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121 - // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770 - // stack: x0, y0, x1, y1, retdest - // Check if points are valid BN254 points. DUP2 // stack: y0, x0, y0, x1, y1, retdest @@ -46,7 +36,7 @@ global bn_add_valid_points: // stack: x0, y0, x0, y0, x1, y1, retdest %ec_isidentity // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest - %jumpi(bn_add_first_zero) + %jumpi(bn_add_fst_zero) // stack: x0, y0, x1, y1, retdest // Check if the second point is the identity. @@ -75,21 +65,21 @@ global bn_add_valid_points: // stack: y1, x0, y0, x1, y1, retdest DUP3 // stack: y0, y1, x0, y0, x1, y1, retdest - %submod + SUBFP254 // stack: y0 - y1, x0, y0, x1, y1, retdest DUP4 // stack: x1, y0 - y1, x0, y0, x1, y1, retdest DUP3 // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest - %submod + SUBFP254 // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest - %moddiv + %divr_fp254 // stack: lambda, x0, y0, x1, y1, retdest %jump(bn_add_valid_points_with_lambda) // BN254 elliptic curve addition. 
// Assumption: (x0,y0) == (0,0) -bn_add_first_zero: +bn_add_fst_zero: // stack: x0, y0, x1, y1, retdest // Just return (x1,y1) %stack (x0, y0, x1, y1, retdest) -> (retdest, x1, y1) @@ -114,37 +104,33 @@ bn_add_valid_points_with_lambda: // stack: x0, lambda, x0, y0, x1, y1, retdest DUP5 // stack: x1, x0, lambda, x0, y0, x1, y1, retdest - %bn_base - // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest - DUP4 - // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest + DUP3 + // stack: lambda, x1, x0, lambda, x0, y0, x1, y1, retdest DUP1 - // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest - MULMOD + // stack: lambda, lambda, x1, x0, lambda, x0, y0, x1, y1, retdest + MULFP254 // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest - %submod + SUBFP254 // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest - %submod + SUBFP254 // stack: x2, lambda, x0, y0, x1, y1, retdest // Compute y2 = lambda*(x1 - x2) - y1 - %bn_base - // stack: N, x2, lambda, x0, y0, x1, y1, retdest - DUP2 - // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest - DUP7 - // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest - %submod - // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest - DUP4 - // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest - MULMOD + DUP1 + // stack: x2, x2, lambda, x0, y0, x1, y1, retdest + DUP6 + // stack: x1, x2, x2, lambda, x0, y0, x1, y1, retdest + SUBFP254 + // stack: x1 - x2, x2, lambda, x0, y0, x1, y1, retdest + DUP3 + // stack: lambda, x1 - x2, x2, lambda, x0, y0, x1, y1, retdest + MULFP254 // stack: lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest DUP7 // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest SWAP1 // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest - %submod + SUBFP254 // stack: y2, x2, lambda, x0, y0, x1, y1, retdest // Return x2,y2 @@ -185,23 +171,19 @@ bn_add_equal_points: // stack: x0, y0, x1, y1, retdest // Compute lambda = 3/2 * x0^2 / y0 - %bn_base - 
// stack: N, x0, y0, x1, y1, retdest - %bn_base - // stack: N, N, x0, y0, x1, y1, retdest - DUP3 - // stack: x0, N, N, x0, y0, x1, y1, retdest DUP1 - // stack: x0, x0, N, N, x0, y0, x1, y1, retdest - MULMOD - // stack: x0^2, N, x0, y0, x1, y1, retdest with + // stack: x0, x0, y0, x1, y1, retdest + DUP1 + // stack: x0, x0, x0, y0, x1, y1, retdest + MULFP254 + // stack: x0^2, x0, y0, x1, y1, retdest with PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field - // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest - MULMOD + // stack: 3/2, x0^2, x0, y0, x1, y1, retdest + MULFP254 // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest DUP3 // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest - %moddiv + %divr_fp254 // stack: lambda, x0, y0, x1, y1, retdest %jump(bn_add_valid_points_with_lambda) @@ -217,79 +199,59 @@ global bn_double: // stack: x, y, x, y, retdest %jump(bn_add_equal_points) -// Push the order of the BN254 base field. -%macro bn_base - PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 -%endmacro - -// Assumption: x, y < N and 2N < 2^256. -// Note: Doesn't hold for Secp256k1 base field. -%macro submod - // stack: x, y - %bn_base - // stack: N, x, y - ADD - // stack: N + x, y // Doesn't overflow since 2N < 2^256 - SUB - // stack: N + x - y // Doesn't underflow since y < N - %bn_base - // stack: N, N + x - y - SWAP1 - // stack: N + x - y, N - MOD - // stack: (N + x - y) % N = (x-y) % N -%endmacro - // Check if (x,y) is a valid curve point. -// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack. 
+// Returns (range & curve) || is_identity +// where +// range = (x < N) & (y < N) +// curve = y^2 == (x^3 + 3) +// ident = (x,y) == (0,0) %macro bn_check - // stack: x, y - %bn_base - // stack: N, x, y - DUP2 - // stack: x, N, x, y - LT - // stack: x < N, x, y - %bn_base - // stack: N, x < N, x, y - DUP4 - // stack: y, N, x < N, x, y - LT - // stack: y < N, x < N, x, y - AND - // stack: (y < N) & (x < N), x, y - %stack (b, x, y) -> (x, x, @BN_BASE, x, @BN_BASE, @BN_BASE, x, y, b) - // stack: x, x, N, x, N, N, x, y, b - MULMOD - // stack: x^2 % N, x, N, N, x, y, b - MULMOD - // stack: x^3 % N, N, x, y, b - PUSH 3 - // stack: 3, x^3 % N, N, x, y, b - ADDMOD - // stack: (x^3 + 3) % N, x, y, b - DUP3 - // stack: y, (x^3 + 3) % N, x, y, b - %bn_base - // stack: N, y, (x^3 + 3) % N, x, y, b - SWAP1 - // stack: y, N, (x^3 + 3) % N, x, y, b + // stack: x, y DUP1 - // stack: y, y, N, (x^3 + 3) % N, x, y, b - MULMOD - // stack: y^2 % N, (x^3 + 3) % N, x, y, b - EQ - // stack: y^2 % N == (x^3 + 3) % N, x, y, b + // stack: x, x, y + PUSH @BN_BASE + // stack: N , x, x, y + DUP1 + // stack: N, N , x, x, y + DUP5 + // stack: y , N, N , x, x, y + LT + // stack: y < N, N , x, x, y SWAP2 - // stack: y, x, y^2 % N == (x^3 + 3) % N, b - %ec_isidentity - // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b - SWAP2 - // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0) + // stack: x , N, y < N, x, y + LT + // stack: x < N, y < N, x, y AND - // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0) + // stack: range, x, y + SWAP2 + // stack: y, x, range + DUP2 + // stack: x , y, x, range + DUP1 + DUP1 + MULFP254 + MULFP254 + // stack: x^3, y, x, range + PUSH 3 + ADDFP254 + // stack: 3 + x^3, y, x, range + DUP2 + // stack: y , 3 + x^3, y, x, range + DUP1 + MULFP254 + // stack: y^2, 3 + x^3, y, x, range + EQ + // stack: curve, y, x, range + SWAP2 + // stack: x, y, curve, range + %ec_isidentity + // stack: ident , curve, range + SWAP2 + // stack: range , curve, ident + AND + // 
stack: range & curve, ident OR - // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) + // stack: is_valid %endmacro // Return (u256::MAX, u256::MAX) which is used to indicate the input was invalid. @@ -297,9 +259,9 @@ global bn_double: // stack: retdest PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff // stack: u256::MAX, retdest - PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + DUP1 // stack: u256::MAX, u256::MAX, retdest SWAP2 // stack: retdest, u256::MAX, u256::MAX JUMP -%endmacro \ No newline at end of file +%endmacro diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm similarity index 100% rename from evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm diff --git a/evm/src/cpu/kernel/asm/curve/bn254/glv.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm similarity index 100% rename from evm/src/cpu/kernel/asm/curve/bn254/glv.asm rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm new file mode 100644 index 00000000..2fcd5d2b --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm @@ -0,0 +1,319 @@ +/// To make the Tate pairing an invariant, the final step is to exponentiate by +/// (p^12 - 1)/N = (p^6 - 1) * (p^2 + 1) * (p^4 - p^2 + 1)/N +/// and thus we can exponentiate by each factor sequentially. 
+/// +/// def bn254_invariant_exponent(y: Fp12): +/// y = first_exp(y) +/// y = second_exp(y) +/// return final_exp(y) + +global bn254_invariant_exponent: + +/// first, exponentiate by (p^6 - 1) via +/// def first_exp(y): +/// return y.frob(6) / y + // stack: out, retdest {out: y} + %stack (out) -> (out, 0, first_exp, out) + // stack: out, 0, first_exp, out, retdest {out: y} + %jump(inv_fp254_12) +first_exp: + // stack: out, retdest {out: y , 0: y^-1} + %frob_fp254_12_6 + // stack: out, retdest {out: y_6, 0: y^-1} + %stack (out) -> (out, 0, out, second_exp, out) + // stack: out, 0, out, second_exp, out, retdest {out: y_6, 0: y^-1} + %jump(mul_fp254_12) + +/// second, exponentiate by (p^2 + 1) via +/// def second_exp(y): +/// return y.frob(2) * y +second_exp: + // stack: out, retdest {out: y} + %stack (out) -> (out, 0, out, out, final_exp, out) + // stack: out, 0, out, out, final_exp, out, retdest {out: y} + %frob_fp254_12_2_ + // stack: 0, out, out, final_exp, out, retdest {out: y, 0: y_2} + %jump(mul_fp254_12) + +/// Finally, we must exponentiate by (p^4 - p^2 + 1)/N +/// To do so efficiently, we can express this power as +/// (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0 +/// and simultaneously compute y^a4, y^a2, y^a0 where +/// a1 = a4 + 2a2 - a0 +/// We first initialize these powers as 1 and then use +/// binary algorithms for exponentiation. 
+/// +/// def final_exp(y): +/// y4, y2, y0 = 1, 1, 1 +/// power_loop_4() +/// power_loop_2() +/// power_loop_0() +/// custom_powers() +/// final_power() + +final_exp: + // stack: val, retdest + %stack (val) -> (val, 12, val) + // stack: val, 12, val, retdest + %move_fp254_12 + // stack: 12, val, retdest {12: sqr} + %stack () -> (1, 1, 1) + // stack: 1, 1, 1, 12, val, retdest + %mstore_kernel_bn254_pairing(24) + %mstore_kernel_bn254_pairing(36) + %mstore_kernel_bn254_pairing(48) + // stack: 12, val, retdest {12: sqr, 24: y0, 36: y2, 48: y4} + %stack () -> (64, 62, 65) + // stack: 64, 62, 65, 12, val, retdest {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(power_loop_4) + +/// After computing the powers +/// y^a4, y^a2, y^a0 +/// we would like to transform them to +/// y^a2, y^-a1, y^-a0 +/// +/// def custom_powers() +/// y0 = y0^{-1} +/// y1 = y4 * y2^2 * y0 +/// return y2, y1, y0 +/// +/// And finally, upon doing so, compute the final power +/// y^(p^3) * (y^a2)^(p^2) * (y^-a1)^p * (y^-a0) +/// +/// def final_power() +/// y = y.frob(3) +/// y2 = y2.frob(2) +/// y1 = y1.frob(1) +/// return y * y2 * y1 * y0 + +custom_powers: + // stack: val, retdest {24: y0, 36: y2, 48: y4} + %stack () -> (24, 60, make_term_1) + // stack: 24, 60, make_term_1, val, retdest {24: y0, 36: y2, 48: y4} + %jump(inv_fp254_12) +make_term_1: + // stack: val, retdest {36: y2, 48: y4, 60: y0^-1} + %stack () -> (36, 48, 48, make_term_2) + // stack: 36, 48, 48, make_term_2, val, retdest {36: y2, 48: y4, 60: y0^-1} + %jump(mul_fp254_12) +make_term_2: + // stack: val, retdest {36: y2, 48: y4 * y2, 60: y0^-1} + %stack () -> (36, 48, 48, make_term_3) + // stack: 36, 48, 48, make_term_3, val, retdest {36: y2, 48: y4 * y2, 60: y0^-1} + %jump(mul_fp254_12) +make_term_3: + // stack: val, retdest {36: y2, 48: y4 * y2^2, 60: y0^-1} + %stack () -> (60, 48, 48, final_power) + // stack: 60, 48, 48, final_power, val, retdest {36: y2, 48: y4 * y2^2, 60: y0^-1} + %jump(mul_fp254_12) +final_power: + // stack: val, 
retdest {val: y , 36: y^a2 , 48: y^a1 , 60: y^a0} + %frob_fp254_12_3 + // stack: val, retdest {val: y_3, 36: y^a2 , 48: y^a1 , 60: y^a0} + %stack () -> (36, 36) + %frob_fp254_12_2_ + POP + // stack: val, retdest {val: y_3, 36: (y^a2)_2, 48: y^a1 , 60: y^a0} + PUSH 48 + %frob_fp254_12_1 + POP + // stack: val, retdest {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0} + %stack (val) -> (36, val, val, penult_mul, val) + // stack: 36, val, val, penult_mul, val, retdest {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0} + %jump(mul_fp254_12) +penult_mul: + // stack: val, retdest {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0} + %stack (val) -> (48, val, val, final_mul, val) + // stack: 48, val, val, final_mul, val, retdest {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0} + %jump(mul_fp254_12) +final_mul: + // stack: val, retdest {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0} + %stack (val) -> (60, val, val) + // stack: 60, val, val, retdest {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0} + %jump(mul_fp254_12) + + +/// def power_loop_4(): +/// for i in range(64): +/// abc = load(i, power_data_4) +/// if a: +/// y4 *= acc +/// if b: +/// y2 *= acc +/// if c: +/// y0 *= acc +/// acc = square_fp254_12(acc) +/// y4 *= acc +/// +/// def power_loop_2(): +/// for i in range(62): +/// ab = load(i, power_data_2) +/// if a: +/// y2 *= acc +/// if b: +/// y0 *= acc +/// acc = square_fp254_12(acc) +/// y2 *= acc +/// +/// def power_loop_0(): +/// for i in range(65): +/// a = load(i, power_data_0) +/// if a: +/// y0 *= acc +/// acc = square_fp254_12(acc) +/// y0 *= acc + +power_loop_4: + // stack: i , j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 + ISZERO + // stack: break?, i , j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_4_end) + // stack: i , j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %sub_const(1) + // stack: i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 + %mload_kernel_code(power_data_4) + // stack: abc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 
+ %lt_const(100) + // stack: skip?, abc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_4_b) + // stack: abc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %sub_const(100) + // stack: bc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %stack () -> (48, 48, power_loop_4_b) + // stack: 48, 48, power_loop_4_b, bc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP8 + // stack: sqr, 48, 48, power_loop_4_b, bc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) +power_loop_4_b: + // stack: bc, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 + %lt_const(10) + // stack: skip?, bc, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_4_c) + // stack: bc, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %sub_const(10) + // stack: c, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %stack () -> (36, 36, power_loop_4_c) + // stack: 36, 36, power_loop_4_c, c, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP8 + // stack: sqr, 36, 36, power_loop_4_c, c, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) +power_loop_4_c: + // stack: c, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + ISZERO + // stack: skip?, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_4_sq) + // stack: i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %stack () -> (24, 24, power_loop_4_sq) + // stack: 24, 24, power_loop_4_sq, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP7 + // stack: sqr, 24, 24, power_loop_4_sq, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) +power_loop_4_sq: + // stack: i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + PUSH power_loop_4 + // stack: power_loop_4, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP5 + DUP1 + // stack: sqr, sqr, power_loop_4, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(square_fp254_12) +power_loop_4_end: + // stack: 0, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + POP + // stack: j, k, sqr {12: sqr, 24: y0, 
36: y2, 48: y4} + %stack () -> (48, 48, power_loop_2) + // stack: 48, 48, power_loop_2, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP6 + // stack: sqr, 48, 48, power_loop_2, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) + +power_loop_2: + // stack: j , k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 + ISZERO + // stack: break?, j , k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_2_end) + // stack: j , k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %sub_const(1) + // stack: j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 + %mload_kernel_code(power_data_2) + // stack: ab, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 + %lt_const(10) + // stack: skip?, ab, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_2_b) + // stack: ab, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %sub_const(10) + // stack: b, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %stack () -> (36, 36, power_loop_2_b) + // stack: 36, 36, power_loop_2_b, b, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP7 + // stack: sqr, 36, 36, power_loop_2_b, b, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) +power_loop_2_b: + // stack: b, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + ISZERO + // stack: skip?, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_2_sq) + // stack: j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %stack () -> (24, 24, power_loop_2_sq) + // stack: 24, 24, power_loop_2_sq, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP6 + // stack: sqr, 24, 24, power_loop_2_sq, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) +power_loop_2_sq: + // stack: j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + PUSH power_loop_2 + // stack: power_loop_2, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP4 + DUP1 + // stack: sqr, sqr, power_loop_2, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(square_fp254_12) +power_loop_2_end: + // stack: 0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + POP + // 
stack: k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %stack () -> (36, 36, power_loop_0) + // stack: 36, 36, power_loop_0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP5 + // stack: sqr, 36, 36, power_loop_0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) + +power_loop_0: + // stack: k , sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 + ISZERO + // stack: break?, k , sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_0_end) + // stack: k , sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %sub_const(1) + // stack: k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP1 + %mload_kernel_code(power_data_0) + // stack: a, k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + ISZERO + // stack: skip?, k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jumpi(power_loop_0_sq) + // stack: k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %stack () -> (24, 24, power_loop_0_sq) + // stack: 24, 24, power_loop_0_sq, k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP5 + // stack: sqr, 24, 24, power_loop_0_sq, k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) +power_loop_0_sq: + // stack: k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + PUSH power_loop_0 + // stack: power_loop_0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + DUP3 + DUP1 + // stack: sqr, sqr, power_loop_0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(square_fp254_12) +power_loop_0_end: + // stack: 0, sqr {12: sqr, 24: y0, 36: y2, 48: y4} + %stack (i, sqr) -> (24, sqr, 24, custom_powers) + // stack: 24, sqr, 24, custom_powers {12: sqr, 24: y0, 36: y2, 48: y4} + %jump(mul_fp254_12) diff --git a/evm/src/cpu/kernel/asm/curve/bn254/msm.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/msm.asm similarity index 100% rename from evm/src/cpu/kernel/asm/curve/bn254/msm.asm rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/msm.asm diff --git a/evm/src/cpu/kernel/asm/curve/bn254/precomputation.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/precomputation.asm similarity index 100% rename from 
evm/src/cpu/kernel/asm/curve/bn254/precomputation.asm rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/precomputation.asm diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm new file mode 100644 index 00000000..f09684bd --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm @@ -0,0 +1,292 @@ +/// def tate(P: Curve, Q: TwistedCurve) -> Fp12: +/// out = miller(P, Q) +/// return bn254_invariant_exponent(out) +global bn254_tate: + // stack: inp, out, retdest + %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out) + // stack: inp, out, bn254_invariant_exponent, out, retdest + %jump(bn254_miller) + +/// def miller(P, Q): +/// miller_init() +/// miller_loop() +/// +/// def miller_init(): +/// out = 1 +/// O = P +/// times = 53 +/// +/// def miller_loop(): +/// while times: +/// 0xnm = load(miller_data) +/// while 0xnm >= 0x20: +/// miller_one() +/// while 0xnm: +/// miller_zero() +/// times -= 1 +/// +/// def miller_one(): +/// 0xnm -= 0x20 +/// mul_tangent() +/// mul_cord() +/// +/// def miller_zero(): +/// 0xnm -= 1 +/// mul_tangent() + +global bn254_miller: + // stack: ptr, out, retdest + %stack (ptr, out) -> (out, 1, ptr, out) + // stack: out, 1, ptr, out, retdest + %mstore_kernel_bn254_pairing + // stack: ptr, out, retdest + %load_fp254_6 + // stack: P, Q, out, retdest + %stack (P: 2) -> (0, 53, P, P) + // stack: 0, 53, O, P, Q, out, retdest + // the head 0 lets miller_loop start with POP +miller_loop: + POP + // stack: times , O, P, Q, out, retdest + DUP1 + ISZERO + // stack: break?, times , O, P, Q, out, retdest + %jumpi(miller_return) + // stack: times , O, P, Q, out, retdest + %sub_const(1) + // stack: times-1, O, P, Q, out, retdest + DUP1 + // stack: times-1, times-1, O, P, Q, out, retdest + %mload_kernel_code(miller_data) + // stack: 0xnm, times-1, O, P, Q, out, retdest + %jump(miller_one)
+miller_return: + // stack: times, O, P, Q, out, retdest + %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest) + // stack: retdest + JUMP + +miller_one: + // stack: 0xnm, times, O, P, Q, out, retdest + DUP1 + %lt_const(0x20) + // stack: skip?, 0xnm, times, O, P, Q, out, retdest + %jumpi(miller_zero) + // stack: 0xnm, times, O, P, Q, out, retdest + %sub_const(0x20) + // stack: 0x{n-1}m, times, O, P, Q, out, retdest + PUSH mul_cord + // stack: mul_cord, 0x{n-1}m, times, O, P, Q, out, retdest + %jump(mul_tangent) + +miller_zero: + // stack: m , times, O, P, Q, out, retdest + DUP1 + ISZERO + // stack: skip?, m , times, O, P, Q, out, retdest + %jumpi(miller_loop) + // stack: m , times, O, P, Q, out, retdest + %sub_const(1) + // stack: m-1, times, O, P, Q, out, retdest + PUSH miller_zero + // stack: miller_zero, m-1, times, O, P, Q, out, retdest + %jump(mul_tangent) + + +/// def mul_tangent() +/// out = square_fp254_12(out) +/// line = tangent(O, Q) +/// out = mul_fp254_12_sparse(out, line) +/// O += O + +mul_tangent: + // stack: retdest, 0xnm, times, O, P, Q, out + PUSH mul_tangent_2 + DUP13 + PUSH mul_tangent_1 + // stack: mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out + %stack (mul_tangent_1, out) -> (out, out, mul_tangent_1, out) + // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out + %jump(square_fp254_12) +mul_tangent_1: + // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out + DUP13 + DUP13 + DUP13 + DUP13 + // stack: Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out + DUP11 + DUP11 + // stack: O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out + %tangent + // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {0: line} + %stack (out) -> (out, 0, out) + // stack: out, 0, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {0: line} + %jump(mul_fp254_12_sparse) +mul_tangent_2: + // stack: retdest, 0xnm, times, O, P, Q, out {0: line} + PUSH after_double + 
// stack: after_double, retdest, 0xnm, times, O, P, Q, out {0: line} + DUP6 + DUP6 + // stack: O, after_double, retdest, 0xnm, times, O, P, Q, out {0: line} + %jump(bn_double) +after_double: + // stack: 2*O, retdest, 0xnm, times, O, P, Q, out {0: line} + SWAP5 + POP + SWAP5 + POP + // stack: retdest, 0xnm, times, 2*O, P, Q, out {0: line} + JUMP + +/// def mul_cord() +/// line = cord(P, O, Q) +/// out = mul_fp254_12_sparse(out, line) +/// O += P + +mul_cord: + // stack: 0xnm, times, O, P, Q, out + PUSH mul_cord_1 + // stack: mul_cord_1, 0xnm, times, O, P, Q, out + DUP11 + DUP11 + DUP11 + DUP11 + // stack: Q, mul_cord_1, 0xnm, times, O, P, Q, out + DUP9 + DUP9 + // stack: O, Q, mul_cord_1, 0xnm, times, O, P, Q, out + DUP13 + DUP13 + // stack: P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out + %cord + // stack: mul_cord_1, 0xnm, times, O, P, Q, out {0: line} + DUP12 + // stack: out, mul_cord_1, 0xnm, times, O, P, Q, out {0: line} + %stack (out) -> (out, 0, out) + // stack: out, 0, out, mul_cord_1, 0xnm, times, O, P, Q, out {0: line} + %jump(mul_fp254_12_sparse) +mul_cord_1: + // stack: 0xnm, times, O , P, Q, out + PUSH after_add + // stack: after_add, 0xnm, times, O , P, Q, out + DUP7 + DUP7 + DUP7 + DUP7 + // stack: O , P, after_add, 0xnm, times, O , P, Q, out + %jump(bn_add_valid_points) +after_add: + // stack: O + P, 0xnm, times, O , P, Q, out + SWAP4 + POP + SWAP4 + POP + // stack: 0xnm, times, O+P, P, Q, out + %jump(miller_one) + + +/// def tangent(px, py, qx, qy): +/// return sparse_store( +/// py**2 - 9, +/// (-3px**2) * qx, +/// (2py) * qy, +/// ) + +%macro tangent + // stack: px, py, qx, qx_, qy, qy_ + %stack (px, py) -> (py, py , 9, px, py) + // stack: py, py , 9, px, py, qx, qx_, qy, qy_ + MULFP254 + // stack: py^2 , 9, px, py, qx, qx_, qy, qy_ + SUBFP254 + // stack: py^2 - 9, px, py, qx, qx_, qy, qy_ + %mstore_kernel_bn254_pairing(0) + // stack: px, py, qx, qx_, qy, qy_ + DUP1 + MULFP254 + // stack: px^2, py, qx, qx_, qy, qy_ + PUSH 3 + MULFP254 + // stack: 
3*px^2, py, qx, qx_, qy, qy_ + PUSH 0 + SUBFP254 + // stack: -3*px^2, py, qx, qx_, qy, qy_ + SWAP2 + // stack: qx, py, -3px^2, qx_, qy, qy_ + DUP3 + MULFP254 + // stack: (-3*px^2)qx, py, -3px^2, qx_, qy, qy_ + %mstore_kernel_bn254_pairing(2) + // stack: py, -3px^2, qx_, qy, qy_ + PUSH 2 + MULFP254 + // stack: 2py, -3px^2, qx_, qy, qy_ + SWAP3 + // stack: qy, -3px^2, qx_, 2py, qy_ + DUP4 + MULFP254 + // stack: (2py)qy, -3px^2, qx_, 2py, qy_ + %mstore_kernel_bn254_pairing(8) + // stack: -3px^2, qx_, 2py, qy_ + MULFP254 + // stack: (-3px^2)*qx_, 2py, qy_ + %mstore_kernel_bn254_pairing(3) + // stack: 2py, qy_ + MULFP254 + // stack: (2py)*qy_ + %mstore_kernel_bn254_pairing(9) +%endmacro + +/// def cord(p1x, p1y, p2x, p2y, qx, qy): +/// return sparse_store( +/// p1y*p2x - p2y*p1x, +/// (p2y - p1y) * qx, +/// (p1x - p2x) * qy, +/// ) + +%macro cord + // stack: p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ + DUP1 + DUP5 + MULFP254 + // stack: p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ + DUP3 + DUP5 + MULFP254 + // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ + SUBFP254 + // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ + %mstore_kernel_bn254_pairing(0) + // stack: p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ + SWAP3 + // stack: p2y , p1y, p2x , p1x, qx, qx_, qy, qy_ + SUBFP254 + // stack: p2y - p1y, p2x , p1x, qx, qx_, qy, qy_ + SWAP2 + // stack: p1x , p2x, p2y - p1y, qx, qx_, qy, qy_ + SUBFP254 + // stack: p1x - p2x, p2y - p1y, qx, qx_, qy, qy_ + SWAP4 + // stack: qy, p2y - p1y, qx, qx_, p1x - p2x, qy_ + DUP5 + MULFP254 + // stack: (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_ + %mstore_kernel_bn254_pairing(8) + // stack: p2y - p1y, qx, qx_, p1x - p2x, qy_ + SWAP1 + // stack: qx, p2y - p1y, qx_, p1x - p2x, qy_ + DUP2 + MULFP254 + // stack: (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_ + %mstore_kernel_bn254_pairing(2) + // stack: p2y - p1y, qx_, p1x - p2x, qy_ + MULFP254 + // stack: (p2y - p1y)qx_, p1x - p2x, qy_ + 
%mstore_kernel_bn254_pairing(3) + // stack: p1x - p2x, qy_ + MULFP254 + // stack: (p1x - p2x)*qy_ + %mstore_kernel_bn254_pairing(9) +%endmacro diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm new file mode 100644 index 00000000..5fd47e80 --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm @@ -0,0 +1,378 @@ +/////////////////////////////////////// +///// GENERAL FP12 MULTIPLICATION ///// +/////////////////////////////////////// + +/// cost: 1063 + +/// fp254_6 functions: +/// fn | num | ops | cost +/// ------------------------- +/// load | 8 | 40 | 320 +/// store | 5 | 40 | 200 +/// dup | 5 | 6 | 30 +/// swap | 4 | 16 | 64 +/// add | 4 | 16 | 64 +/// subr | 1 | 17 | 17 +/// mul | 3 | 157 | 471 +/// i9 | 1 | 9 | 9 +/// +/// lone stack operations: +/// op | num +/// ------------ +/// ADD | 3 +/// SWAP | 2 +/// DUP | 6 +/// PUSH | 6 +/// POP | 2 +/// JUMP | 6 +/// +/// TOTAL: 1201 + +/// inputs: +/// F = f + f'z +/// G = g + g'z +/// +/// output: +/// H = h + h'z = FG +/// +/// h = fg + sh(f'g') +/// h' = (f+f')(g+g') - fg - f'g' +/// +/// memory pointers [ind' = ind+6] +/// {inA: f, inA': f', inB: g, inB': g', out: h, out': h'} +/// +/// f, f', g, g' consist of six elements on the stack + +global mul_fp254_12: + // stack: inA, inB, out + DUP1 + %add_const(6) + // stack: inA', inA, inB, out + %load_fp254_6 + // stack: f', inA, inB, out + DUP8 + %add_const(6) + // stack: inB', f', inA, inB, out + %load_fp254_6 + // stack: g', f', inA, inB, out + PUSH mul_fp254_12_1 + // stack: mul_fp254_12_1, g', f', inA, inB, out + %dup_fp254_6_7 + // stack: f', mul_fp254_12_1, g', f', inA, inB, out + %dup_fp254_6_7 + // stack: g', f', mul_fp254_12_1, g', f', inA, inB, out + %jump(mul_fp254_6) +mul_fp254_12_1: + // stack: f'g', g' , f', inA, inB, out + %dup_fp254_6_0 + // stack: f'g', f'g', g' , f', inA, inB, out + %store_fp254_6_sh(84) + //
stack: f'g', g' , f', inA, inB, out {84: sh(f'g')} + %store_fp254_6(90) + // stack: g' , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + DUP13 + // stack: inA, g' , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + DUP15 + // stack: inB, inA, g' , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %load_fp254_6 + // stack: g , inA, g' , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %stack (f: 6, x, g: 6) -> (g, x, f) + // stack: g', inA, g , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %dup_fp254_6_7 + // stack: g,g', inA, g , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %add_fp254_6 + // stack: g+g', inA, g , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %stack (f: 6, x, g: 6) -> (g, x, f) + // stack: g, inA, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + PUSH mul_fp254_12_2 + // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + SWAP7 + // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %load_fp254_6 + // stack: f, g, mul_fp254_12_2, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %jump(mul_fp254_6) +mul_fp254_12_2: + // stack: fg, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %store_fp254_6(96) + // stack: g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %stack (x: 6, y: 6) -> (y, x) + // stack: f', g+g', inA, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + PUSH mul_fp254_12_3 + // stack: mul_fp254_12_3, f', g+g', inA, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + SWAP13 + // stack: inA, f', g+g', mul_fp254_12_3, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %load_fp254_6 + // stack: f,f', g+g', mul_fp254_12_3, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %add_fp254_6 + // stack: f+f', g+g', mul_fp254_12_3, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %jump(mul_fp254_6) +mul_fp254_12_3: + // stack: (f+f')(g+g'), inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %load_fp254_6(96) + // stack: fg, (f+f')(g+g'), inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %stack (x: 6, y: 6) -> (y, x) + // stack: (f+f')(g+g'), 
fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %dup_fp254_6_6 + // stack: fg, (f+f')(g+g'), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %load_fp254_6(90) + // stack: f'g',fg, (f+f')(g+g'), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %add_fp254_6 + // stack: f'g'+fg, (f+f')(g+g'), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %subr_fp254_6 + // stack: (f+f')(g+g') - (f'g'+fg), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + DUP14 + %add_const(6) + // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %store_fp254_6 + // stack: fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %load_fp254_6(84) + // stack: sh(f'g') , fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %add_fp254_6 + // stack: sh(f'g') + fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + DUP8 + // stack: out, sh(f'g') + fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %store_fp254_6 + // stack: inB, out {84: sh(f'g'), 90: f'g', 96: fg} + %pop2 + JUMP + + +////////////////////////////////////// +///// SPARSE FP12 MULTIPLICATION ///// +////////////////////////////////////// + +/// cost: 645 + +/// fp254_6 functions: +/// fn | num | ops | cost +/// --------------------------- +/// load | 2 | 40 | 80 +/// store | 2 | 40 | 80 +/// dup | 4 | 6 | 24 +/// swap | 4 | 16 | 64 +/// add | 4 | 16 | 64 +/// mul_fp254_ | 2 | 21 | 42 +/// mul_fp254_2 | 4 | 59 | 236 +/// +/// lone stack operations: +/// op | num +/// ------------ +/// ADD | 6 +/// DUP | 9 +/// PUSH | 6 +/// POP | 5 +/// +/// TOTAL: 618 + +/// input: +/// F = f + f'z +/// G = g0 + (G1)t + (G2)tz +/// +/// output: +/// H = h + h'z = FG +/// = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z] +/// +/// h = g0 * f + G1 * sh(f ) + G2 * sh2(f') +/// h' = g0 * f' + G1 * sh(f') + G2 * sh (f ) +/// +/// memory pointers [ind' = ind+6, inB1 = inB + 2, inB2 = inB + 8] +/// { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'} +/// +/// f, f' consist of six elements; G1, G2 consist of two elements; and
g0 of one element + +global mul_fp254_12_sparse: + // stack: inA, inB, out + DUP1 + %add_const(6) + // stack: inA', inA, inB, out + %load_fp254_6 + // stack: f', inA, inB, out + DUP8 + // stack: inB, f', inA, inB, out + DUP8 + // stack: inA, inB, f', inA, inB, out + %load_fp254_6 + // stack: f, inB, f', inA, inB, out + DUP16 + // stack: out, f, inB, f', inA, inB, out + %dup_fp254_6_8 + // stack: f', out, f, inB, f', inA, inB, out + DUP14 + // stack: inB, f', out, f, inB, f', inA, inB, out + %dup_fp254_6_8 + // stack: f, inB, f', out, f, inB, f', inA, inB, out + DUP7 + // stack: inB, f, inB, f', out, f, inB, f', inA, inB, out + %dup_fp254_6_8 + // stack: f', inB, f, inB, f', out, f, inB, f', inA, inB, out + %dup_fp254_6_7 + // stack: f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out + DUP13 + // stack: inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out + %mload_kernel_bn254_pairing + // stack: g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out + %scale_re_fp254_6 + // stack: g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out + %stack (x: 6, y: 6) -> (y, x) + // stack: f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out + DUP13 + %add_const(8) + // stack: inB2, f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out + %load_fp254_2 + // stack: G2 , f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out + %scale_fp254_6_sh2 + // stack: G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out + %add_fp254_6 + // stack: G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out + %stack (f: 6, x, g: 6) -> (g, x, f) + // stack: f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out + DUP7 %add_const(2) + // stack: inB1, f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out + %load_fp254_2 + // stack: G1 , f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out + %scale_fp254_6_sh + // stack: G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, 
f, inB, f', inA, inB, out + %add_fp254_6_hole + // stack: G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out + DUP14 + // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out + %store_fp254_6 + // stack: inB, f', out, f, inB, f', inA, inB, out + %mload_kernel_bn254_pairing + // stack: g0 , f', out, f, inB, f', inA, inB, out + %scale_re_fp254_6 + // stack: g0 * f', out, f, inB, f', inA, inB, out + %stack (f: 6, x, g: 6) -> (g, x, f) + // stack: f , out, g0 * f', inB, f', inA, inB, out + DUP14 + %add_const(8) + // stack: inB2, f , out, g0 * f', inB, f', inA, inB, out + %load_fp254_2 + // stack: G2 , f , out, g0 * f', inB, f', inA, inB, out + %scale_fp254_6_sh + // stack: G2 * sh(f) , out, g0 * f', inB, f', inA, inB, out + %add_fp254_6_hole + // stack: G2 * sh(f) + g0 * f', inB, f', inA, inB, out + %stack (f: 6, x, g: 6) -> (g, x, f) + // stack: f' , inB, G2 * sh(f) + g0 * f', inA, inB, out + DUP7 + %add_const(2) + // stack: inB1, f' , inB, G2 * sh(f) + g0 * f', inA, inB, out + %load_fp254_2 + // stack: G1 , f' , inB, G2 * sh(f) + g0 * f', inA, inB, out + %scale_fp254_6_sh + // stack: G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out + %add_fp254_6_hole + // stack: G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out + DUP9 + %add_const(6) + // stack: out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out + %store_fp254_6 + // stack: inA, inB, out + %pop3 + JUMP + + +///////////////////////// +///// FP12 SQUARING ///// +///////////////////////// + +/// cost: 646 + +/// fp254_6 functions: +/// fn | num | ops | cost +/// ------------------------- +/// load | 2 | 40 | 80 +/// store | 2 | 40 | 80 +/// dup | 2 | 6 | 12 +/// swap | 2 | 16 | 32 +/// add | 1 | 16 | 16 +/// mul | 1 | 157 | 157 +/// sq | 2 | 101 | 202 +/// dbl | 1 | 13 | 13 +/// +/// lone stack operations: +/// op | num +/// ------------ +/// ADD | 3 +/// SWAP | 4 +/// DUP | 5 +/// PUSH | 6 +/// POP | 3 +/// JUMP | 4 +/// +/// TOTAL: + +/// input: 
+/// F = f + f'z +/// +/// output: +/// H = h + h'z = FF +/// +/// h = ff + sh(f'f') +/// h' = 2ff' +/// +/// memory pointers [ind' = ind+6] +/// {inp: f, inp': f', out: h, out': h'} +/// +/// f, f' consist of six elements on the stack + +global square_fp254_12: + // stack: inp, out + DUP1 + // stack: inp, inp, out + %load_fp254_6 + // stack: f, inp, out + PUSH square_fp254_12_3 + // stack: square_fp254_12_3, f, inp, out + SWAP7 + // stack: inp, f, square_fp254_12_3, out + PUSH square_fp254_12_2 + // stack: square_fp254_12_2, inp, f, square_fp254_12_3, out + %dup_fp254_6_2 + // stack: f , square_fp254_12_2, inp, f, square_fp254_12_3, out + DUP16 + %add_const(6) + // stack: out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out + PUSH square_fp254_12_1 + // stack: square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out + DUP10 + %add_const(6) + // stack: inp', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out + %load_fp254_6 + // stack: f', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out + %stack (f: 6, x: 2, g: 6) -> (g, x, f) + // stack: f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out + %dup_fp254_6_8 + // stack: f', f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out + %jump(mul_fp254_6) +square_fp254_12_1: + // stack: f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out + DUP7 + // stack: out', f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out + %store_fp254_6_double + // stack: out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out + POP + // stack: f', square_fp254_12_2, inp, f, square_fp254_12_3, out + %jump(square_fp254_6) +square_fp254_12_2: + // stack: f'f', inp, f, square_fp254_12_3, out + %sh_fp254_6 + // stack: sh(f'f'), inp, f, square_fp254_12_3, out + %stack (f: 6, x, g: 6) -> (g, x, f) + // stack: f, inp, sh(f'f'), square_fp254_12_3, out + SWAP6 + SWAP13 + SWAP6 + // 
stack: f, square_fp254_12_3, sh(f'f'), inp, out + %jump(square_fp254_6) +square_fp254_12_3: + // stack: ff , sh(f'f'), inp, out + %add_fp254_6 + // stack: ff + sh(f'f'), inp, out + DUP8 + // stack: out, ff + sh(f'f'), inp, out + %store_fp254_6 + // stack: inp, out + %pop2 + JUMP diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_6_mul.asm similarity index 59% rename from evm/src/cpu/kernel/asm/fields/fp6_mul.asm rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_6_mul.asm index 0fc6dbdf..db8b09e0 100644 --- a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_6_mul.asm @@ -1,3 +1,7 @@ +////////////////////////////////////// +///// GENERAL FP6 MULTIPLICATION ///// +////////////////////////////////////// + /// inputs: /// C = C0 + C1t + C2t^2 /// = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2 @@ -54,9 +58,8 @@ /// e2 = c0d2 + c1d1 + c2d0 - (c0_d2_ + c1_d1_ + c2_d0_) /// e2_ = c0d2_ + c0_d2 + c1d1_ + c1_d1 + c2d0_ + c2_d0 - // cost: 157 -global mul_fp6: +global mul_fp254_6: // e2 // make c0_d2_ + c1_d1_ + c2_d0_ DUP8 @@ -256,3 +259,177 @@ global mul_fp6: // stack: retdest, e0, e0_, e1, e1_, e2, e2_ JUMP + + +//////////////////////// +///// FP6 SQUARING ///// +//////////////////////// + +/// inputs: +/// C = C0 + C1t + C2t^2 +/// = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2 +/// +/// output: +/// E = E0 + E1t + E2t^2 = C^2 +/// = (e0 + e0_i) + (e1 + e1_i)t + (e2 + e2_i)t^2 +/// +/// initial stack: c0, c0_, c1, c1_, c2, c2_, retdest +/// final stack: e0, e0_, e1, e1_, e2, e2_ + +/// computations: +/// +/// E0 = C0C0 + i9(2C1C2) = (c0+c0_i)^2 + i9(2(c1+c1_i)(c2+c2_i)) +/// = (c0^2 - c0_^2) + (2c0c0_)i + i9[2(c1c2 - c1_c2_) + 2(c1_c2 + c1c2_)i] +/// +/// E1 = 2*C0C1 + i9(C2C2) = 2(c0+c0_i)(c1+c1_i) + i9((c2+c2_i)(c2+c2_i)) +/// = 2(c0c1 - c0_c1_) + 2(c0c1_ + c0_c1)i + i9[(c2^2 - c2_^2) + (2c2c2_)i] +/// +/// E2 = 2*C0C2 + C1C1 +/// = 
2(c0c2 - c0_c2_) + 2(c0_c2 + c0c2_)i + (c1^2 - c1_^2) + (2c1c1_)i +/// +/// e0 = (c0^2 - c0_^2) + x0 +/// e0_ = 2c0c0_ + x0_ +/// where x0_, x0 = %i9 2(c1c2 - c1_c2_), 2(c1_c2 + c1c2_) +/// +/// e1 = 2(c0c1 - c0_c1_) + x1 +/// e1_ = 2(c0c1_ + c0_c1) + x1_ +/// where x1_, x1 = %i9 c2^2 - c2_^2, 2c2c2_ +/// +/// e2 = 2(c0c2 - c0_c2_) + (c1^2 - c1_^2) +/// e2_ = 2(c0_c2 + c0c2_) + 2c1c1_ + +// cost: 101 +global square_fp254_6: + /// e0 = (c0^2 - c0_^2) + x0 + /// e0_ = 2c0c0_ + x0_ + /// where x0_, x0 = %i9 2(c1c2 - c1_c2_), 2(c1_c2 + c1c2_) + DUP6 + DUP4 + MULFP254 + DUP6 + DUP6 + MULFP254 + ADDFP254 + PUSH 2 + MULFP254 + DUP7 + DUP6 + MULFP254 + DUP7 + DUP6 + MULFP254 + SUBFP254 + PUSH 2 + MULFP254 + %i9 + // stack: x0_, x0 + DUP3 + DUP5 + MULFP254 + PUSH 2 + MULFP254 + // stack: 2c0c0_, x0_, x0 + ADDFP254 + // stack: e0_, x0 + SWAP4 + SWAP1 + // stack: x0 + DUP4 + DUP1 + MULFP254 + DUP4 + DUP1 + MULFP254 + SUBFP254 + // stack: c0^2 - c0_^2, x0 + ADDFP254 + // stack: e0 + SWAP3 + + /// e1 = 2(c0c1 - c0_c1_) + x1 + /// e1_ = 2(c0c1_ + c0_c1 ) + x1_ + /// where x1_, x1 = %i9 c2^2 - c2_^2, 2c2c2_ + DUP7 + DUP9 + MULFP254 + PUSH 2 + MULFP254 + DUP9 + DUP1 + MULFP254 + DUP9 + DUP1 + MULFP254 + SUBFP254 + %i9 + // stack: x1_, x1 + DUP4 + DUP4 + MULFP254 + DUP9 + DUP7 + MULFP254 + ADDFP254 + PUSH 2 + MULFP254 + // stack: 2(c0c1_ + c0_c1), x1_, x1 + ADDFP254 + // stack: e1_, x1 + SWAP8 + SWAP1 + // stack: x1 + DUP8 + DUP4 + MULFP254 + DUP5 + DUP7 + MULFP254 + SUBFP254 + PUSH 2 + MULFP254 + // stack: 2(c0c1 - c0_c1_), x1 + ADDFP254 + SWAP7 + + /// e2 = 2(c0c2 - c0_c2_) + (c1^2 - c1_^2) + /// e2_ = 2(c0_c2 + c0c2_ + c1c1_) + DUP1 + DUP1 + MULFP254 + DUP5 + DUP1 + MULFP254 + SUBFP254 + DUP11 + DUP5 + MULFP254 + DUP4 + DUP8 + MULFP254 + SUBFP254 + PUSH 2 + MULFP254 + ADDFP254 + // stack: e2 + SWAP10 + // stack: c2_, c1_, c2, c0_, c1, c0 + SWAP4 + MULFP254 + // stack: c1c1_, c2, c0_, c2_, c0 + SWAP2 + MULFP254 + // stack: c0_c2 , c1c1_, c2_, c0 + ADDFP254 + // stack: c0_c2 + c1c1_, 
c2_, c0 + SWAP2 + MULFP254 + // stack: c0c2_ , c0_c2 + c1c1_ + ADDFP254 + // stack: c0c2_ + c0_c2 + c1c1_ + PUSH 2 + MULFP254 + // stack: e2_ + SWAP6 + + // stack: retdest, e0, e0_, e1, e1_, e2, e2_ + JUMP diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm new file mode 100644 index 00000000..ee1e4679 --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm @@ -0,0 +1,272 @@ +// frob_fp12 tests + +global test_frob_fp254_12_1: + // stack: ptr + %frob_fp254_12_1 + // stack: ptr + %jump(0xdeadbeef) + +global test_frob_fp254_12_2: + // stack: ptr + DUP1 + // stack: ptr, ptr + %frob_fp254_12_2_ + // stack: ptr + %jump(0xdeadbeef) + +global test_frob_fp254_12_3: + // stack: ptr + %frob_fp254_12_3 + // stack: ptr + %jump(0xdeadbeef) + +global test_frob_fp254_12_6: + // stack: ptr + %frob_fp254_12_6 + // stack: ptr + %jump(0xdeadbeef) + + +/// def frob_fp254_12_n(f, f'): +/// g = frob_fp254_6(n, f ) +/// g' = FROB_z[n] * frob_fp254_6(n, f') +/// return g, g' + +%macro frob_fp254_12_1 + // stack: ptr + DUP1 + // stack: ptr, ptr + %load_fp254_6 + // stack: f, ptr + %frob_fp254_6_1 + // stack: g, ptr + DUP7 + // stack: ptr, g, ptr + %store_fp254_6 + // stack: ptr + DUP1 %add_const(6) + // stack: ptr', ptr + %load_fp254_6 + // stack: f', ptr + %frobz_1 + // stack: g', ptr + DUP7 %add_const(6) + // stack: ptr', g', ptr + %store_fp254_6 + // stack: ptr +%endmacro + +// Note: this is the only one with distinct input and output pointers +%macro frob_fp254_12_2_ + // stack: ptr , out + DUP1 + // stack: ptr, ptr , out + %load_fp254_6 + // stack: f, ptr , out + %frob_fp254_6_2 + // stack: g, ptr , out + DUP8 + // stack: out, g, ptr , out + %store_fp254_6 + // stack: ptr , out + %add_const(6) + // stack: ptr', out + %load_fp254_6 + // stack: f', out + %frobz_2 + // stack: g', out + DUP7 %add_const(6) + // stack: out', g', out + %store_fp254_6 + // stack: out 
+%endmacro + +%macro frob_fp254_12_3 + // stack: ptr + DUP1 + // stack: ptr, ptr + %load_fp254_6 + // stack: f, ptr + %frob_fp254_6_3 + // stack: g, ptr + DUP7 + // stack: ptr, g, ptr + %store_fp254_6 + // stack: ptr + DUP1 %add_const(6) + // stack: ptr', ptr + %load_fp254_6 + // stack: f', ptr + %frobz_3 + // stack: g', ptr + DUP7 %add_const(6) + // stack: ptr', g', ptr + %store_fp254_6 + // stack: ptr +%endmacro + +%macro frob_fp254_12_6 + // stack: ptr + DUP1 %add_const(6) + // stack: ptr', ptr + %load_fp254_6 + // stack: f', ptr + %frobz_6 + // stack: g', ptr + DUP7 %add_const(6) + // stack: ptr', g', ptr + %store_fp254_6 + // stack: ptr +%endmacro + +// frob_fp6 tests + +global test_frob_fp254_6_1: + // stack: ptr + %frob_fp254_6_1 + // stack: ptr + %jump(0xdeadbeef) + +global test_frob_fp254_6_2: + // stack: ptr + %frob_fp254_6_2 + // stack: ptr + %jump(0xdeadbeef) + +global test_frob_fp254_6_3: + // stack: ptr + %frob_fp254_6_3 + // stack: ptr + %jump(0xdeadbeef) + + +/// let Z` denote the complex conjugate of Z + +/// def frob_fp254_6_n(C0, C1, C2): +/// if n%2: +/// D0, D1, D2 = C0`, FROB_T1[n] * C1`, FROB_T2[n] * C2` +/// else: +/// D0, D1, D2 = C0 , FROB_T1[n] * C1 , FROB_T2[n] * C2 +/// return D0, D1, D2 + +%macro frob_fp254_6_1 + // stack: C0 , C1 , C2 + %conj_fp254_2 + // stack: D0 , C1 , C2 + %stack (x: 2, a: 2, y:2) -> (y, a, x) + // stack: C2 , C1 , D0 + %conj_fp254_2 + // stack: C2`, C1 , D0 + %frobt2_1 + // stack: D2 , C1 , D0 + %stack (x: 2, a: 2, y:2) -> (y, a, x) + // stack: D0 , C1 , D2 + %stack (x: 2, y: 2) -> (y, x) + // stack: C1 , D0 , D2 + %conj_fp254_2 + // stack: C1`, D0 , D2 + %frobt1_1 + // stack: D1 , D0 , D2 + %stack (x: 2, y: 2) -> (y, x) + // stack: D0 , D1 , D2 +%endmacro + +%macro frob_fp254_6_2 + // stack: C0, C1, C2 + %stack (x: 2, a: 2, y:2) -> (y, a, x) + // stack: C2, C1, C0 + %frobt2_2 + // stack: D2, C1, C0 + %stack (x: 2, a: 2, y:2) -> (y, a, x) + // stack: C0, C1, D2 + %stack (x: 2, y: 2) -> (y, x) + // stack: C1, C0, 
D2 + %frobt1_2 + // stack: D1, C0, D2 + %stack (x: 2, y: 2) -> (y, x) + // stack: D0, D1, D2 +%endmacro + +%macro frob_fp254_6_3 + // stack: C0 , C1 , C2 + %conj_fp254_2 + // stack: D0 , C1 , C2 + %stack (x: 2, a: 2, y:2) -> (y, a, x) + // stack: C2 , C1 , D0 + %conj_fp254_2 + // stack: C2`, C1 , D0 + %frobt2_3 + // stack: D2 , C1 , D0 + %stack (x: 2, a: 2, y:2) -> (y, a, x) + // stack: D0 , C1 , D2 + %stack (x: 2, y: 2) -> (y, x) + // stack: C1 , D0 , D2 + %conj_fp254_2 + // stack: C1`, D0 , D2 + %frobt1_3 + // stack: D1 , D0 , D2 + %stack (x: 2, y: 2) -> (y, x) + // stack: D0 , D1 , D2 +%endmacro + + +%macro frobz_1 + %frob_fp254_6_1 + PUSH 0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac + PUSH 0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470 + %scale_fp254_6 +%endmacro + +%macro frobz_2 + %frob_fp254_6_2 + PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49 + %scale_re_fp254_6 +%endmacro + +%macro frobz_3 + %frob_fp254_6_3 + PUSH 0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101 + PUSH 0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f + %scale_fp254_6 +%endmacro + +%macro frobz_6 + PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46 + %scale_re_fp254_6 +%endmacro + + +%macro frobt1_1 + PUSH 0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2 + PUSH 0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d + %mul_fp254_2 +%endmacro + +%macro frobt2_1 + PUSH 0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126 + PUSH 0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762 + %mul_fp254_2 +%endmacro + +%macro frobt1_2 + PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48 + %scale_fp254_2 +%endmacro + +%macro frobt2_2 + PUSH 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe + %scale_fp254_2 +%endmacro + + +%macro frobt1_3 + PUSH 
0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de + PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d + %mul_fp254_2 +%endmacro + +%macro frobt2_3 + PUSH 0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f + PUSH 0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066 + %mul_fp254_2 +%endmacro diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm new file mode 100644 index 00000000..6214f385 --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm @@ -0,0 +1,60 @@ +// Returns reverse order divison y/x, modulo N +%macro divr_fp254 + // stack: x , y + %inv_fp254 + // stack: x^-1, y + MULFP254 +%endmacro + +// Non-deterministically provide the inverse x^-1 of x modulo N. +// If x === 0 mod N, this function panics. +// Although the official prover provides the unique inverse (inp, out, 72, check_inv_fp254_12) + // stack: inp, out, 72, check_inv_fp254_12, retdest + %jump(mul_fp254_12) +check_inv_fp254_12: + // stack: retdest + PUSH 72 + %load_fp254_12 + // stack: unit?, retdest + %assert_eq_unit_fp254_12 + // stack: retdest + JUMP + +%macro prover_inv_fp254_12 + PROVER_INPUT(ffe::bn254_base::component_11) + PROVER_INPUT(ffe::bn254_base::component_10) + PROVER_INPUT(ffe::bn254_base::component_9) + PROVER_INPUT(ffe::bn254_base::component_8) + PROVER_INPUT(ffe::bn254_base::component_7) + PROVER_INPUT(ffe::bn254_base::component_6) + PROVER_INPUT(ffe::bn254_base::component_5) + PROVER_INPUT(ffe::bn254_base::component_4) + PROVER_INPUT(ffe::bn254_base::component_3) + PROVER_INPUT(ffe::bn254_base::component_2) + PROVER_INPUT(ffe::bn254_base::component_1) + PROVER_INPUT(ffe::bn254_base::component_0) +%endmacro diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm new file mode 100644 index 00000000..af074714 --- /dev/null 
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm @@ -0,0 +1,1077 @@ +// Load a single value from bn254 pairings memory. +%macro mload_kernel_bn254_pairing + // stack: offset + %mload_kernel(@SEGMENT_KERNEL_BN_PAIRING) + // stack: value +%endmacro + +%macro mload_kernel_bn254_pairing(offset) + // stack: + PUSH $offset + // stack: offset + %mload_kernel(@SEGMENT_KERNEL_BN_PAIRING) + // stack: value +%endmacro + +// Store a single value to bn254 pairings memory. +%macro mstore_kernel_bn254_pairing + // stack: offset, value + %mstore_kernel(@SEGMENT_KERNEL_BN_PAIRING) + // stack: +%endmacro + +%macro mstore_kernel_bn254_pairing(offset) + // stack: value + PUSH $offset + // stack: offset, value + %mstore_kernel(@SEGMENT_KERNEL_BN_PAIRING) + // stack: +%endmacro + +// fp254_2 macros + +%macro load_fp254_2 + // stack: ptr + DUP1 + %add_const(1) + // stack: ind1, ptr + %mload_kernel_bn254_pairing + // stack: x1, ptr + SWAP1 + // stack: ind0, x1 + %mload_kernel_bn254_pairing + // stack: x0, x1 +%endmacro + +/// complex conjugate +%macro conj_fp254_2 + // stack: a, b + SWAP1 + PUSH 0 + SUBFP254 + SWAP1 + // stack: a, -b +%endmacro + +%macro scale_fp254_2 + // stack: c, x, y + SWAP2 + // stack: y, x, c + DUP3 + // stack: c, y, x, c + MULFP254 + // stack: cy, x, c + SWAP2 + // stack: c, x, cy + MULFP254 + // stack: cx, cy +%endmacro + +/// Given z = x + iy: Fp254_2, return the product z' = (9 + i) * z: Fp254_2 +/// where input is represented z.re, z.im and output as z'.im, z'.re +/// cost: 9; note this returns y, x for the output x + yi +%macro i9 + // stack: a , b + DUP2 + // stack: b, a , b + DUP2 + // stack: a , b, a , b + PUSH 9 + MULFP254 + // stack: 9a , b, a , b + SUBFP254 + // stack: 9a - b, a , b + SWAP2 + // stack: b , a, 9a - b + PUSH 9 + MULFP254 + // stack: 9b , a, 9a - b + ADDFP254 + // stack: 9b + a, 9a - b +%endmacro + +%macro mul_fp254_2 + // stack: a, b, c, d + DUP4 + DUP3 + MULFP254 + // stack: bd, a, b, c, d + DUP4 + DUP3 + MULFP254 + // stack: ac 
, bd, a, b, c, d + SUBFP254 + // stack: ac - bd, a, b, c, d + SWAP4 + // stack: d, a, b, c, ac - bd + MULFP254 + // stack: ad, b, c, ac - bd + SWAP2 + // stack: c, b, ad, ac - bd + MULFP254 + // stack: bc , ad, ac - bd + ADDFP254 + // stack: bc + ad, ac - bd + SWAP1 + // stack: ac - bd, bc + ad +%endmacro + +// fp254_6 macros + +%macro load_fp254_6 + // stack: ptr + DUP1 + %add_const(4) + // stack: ind4, ptr + %mload_kernel_bn254_pairing + // stack: x4, ptr + DUP2 + %add_const(3) + // stack: ind3, x4, ptr + %mload_kernel_bn254_pairing + // stack: x3, x4, ptr + DUP3 + %add_const(2) + // stack: ind2, x3, x4, ptr + %mload_kernel_bn254_pairing + // stack: x2, x3, x4, ptr + DUP4 + %add_const(1) + // stack: ind1, x2, x3, x4, ptr + %mload_kernel_bn254_pairing + // stack: x1, x2, x3, x4, ptr + DUP5 + %add_const(5) + // stack: ind5, x1, x2, x3, x4, ptr + %mload_kernel_bn254_pairing + // stack: x5, x1, x2, x3, x4, ptr + SWAP5 + // stack: ind0, x1, x2, x3, x4, x5 + %mload_kernel_bn254_pairing + // stack: x0, x1, x2, x3, x4, x5 +%endmacro + +// cost: 6 loads + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40 +%macro load_fp254_6(ptr) + // stack: + PUSH $ptr + %add_const(5) + // stack: ind5 + %mload_kernel_bn254_pairing + // stack: x5 + PUSH $ptr + %add_const(4) + // stack: ind4, x5 + %mload_kernel_bn254_pairing + // stack: x4, x5 + PUSH $ptr + %add_const(3) + // stack: ind3, x4, x5 + %mload_kernel_bn254_pairing + // stack: x3, x4, x5 + PUSH $ptr + %add_const(2) + // stack: ind2, x3, x4, x5 + %mload_kernel_bn254_pairing + // stack: x2, x3, x4, x5 + PUSH $ptr + %add_const(1) + // stack: ind1, x2, x3, x4, x5 + %mload_kernel_bn254_pairing + // stack: x1, x2, x3, x4, x5 + PUSH $ptr + // stack: ind0, x1, x2, x3, x4, x5 + %mload_kernel_bn254_pairing + // stack: x0, x1, x2, x3, x4, x5 +%endmacro + +// cost: 6 stores + 6 swaps/dups + 5 adds = 6*4 + 6*1 + 5*2 = 40 +%macro store_fp254_6 + // stack: ptr, x0, x1, x2, x3, x4 , x5 + SWAP5 + // stack: x4, x0, x1, x2, x3, ptr, x5 + DUP6 + 
%add_const(4) + // stack: ind4, x4, x0, x1, x2, x3, ptr, x5 + %mstore_kernel_bn254_pairing + // stack: x0, x1, x2, x3, ptr, x5 + DUP5 + // stack: ind0, x0, x1, x2, x3, ptr, x5 + %mstore_kernel_bn254_pairing + // stack: x1, x2, x3, ptr, x5 + DUP4 + %add_const(1) + // stack: ind1, x1, x2, x3, ptr, x5 + %mstore_kernel_bn254_pairing + // stack: x2, x3, ptr, x5 + DUP3 + %add_const(2) + // stack: ind2, x2, x3, ptr, x5 + %mstore_kernel_bn254_pairing + // stack: x3, ptr, x5 + DUP2 + %add_const(3) + // stack: ind3, x3, ptr, x5 + %mstore_kernel_bn254_pairing + // stack: ptr, x5 + %add_const(5) + // stack: ind5, x5 + %mstore_kernel_bn254_pairing + // stack: +%endmacro + +// cost: 6 stores + 7 swaps/dups + 5 adds + 6 doubles = 6*4 + 7*1 + 5*2 + 6*2 = 53 +%macro store_fp254_6_double + // stack: ptr, x0, x1, x2, x3, x4, x5 + SWAP6 + // stack: x5, x0, x1, x2, x3, x4, ptr + PUSH 2 + MULFP254 + // stack: 2*x5, x0, x1, x2, x3, x4, ptr + DUP7 + %add_const(5) + // stack: ind5, 2*x5, x0, x1, x2, x3, x4, ptr + %mstore_kernel_bn254_pairing + // stack: x0, x1, x2, x3, x4, ptr + PUSH 2 + MULFP254 + // stack: 2*x0, x1, x2, x3, x4, ptr + DUP6 + // stack: ind0, 2*x0, x1, x2, x3, x4, ptr + %mstore_kernel_bn254_pairing + // stack: x1, x2, x3, x4, ptr + PUSH 2 + MULFP254 + // stack: 2*x1, x2, x3, x4, ptr + DUP5 + %add_const(1) + // stack: ind1, 2*x1, x2, x3, x4, ptr + %mstore_kernel_bn254_pairing + // stack: x2, x3, x4, ptr + PUSH 2 + MULFP254 + // stack: 2*x2, x3, x4, ptr + DUP4 + %add_const(2) + // stack: ind2, 2*x2, x3, x4, ptr + %mstore_kernel_bn254_pairing + // stack: x3, x4, ptr + PUSH 2 + MULFP254 + // stack: 2*x3, x4, ptr + DUP3 + %add_const(3) + // stack: ind3, 2*x3, x4, ptr + %mstore_kernel_bn254_pairing + // stack: x4, ptr + PUSH 2 + MULFP254 + // stack: 2*x4, ptr + SWAP1 + // stack: ptr, 2*x4 + %add_const(4) + // stack: ind4, 2*x4 + %mstore_kernel_bn254_pairing + // stack: +%endmacro + +// cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40 +%macro store_fp254_6(ptr) + // 
stack: x0, x1, x2, x3, x4, x5 + PUSH $ptr + // stack: ind0, x0, x1, x2, x3, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x1, x2, x3, x4, x5 + PUSH $ptr + %add_const(1) + // stack: ind1, x1, x2, x3, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x2, x3, x4, x5 + PUSH $ptr + %add_const(2) + // stack: ind2, x2, x3, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x3, x4, x5 + PUSH $ptr + %add_const(3) + // stack: ind3, x3, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x4, x5 + PUSH $ptr + %add_const(4) + // stack: ind4, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x5 + PUSH $ptr + %add_const(5) + // stack: ind5, x5 + %mstore_kernel_bn254_pairing + // stack: +%endmacro + +// cost: store (40) + i9 (9) = 49 +%macro store_fp254_6_sh(ptr) + // stack: x0, x1, x2, x3, x4, x5 + PUSH $ptr + %add_const(2) + // stack: ind2, x0, x1, x2, x3, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x1, x2, x3, x4, x5 + PUSH $ptr + %add_const(3) + // stack: ind3, x1, x2, x3, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x2, x3, x4, x5 + PUSH $ptr + %add_const(4) + // stack: ind4, x2, x3, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x3, x4, x5 + PUSH $ptr + %add_const(5) + // stack: ind5, x3, x4, x5 + %mstore_kernel_bn254_pairing + // stack: x4, x5 + %i9 + // stack: y5, y4 + PUSH $ptr + %add_const(1) + // stack: ind1, y5, y4 + %mstore_kernel_bn254_pairing + // stack: y4 + PUSH $ptr + // stack: ind0, y4 + %mstore_kernel_bn254_pairing + // stack: +%endmacro + +// cost: 6 +%macro dup_fp254_6_0 + // stack: f: 6 + DUP6 + DUP6 + DUP6 + DUP6 + DUP6 + DUP6 + // stack: f: 6, f: 6 +%endmacro + +// cost: 6 +%macro dup_fp254_6_2 + // stack: X: 2, f: 6 + DUP8 + DUP8 + DUP8 + DUP8 + DUP8 + DUP8 + // stack: f: 6, X: 2, f: 6 +%endmacro + +// cost: 6 +%macro dup_fp254_6_6 + // stack: X: 6, f: 6 + DUP12 + DUP12 + DUP12 + DUP12 + DUP12 + DUP12 + // stack: f: 6, X: 6, f: 6 +%endmacro + +// cost: 6 +%macro dup_fp254_6_7 + // stack: X: 7, f: 6 + DUP13 + DUP13 + DUP13 + DUP13 + DUP13 + 
DUP13 + // stack: f: 6, X: 7, f: 6 +%endmacro + +// cost: 6 +%macro dup_fp254_6_8 + // stack: X: 8, f: 6 + DUP14 + DUP14 + DUP14 + DUP14 + DUP14 + DUP14 + // stack: f: 6, X: 8, f: 6 +%endmacro + +/// multiply (a + bt + ct^2) by t: +/// t(a + bt + ct^2) = at + bt^2 + ct^3 = (9+i)c + at + bt^2 +%macro sh_fp254_6 + // stack: a, b, c + %stack (a: 2, b: 2, c: 2) -> (c, a, b) + // stack: c, a, b + %i9 + SWAP1 + // stack: (9+i)c, a, b +%endmacro + +// cost: 16 +%macro add_fp254_6 + // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5 + SWAP7 + ADDFP254 + SWAP6 + // stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5 + SWAP7 + ADDFP254 + SWAP6 + // stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5 + SWAP7 + ADDFP254 + SWAP6 + // stack: f0, f4, f5, g0, h1, h2, h3, g4, g5 + SWAP7 + ADDFP254 + SWAP6 + // stack: f0, f5, g0, h1, h2, h3, h4, g5 + SWAP7 + ADDFP254 + SWAP6 + // stack: f0, g0, h1, h2, h3, h4, h5 + ADDFP254 + // stack: h0, h1, h2, h3, h4, h5 +%endmacro + +// cost: 18 +// add two fp254_6 elements with a to-be-popped stack term separating them +// (f: 6, X, g: 6) -> (f + g) +%macro add_fp254_6_hole + // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5 + SWAP8 + ADDFP254 + SWAP7 + // stack: f0, f2, f3, f4, f5, X, g0, h1, g2, g3, g4, g5 + SWAP8 + ADDFP254 + SWAP7 + // stack: f0, f3, f4, f5, X, g0, h1, h2, g3, g4, g5 + SWAP8 + ADDFP254 + SWAP7 + // stack: f0, f4, f5, X, g0, h1, h2, h3, g4, g5 + SWAP8 + ADDFP254 + SWAP7 + // stack: f0, f5, X, g0, h1, h2, h3, h4, g5 + SWAP8 + ADDFP254 + SWAP7 + // stack: f0, X, g0, h1, h2, h3, h4, h5 + SWAP1 + POP + ADDFP254 + // stack: h0, h1, h2, h3, h4, h5 +%endmacro + +// *reversed argument subtraction* cost: 17 +%macro subr_fp254_6 + // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5 + SWAP7 + SUBFP254 + SWAP6 + // stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5 + SWAP7 + SUBFP254 + SWAP6 + // stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5 + SWAP7 + SUBFP254 + SWAP6 + // stack: f0, f4, f5, g0, h1, h2, h3, g4, g5 + SWAP7 + 
SUBFP254 + SWAP6 + // stack: f0, f5, g0, h1, h2, h3, h4, g5 + SWAP7 + SUBFP254 + SWAP6 + // stack: f0, g0, h1, h2, h3, h4, h5 + SWAP1 + SUBFP254 + // stack: h0, h1, h2, h3, h4, h5 +%endmacro + +// cost: 21 +%macro scale_re_fp254_6 + // stack: c , f0, f1, f2, f3, f4, f5 + SWAP6 + DUP7 + MULFP254 + SWAP6 + // stack: c , f0, f1, f2, f3, f4, c * f5 + SWAP5 + DUP6 + MULFP254 + SWAP5 + // stack: c , f0, f1, f2, f3, c * f4, c * f5 + SWAP4 + DUP5 + MULFP254 + SWAP4 + // stack: c , f0, f1, f2, c * f3, c * f4, c * f5 + SWAP3 + DUP4 + MULFP254 + SWAP3 + // stack: c , f0, f1, c * f2, c * f3, c *f 4, c * f5 + SWAP2 + DUP3 + MULFP254 + SWAP2 + // stack: c , f0, c * f1, c * f2, c * f3, c * f4, c * f5 + MULFP254 + // stack: c * f0, c * f1, c * f2, c * f3, c * f4, c * f5 +%endmacro + +/// cost: +/// +/// G0 + G1t + G2t^2 = (a+bi) * (F0 + F1t + F2t^2) +/// = (a+bi)F0 + (a+bi)F1t + (a+bi)F2t^2 +/// +/// G0 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i +/// G1 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i +/// G2 = (a+bi)(f2+f2_i) = (af2 - bf2_) + (bf2 + af2_)i + +%macro scale_fp254_6 + // stack: a, b, f0, f0_, f1, f1_, f2, f2_ + DUP2 + DUP5 + MULFP254 + // stack: bf0_, a, b, f0, f0_, f1, f1_, f2, f2_ + DUP2 + DUP5 + MULFP254 + // stack: af0, bf0_, a, b, f0, f0_, f1, f1_, f2, f2_ + SUBFP254 + // stack: g0, a, b, f0, f0_, f1, f1_, f2, f2_ + SWAP3 + // stack: f0, a, b, g0, f0_, f1, f1_, f2, f2_ + DUP3 + MULFP254 + // stack: bf0, a, b, g0, f0_, f1, f1_, f2, f2_ + SWAP1 + SWAP4 + // stack: f0_, bf0, b, g0, a, f1, f1_, f2, f2_ + DUP5 + MULFP254 + // stack: af0_, bf0, b, g0, a, f1, f1_, f2, f2_ + ADDFP254 + // stack: g0_, b, g0, a, f1, f1_, f2, f2_ + SWAP3 + // stack: a, b, g0, g0_, f1, f1_, f2, f2_ + DUP2 + DUP7 + MULFP254 + // stack: bf1_, a, b, g0, g0_, f1, f1_, f2, f2_ + DUP2 + DUP7 + MULFP254 + // stack: af1, bf1_, a, b, g0, g0_, f1, f1_, f2, f2_ + SUBFP254 + // stack: g1, a, b, g0, g0_, f1, f1_, f2, f2_ + SWAP5 + // stack: f1, a, b, g0, g0_, g1, f1_, f2, f2_ + DUP3 + MULFP254 + 
// stack: bf1, a, b, g0, g0_, g1, f1_, f2, f2_ + SWAP1 + SWAP6 + // stack: f1_, bf1, b, g0, g0_, g1, a, f2, f2_ + DUP7 + MULFP254 + // stack: af1_, bf1, b, g0, g0_, g1, a, f2, f2_ + ADDFP254 + // stack: g1_, b, g0, g0_, g1, a, f2, f2_ + SWAP5 + // stack: a, b, g0, g0_, g1, g1_, f2, f2_ + DUP2 + DUP9 + MULFP254 + // stack: bf2_, a, b, g0, g0_, g1, g1_, f2, f2_ + DUP2 + DUP9 + MULFP254 + // stack: af2, bf2_, a, b, g0, g0_, g1, g1_, f2, f2_ + SUBFP254 + // stack: g2, a, b, g0, g0_, g1, g1_, f2, f2_ + SWAP7 + // stack: f2, a, b, g0, g0_, g1, g1_, g2, f2_ + SWAP8 + // stack: f2_, a, b, g0, g0_, g1, g1_, g2, f2 + MULFP254 + // stack: af2_, b, g0, g0_, g1, g1_, g2, f2 + SWAP7 + // stack: f2, b, g0, g0_, g1, g1_, g2, af2_ + MULFP254 + // stack: bf2, g0, g0_, g1, g1_, g2, af2_ + SWAP1 + SWAP6 + // stack: af2_, bf2, g0_, g1, g1_, g2, g0 + ADDFP254 + // stack: g2_, g0_, g1, g1_, g2, g0 + SWAP5 + // stack: g0, g0_, g1, g1_, g2, g2_ +%endmacro + +/// cost: 1 i9 (9) + 16 dups + 15 swaps + 12 muls + 6 adds/subs = 58 +/// +/// G0 + G1t + G2t^2 = (a+bi)t * (F0 + F1t + F2t^2) +/// = (c+di)F2 + (a+bi)F0t + (a+bi)F1t^2 +/// where c+di = (a+bi)(9+i) = (9a-b) + (a+9b)i +/// +/// G0 = (c+di)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i +/// G1 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i +/// G2 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i + +%macro scale_fp254_6_sh + // stack: a, b, f0, f0_, f1, f1_, f2, f2_ + DUP6 + DUP3 + MULFP254 + // stack: bf1_, a, b, f0, f0_, f1, f1_, f2, f2_ + DUP6 + DUP3 + MULFP254 + // stack: af1 , bf1_, a, b, f0, f0_, f1, f1_, f2, f2_ + SUBFP254 + // stack: g2, a, b, f0, f0_, f1, f1_, f2, f2_ + SWAP7 + // stack: f2, a, b, f0, f0_, f1, f1_, g2, f2_ + SWAP5 + // stack: f1, a, b, f0, f0_, f2, f1_, g2, f2_ + DUP3 + MULFP254 + // stack: bf1, a, b, f0, f0_, f2, f1_, g2, f2_ + SWAP1 + SWAP6 + // stack: f1_, bf1, b, f0, f0_, f2, a, g2, f2_ + DUP7 + MULFP254 + // stack: af1_, bf1, b, f0, f0_, f2, a, g2, f2_ + ADDFP254 + // stack: g2_, b, f0, f0_, f2, a, g2, f2_ + 
SWAP7 + // stack: f2_, b, f0, f0_, f2, a, g2, g2_ + DUP4 + DUP3 + MULFP254 + // stack: bf0_, f2_, b, f0, f0_, f2, a, g2, g2_ + DUP4 + DUP8 + MULFP254 + // stack: af0, bf0_, f2_, b, f0, f0_, f2, a, g2, g2_ + SUBFP254 + // stack: g1, f2_, b, f0, f0_, f2, a, g2, g2_ + SWAP5 + // stack: f2, f2_, b, f0, f0_, g1, a, g2, g2_ + SWAP3 + // stack: f0, f2_, b, f2, f0_, g1, a, g2, g2_ + DUP3 + MULFP254 + // stack: bf0, f2_, b, f2, f0_, g1, a, g2, g2_ + SWAP1 + SWAP4 + // stack: f0_, bf0, b, f2, f2_, g1, a, g2, g2_ + DUP7 + MULFP254 + // stack: af0_, bf0, b, f2, f2_, g1, a, g2, g2_ + ADDFP254 + // stack: g1_, b, f2, f2_, g1, a, g2, g2_ + SWAP5 + // stack: a, b, f2, f2_, g1, g1_, g2, g2_ + %i9 + // stack: d, c, f2, f2_, g1, g1_, g2, g2_ + DUP4 + DUP2 + MULFP254 + // stack: df2_, d, c, f2, f2_, g1, g1_, g2, g2_ + DUP4 + DUP4 + MULFP254 + // stack: cf2, df2_, d, c, f2, f2_, g1, g1_, g2, g2_ + SUBFP254 + // stack: g0, d, c, f2, f2_, g1, g1_, g2, g2_ + SWAP3 + // stack: f2, d, c, g0, f2_, g1, g1_, g2, g2_ + MULFP254 + // stack: df2, c, g0, f2_, g1, g1_, g2, g2_ + SWAP3 + MULFP254 + // stack: cf2_, g0, df2, g1, g1_, g2, g2_ + SWAP1 + SWAP2 + // stack: df2, cf2_, g0, g1, g1_, g2, g2_ + ADDFP254 + // stack: g0_, g0, g1, g1_, g2, g2_ + SWAP1 + // stack: g0, g0_, g1, g1_, g2, g2_ +%endmacro + +/// cost: 1 i9 (9) + 16 dups + 17 swaps + 12 muls + 6 adds/subs = 60 +/// +/// G0 + G1t + G2t^2 = (a+bi)t^2 * (F0 + F1t + F2t^2) +/// = (c+di)F1 + (c+di)F2t + (a+bi)F0t^2 +/// where c+di = (a+bi)(9+i) = (9a-b) + (a+9b)i +/// +/// G0 = (c+di)(f1+f1_i) = (cf1 - df1_) + (df1 + cf1_)i +/// G1 = (c+di)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i +/// G2 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i + +%macro scale_fp254_6_sh2 + // stack: a, b, f0, f0_, f1, f1_, f2, f2_ + DUP4 + DUP3 + MULFP254 + // stack: bf0_, a, b, f0, f0_, f1, f1_, f2, f2_ + DUP4 + DUP3 + MULFP254 + // stack: af0, bf0_, a, b, f0, f0_, f1, f1_, f2, f2_ + SUBFP254 + // stack: g2, a, b, f0, f0_, f1, f1_, f2, f2_ + SWAP7 + SWAP3 + // 
stack: f0, a, b, f2, f0_, f1, f1_, g2, f2_ + DUP3 + MULFP254 + // stack: bf0, a, b, f2, f0_, f1, f1_, g2, f2_ + SWAP1 + SWAP4 + // stack: f0_, bf0, b, f2, a, f1, f1_, g2, f2_ + DUP5 + MULFP254 + // stack: af0_, bf0, b, f2, a, f1, f1_, g2, f2_ + ADDFP254 + // stack: g2_, b, f2, a, f1, f1_, g2, f2_ + SWAP7 + SWAP3 + // stack: a, b, f2, f2_, f1, f1_, g2, g2_ + %i9 + // stack: d, c, f2, f2_, f1, f1_, g2, g2_ + DUP4 + DUP2 + MULFP254 + // stack: df2_, d, c, f2, f2_, f1, f1_, g2, g2_ + DUP4 + DUP4 + MULFP254 + // stack: cf2, df2_, d, c, f2, f2_, f1, f1_, g2, g2_ + SUBFP254 + // stack: g1, d, c, f2, f2_, f1, f1_, g2, g2_ + SWAP5 + SWAP3 + // stack: f2, d, c, f1, f2_, g1, f1_, g2, g2_ + DUP2 + MULFP254 + // stack: df2, d, c, f1, f2_, g1, f1_, g2, g2_ + SWAP1 + SWAP4 + // stack: f2_, df2, c, f1, d, g1, f1_, g2, g2_ + DUP3 + MULFP254 + // stack: cf2_, df2, c, f1, d, g1, f1_, g2, g2_ + ADDFP254 + // stack: g1_, c, f1, d, g1, f1_, g2, g2_ + SWAP5 + // stack: f1_, c, f1, d, g1, g1_, g2, g2_ + DUP1 + DUP5 + MULFP254 + // stack: df1_, f1_, c, f1, d, g1, g1_, g2, g2_ + DUP4 + DUP4 + MULFP254 + // stack: cf1, df1_, f1_, c, f1, d, g1, g1_, g2, g2_ + SUBFP254 + // stack: g0, f1_, c, f1, d, g1, g1_, g2, g2_ + SWAP3 + // stack: f1, f1_, c, g0, d, g1, g1_, g2, g2_ + SWAP2 + MULFP254 + // stack: cf1_, f1, g0, d, g1, g1_, g2, g2_ + SWAP3 + MULFP254 + // stack: df1, g0, cf1_, g1, g1_, g2, g2_ + SWAP1 + SWAP2 + // stack: cf1_, df1, g0, g1, g1_, g2, g2_ + ADDFP254 + // stack: g0_, g0, g1, g1_, g2, g2_ + SWAP1 + // stack: g0, g0_, g1, g1_, g2, g2_ +%endmacro + +%macro load_fp254_12 + // stack: ptr + DUP1 + %add_const(10) + // stack: ind10, ptr + %mload_kernel_bn254_pairing + // stack: x10, ptr + DUP2 + %add_const(9) + // stack: ind09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x09, x10, ptr + DUP3 + %add_const(8) + // stack: ind08, x09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x08, x09, x10, ptr + DUP4 + %add_const(7) + // stack: ind07, x08, x09, x10, ptr + 
%mload_kernel_bn254_pairing + // stack: x07, x08, x09, x10, ptr + DUP5 + %add_const(6) + // stack: ind06, x07, x08, x09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x06, x07, x08, x09, x10, ptr + DUP6 + %add_const(5) + // stack: ind05, x06, x07, x08, x09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x05, x06, x07, x08, x09, x10, ptr + DUP7 + %add_const(4) + // stack: ind04, x05, x06, x07, x08, x09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x04, x05, x06, x07, x08, x09, x10, ptr + DUP8 + %add_const(3) + // stack: ind03, x04, x05, x06, x07, x08, x09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x03, x04, x05, x06, x07, x08, x09, x10, ptr + DUP9 + %add_const(2) + // stack: ind02, x03, x04, x05, x06, x07, x08, x09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr + DUP10 + %add_const(1) + // stack: ind01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr + DUP11 + %add_const(11) + // stack: ind11, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr + %mload_kernel_bn254_pairing + // stack: x11, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr + SWAP11 + // stack: ind00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11 + %mload_kernel_bn254_pairing + // stack: x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11 +%endmacro + +%macro store_fp254_12 + // stack: ptr, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11 + SWAP11 + // stack: x10, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11 + DUP12 + %add_const(10) + // stack: ind10, x10, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11 + DUP11 + // stack: ind00, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x01, x02, x03, x04, x05, x06, x07, x08, x09, 
ptr, x11 + DUP10 + %add_const(01) + // stack: ind01, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11 + DUP9 + %add_const(02) + // stack: ind02, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x03, x04, x05, x06, x07, x08, x09, ptr, x11 + DUP8 + %add_const(03) + // stack: ind03, x03, x04, x05, x06, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x04, x05, x06, x07, x08, x09, ptr, x11 + DUP7 + %add_const(04) + // stack: ind04, x04, x05, x06, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x05, x06, x07, x08, x09, ptr, x11 + DUP6 + %add_const(05) + // stack: ind05, x05, x06, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x06, x07, x08, x09, ptr, x11 + DUP5 + %add_const(06) + // stack: ind06, x06, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x07, x08, x09, ptr, x11 + DUP4 + %add_const(07) + // stack: ind07, x07, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x08, x09, ptr, x11 + DUP3 + %add_const(08) + // stack: ind08, x08, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: x09, ptr, x11 + DUP2 + %add_const(09) + // stack: ind09, x09, ptr, x11 + %mstore_kernel_bn254_pairing + // stack: ptr, x11 + %add_const(11) + // stack: ind11, x11 + %mstore_kernel_bn254_pairing + // stack: +%endmacro + +/// moves fp254_12 from src..src+12 to dest..dest+12 +/// these should not overlap. 
leaves dest on stack +%macro move_fp254_12 + // stack: src, dest + DUP1 + // stack: ind00, src, dest + %mload_kernel_bn254_pairing + // stack: x00, src, dest + DUP3 + // stack: ind00', x00, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(1) + // stack: ind01, src, dest + %mload_kernel_bn254_pairing + // stack: x01, src, dest + DUP3 + %add_const(1) + // stack: ind01', x01, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(2) + // stack: ind02, src, dest + %mload_kernel_bn254_pairing + // stack: x02, src, dest + DUP3 + %add_const(2) + // stack: ind02', x02, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(3) + // stack: ind03, src, dest + %mload_kernel_bn254_pairing + // stack: x03, src, dest + DUP3 + %add_const(3) + // stack: ind03', x03, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(4) + // stack: ind04, src, dest + %mload_kernel_bn254_pairing + // stack: x04, src, dest + DUP3 + %add_const(4) + // stack: ind04', x04, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(5) + // stack: ind05, src, dest + %mload_kernel_bn254_pairing + // stack: x05, src, dest + DUP3 + %add_const(5) + // stack: ind05', x05, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(6) + // stack: ind06, src, dest + %mload_kernel_bn254_pairing + // stack: x06, src, dest + DUP3 + %add_const(6) + // stack: ind06', x06, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(7) + // stack: ind07, src, dest + %mload_kernel_bn254_pairing + // stack: x07, src, dest + DUP3 + %add_const(7) + // stack: ind07', x07, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(8) + // stack: ind08, src, dest + %mload_kernel_bn254_pairing + // stack: x08, src, dest + DUP3 + %add_const(8) + // stack: ind08', x08, src, dest + %mstore_kernel_bn254_pairing + // 
stack: src, dest + DUP1 + %add_const(9) + // stack: ind09, src, dest + %mload_kernel_bn254_pairing + // stack: x09, src, dest + DUP3 + %add_const(9) + // stack: ind09', x09, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + DUP1 + %add_const(10) + // stack: ind10, src, dest + %mload_kernel_bn254_pairing + // stack: x10, src, dest + DUP3 + %add_const(10) + // stack: ind10', x10, src, dest + %mstore_kernel_bn254_pairing + // stack: src, dest + %add_const(11) + // stack: ind11, dest + %mload_kernel_bn254_pairing + // stack: x11, dest + DUP2 + %add_const(11) + // stack: ind11', x11, dest + %mstore_kernel_bn254_pairing +%endmacro + +%macro assert_eq_unit_fp254_12 + %assert_eq_const(1) + %assert_zero + %assert_zero + %assert_zero + %assert_zero + %assert_zero + %assert_zero + %assert_zero + %assert_zero + %assert_zero + %assert_zero + %assert_zero +%endmacro diff --git a/evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm b/evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm deleted file mode 100644 index 780473b9..00000000 --- a/evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm +++ /dev/null @@ -1,39 +0,0 @@ -/// Division modulo 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47, the BN254 base field order -/// To replace with more efficient method using non-determinism later. - -// Returns y * (x^-1) where the inverse is taken modulo N -%macro moddiv - // stack: x, y - %inverse - // stack: x^-1, y - %mulmodn -%endmacro - -%macro mulmodn - // stack: x, y - %bn_base - // stack: N, x, y - SWAP2 - // stack: y, x, N - MULMOD -%endmacro - -%macro squaremodn - // stack: x - DUP1 - // stack: x, x - %mulmodn -%endmacro - -// Non-deterministically provide the inverse modulo N. 
-%macro inverse - // stack: x - PROVER_INPUT(ff::bn254_base::inverse) - // stack: x^-1, x - %stack (inv, x) -> (inv, x, @BN_BASE, inv) - // stack: x^-1, x, N, x^-1 - MULMOD - // stack: x^-1 * x, x^-1 - %assert_eq_const(1) - // stack: x^-1 -%endmacro diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm deleted file mode 100644 index 2f4b9024..00000000 --- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm +++ /dev/null @@ -1,166 +0,0 @@ -/// Note: uncomment this to test - -/// global test_mul_Fp12: -/// // stack: f, in0 , f', g, in1 , g', in1, out, in0, out -/// DUP7 -/// // stack: in0, f, in0 , f', g, in1 , g', in1, out, in0, out -/// %store_fp6 -/// // stack: in0 , f', g, in1 , g', in1, out, in0, out -/// %add_const(6) -/// // stack: in0', f', g, in1 , g', in1, out, in0, out -/// %store_fp6 -/// // stack: g, in1 , g', in1, out, in0, out -/// DUP7 -/// // stack: in1, g, in1 , g', in1, out, in0, out -/// %store_fp6 -/// // stack: in1 , g', in1, out, in0, out -/// %add_const(6) -/// // stack: in1', g', in1, out, in0, out -/// %store_fp6 -/// // stack: in1, out, in0, out -/// PUSH ret_stack -/// // stack: ret_stack, in1, out, in0, out -/// SWAP3 -/// // stack: in0, in1, out, ret_stack, out -/// %jump(mul_Fp12) -/// ret_stack: -/// // stack: out -/// DUP1 %add_const(6) -/// // stack: out', out -/// %load_fp6 -/// // stack: h', out -/// DUP7 -/// // stack: out, h', out -/// %load_fp6 -/// // stack: h, h', out -/// %jump(0xdeadbeef) - - -/// fp6 functions: -/// fn | num | ops | cost -/// ------------------------- -/// load | 8 | 40 | 320 -/// store | 5 | 40 | 200 -/// dup | 5 | 6 | 30 -/// swap | 4 | 16 | 64 -/// add | 4 | 16 | 64 -/// subr | 1 | 17 | 17 -/// mul | 3 | 157 | 471 -/// i9 | 1 | 9 | 9 -/// -/// lone stack operations: -/// op | num -/// ------------ -/// ADD | 3 -/// SWAP | 2 -/// DUP | 6 -/// PUSH | 6 -/// POP | 2 -/// JUMP | 1 -/// -/// TOTAL: 1196 - -/// inputs: -/// F = f + f'z -/// G = g + g'z -/// -/// output: 
-/// H = h + h'z = FG -/// -/// h = fg + sh(f'g') -/// h' = (f+f')(g+g') - fg - f'g' -/// -/// memory pointers [ind' = ind+6] -/// {in0: f, in0: f', in1: g, in1':g', out: h, out': h'} -/// -/// f, f', g, g' consist of six elements on the stack - -global mul_Fp12: - // stack: in0, in1, out - DUP1 %add_const(6) - // stack: in0', in0, in1, out - %load_fp6 - // stack: f', in0, in1, out - DUP8 %add_const(6) - // stack: in1', f', in0, in1, out - %load_fp6 - // stack: g', f', in0, in1, out - PUSH ret_1 - // stack: ret_1, g', f', in0, in1, out - %dup_fp6_7 - // stack: f', ret_1, g', f', in0, in1, out - %dup_fp6_7 - // stack: g', f', ret_1, g', f', in0, in1, out - %jump(mul_fp6) -ret_1: - // stack: f'g', g' , f', in0, in1, out - %dup_fp6_0 - // stack: f'g', f'g', g' , f', in0, in1, out - %store_fp6_sh(100) - // stack: f'g', g' , f', in0, in1, out {100: sh(f'g')} - %store_fp6(106) - // stack: g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'} - DUP13 - // stack: in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'} - DUP15 - // stack: in1, in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'} - %load_fp6 - // stack: g , in0, g' , f', in0, in1, out {100: sh(f'g'), 106: f'g'} - %swap_fp6_hole - // stack: g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'} - %dup_fp6_7 - // stack: g,g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'} - %add_fp6 - // stack: g+g', in0, g , f', in0, in1, out {100: sh(f'g'), 106: f'g'} - %swap_fp6_hole - // stack: g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'} - PUSH ret_2 - // stack: ret_2, g, in0, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'} - SWAP7 - // stack: in0, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'} - %load_fp6 - // stack: f, g, ret_2, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'} - %jump(mul_fp6) -ret_2: - // stack: fg, g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g'} - %store_fp6(112) - // stack: g+g', f', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %swap_fp6 - // stack: 
f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - PUSH ret_3 - // stack: ret_3, f', g+g', in0, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - SWAP13 - // stack: in0, f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %load_fp6 - // stack: f,f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %add_fp6 - // stack: f+f', g+g', ret_3, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %jump(mul_fp6) -ret_3: - // stack: (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %load_fp6(112) - // stack: fg, (f+f')(g+g'), in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %swap_fp6 - // stack: (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %dup_fp6_6 - // stack: fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %load_fp6(106) - // stack: f'g',fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %add_fp6 - // stack: f'g'+fg, (f+f')(g+g'), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %subr_fp6 - // stack: (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - DUP14 %add_const(6) - // stack: out', (f+f')(g+g') - (f'g'+fg), fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %store_fp6 - // stack: fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %load_fp6(100) - // stack: sh(f'g') , fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %add_fp6 - // stack: sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - DUP8 - // stack: out, sh(f'g') + fg, in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %store_fp6 - // stack: in1, out {100: sh(f'g'), 106: f'g', 112: fg} - %pop2 - JUMP diff --git a/evm/src/cpu/kernel/asm/fields/fp6_macros.asm b/evm/src/cpu/kernel/asm/fields/fp6_macros.asm deleted file mode 100644 index b575c234..00000000 --- a/evm/src/cpu/kernel/asm/fields/fp6_macros.asm +++ /dev/null @@ -1,314 +0,0 @@ -// cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40 -%macro load_fp6 - // stack: ptr - DUP1 %add_const(4) - // stack: ind4, ptr - 
%mload_kernel_general - // stack: x4, ptr - DUP2 %add_const(3) - // stack: ind3, x4, ptr - %mload_kernel_general - // stack: x3, x4, ptr - DUP3 %add_const(2) - // stack: ind2, x3, x4, ptr - %mload_kernel_general - // stack: x2, x3, x4, ptr - DUP4 %add_const(1) - // stack: ind1, x2, x3, x4, ptr - %mload_kernel_general - // stack: x1, x2, x3, x4, ptr - DUP5 %add_const(5) - // stack: ind5, x1, x2, x3, x4, ptr - %mload_kernel_general - // stack: x5, x1, x2, x3, x4, ptr - SWAP5 - // stack: ind0, x1, x2, x3, x4, x5 - %mload_kernel_general - // stack: x0, x1, x2, x3, x4, x5 -%endmacro - -// cost: 6 loads + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40 -%macro load_fp6(ptr) - // stack: - PUSH $ptr %add_const(5) - // stack: ind5 - %mload_kernel_general - // stack: x5 - PUSH $ptr %add_const(4) - // stack: ind4, x5 - %mload_kernel_general - // stack: x4, x5 - PUSH $ptr %add_const(3) - // stack: ind3, x4, x5 - %mload_kernel_general - // stack: x3, x4, x5 - PUSH $ptr %add_const(2) - // stack: ind2, x3, x4, x5 - %mload_kernel_general - // stack: x2, x3, x4, x5 - PUSH $ptr %add_const(1) - // stack: ind1, x2, x3, x4, x5 - %mload_kernel_general - // stack: x1, x2, x3, x4, x5 - PUSH $ptr - // stack: ind0, x1, x2, x3, x4, x5 - %mload_kernel_general - // stack: x0, x1, x2, x3, x4, x5 -%endmacro - -// cost: 6 stores + 6 swaps/dups + 5 adds = 6*4 + 6*1 + 5*2 = 40 -%macro store_fp6 - // stack: ptr, x0, x1, x2, x3, x4 , x5 - SWAP5 - // stack: x4, x0, x1, x2, x3, ptr, x5 - DUP6 %add_const(4) - // stack: ind4, x4, x0, x1, x2, x3, ptr, x5 - %mstore_kernel_general - // stack: x0, x1, x2, x3, ptr, x5 - DUP5 - // stack: ind0, x0, x1, x2, x3, ptr, x5 - %mstore_kernel_general - // stack: x1, x2, x3, ptr, x5 - DUP4 %add_const(1) - // stack: ind1, x1, x2, x3, ptr, x5 - %mstore_kernel_general - // stack: x2, x3, ptr, x5 - DUP3 %add_const(2) - // stack: ind2, x2, x3, ptr, x5 - %mstore_kernel_general - // stack: x3, ptr, x5 - DUP2 %add_const(3) - // stack: ind3, x3, ptr, x5 - %mstore_kernel_general - // 
stack: ptr, x5 - %add_const(5) - // stack: ind5, x5 - %mstore_kernel_general - // stack: -%endmacro - -// cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40 -%macro store_fp6(ptr) - // stack: x0, x1, x2, x3, x4, x5 - PUSH $ptr - // stack: ind0, x0, x1, x2, x3, x4, x5 - %mstore_kernel_general - // stack: x1, x2, x3, x4, x5 - PUSH $ptr %add_const(1) - // stack: ind1, x1, x2, x3, x4, x5 - %mstore_kernel_general - // stack: x2, x3, x4, x5 - PUSH $ptr %add_const(2) - // stack: ind2, x2, x3, x4, x5 - %mstore_kernel_general - // stack: x3, x4, x5 - PUSH $ptr %add_const(3) - // stack: ind3, x3, x4, x5 - %mstore_kernel_general - // stack: x4, x5 - PUSH $ptr %add_const(4) - // stack: ind4, x4, x5 - %mstore_kernel_general - // stack: x5 - PUSH $ptr %add_const(5) - // stack: ind5, x5 - %mstore_kernel_general - // stack: -%endmacro - -// cost: store (40) + i9 (9) = 49 -%macro store_fp6_sh(ptr) - // stack: x0, x1, x2, x3, x4, x5 - PUSH $ptr %add_const(2) - // stack: ind2, x0, x1, x2, x3, x4, x5 - %mstore_kernel_general - // stack: x1, x2, x3, x4, x5 - PUSH $ptr %add_const(3) - // stack: ind3, x1, x2, x3, x4, x5 - %mstore_kernel_general - // stack: x2, x3, x4, x5 - PUSH $ptr %add_const(4) - // stack: ind4, x2, x3, x4, x5 - %mstore_kernel_general - // stack: x3, x4, x5 - PUSH $ptr %add_const(5) - // stack: ind5, x3, x4, x5 - %mstore_kernel_general - // stack: x4, x5 - %i9 - // stack: y5, y4 - PUSH $ptr %add_const(1) - // stack: ind1, y5, y4 - %mstore_kernel_general - // stack: y4 - PUSH $ptr - // stack: ind0, y4 - %mstore_kernel_general - // stack: -%endmacro - -// cost: 9; note this returns y, x for the output x + yi -%macro i9 - // stack: a , b - DUP2 - // stack: b, a, b - DUP2 - // stack: a , b, a , b - PUSH 9 MULFP254 - // stack: 9a , b, a , b - SUBFP254 - // stack: 9a - b, a , b - SWAP2 - // stack: b , a, 9a - b - PUSH 9 MULFP254 - // stack 9b , a, 9a - b - ADDFP254 - // stack: 9b + a, 9a - b -%endmacro - -// cost: 6 -%macro dup_fp6_0 - // stack: f: 6 - DUP6 - DUP6 - 
DUP6 - DUP6 - DUP6 - DUP6 - // stack: f: 6, g: 6 -%endmacro - -// cost: 6 -%macro dup_fp6_6 - // stack: f: 6, g: 6 - DUP12 - DUP12 - DUP12 - DUP12 - DUP12 - DUP12 - // stack: g: 6, f: 6, g: 6 -%endmacro - -// cost: 6 -%macro dup_fp6_7 - // stack: f: 6, g: 6 - DUP13 - DUP13 - DUP13 - DUP13 - DUP13 - DUP13 - // stack: g: 6, f: 6, g: 6 -%endmacro - -// cost: 16 -%macro swap_fp6 - // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5 - SWAP6 - // stack: g0, f1, f2, f3, f4, f5, f0, g1, g2, g3, g4, g5 - SWAP1 - SWAP7 - SWAP1 - // stack: g0, g1, f2, f3, f4, f5, f0, f1, g2, g3, g4, g5 - SWAP2 - SWAP8 - SWAP2 - // stack: g0, g1, g2, f3, f4, f5, f0, f1, f2, g3, g4, g5 - SWAP3 - SWAP9 - SWAP3 - // stack: g0, g1, g2, g3, f4, f5, f0, f1, f2, f3, g4, g5 - SWAP4 - SWAP10 - SWAP4 - // stack: g0, g1, g2, g3, g4, f5, f0, f1, f2, f3, f4, g5 - SWAP5 - SWAP11 - SWAP5 - // stack: g0, g1, g2, g3, g4, g5, f0, f1, f2, f3, f4, f5 -%endmacro - -// cost: 16 -// swap two fp6 elements with a stack term separating them -// (f: 6, x, g: 6) -> (g: 6, x, f: 6) -%macro swap_fp6_hole - // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5 - SWAP7 - // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5 - SWAP1 - SWAP8 - SWAP1 - // stack: g0, g1, f2, f3, f4, f5, X, f0, f1, g2, g3, g4, g5 - SWAP2 - SWAP9 - SWAP2 - // stack: g0, g1, g2, f3, f4, f5, X, f0, f1, f2, g3, g4, g5 - SWAP3 - SWAP10 - SWAP3 - // stack: g0, g1, g2, g3, f4, f5, X, f0, f1, f2, f3, g4, g5 - SWAP4 - SWAP11 - SWAP4 - // stack: g0, g1, g2, g3, g4, f5, X, f0, f1, f2, f3, f4, g5 - SWAP5 - SWAP12 - SWAP5 - // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5 -%endmacro - -// cost: 16 -%macro add_fp6 - // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5 - SWAP7 - ADDFP254 - SWAP6 - // stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5 - SWAP7 - ADDFP254 - SWAP6 - // stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5 - SWAP7 - ADDFP254 - SWAP6 - // stack: f0, f4, f5, g0, h1, h2, h3, g4, g5 - SWAP7 - ADDFP254 - SWAP6 - // 
stack: f0, f5, g0, h1, h2, h3, h4, g5 - SWAP7 - ADDFP254 - SWAP6 - // stack: f0, g0, h1, h2, h3, h4, h5 - ADDFP254 - // stack: h0, h1, h2, h3, h4, h5 -%endmacro - -// *reversed argument subtraction* cost: 17 -%macro subr_fp6 - // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5 - SWAP7 - SUBFP254 - SWAP6 - // stack: f0, f2, f3, f4, f5, g0, h1, g2, g3, g4, g5 - SWAP7 - SUBFP254 - SWAP6 - // stack: f0, f3, f4, f5, g0, h1, h2, g3, g4, g5 - SWAP7 - SUBFP254 - SWAP6 - // stack: f0, f4, f5, g0, h1, h2, h3, g4, g5 - SWAP7 - SUBFP254 - SWAP6 - // stack: f0, f5, g0, h1, h2, h3, h4, g5 - SWAP7 - SUBFP254 - SWAP6 - // stack: f0, g0, h1, h2, h3, h4, h5 - SWAP1 - SUBFP254 - // stack: h0, h1, h2, h3, h4, h5 -%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/box.asm b/evm/src/cpu/kernel/asm/hash/ripemd/box.asm index d60d9b8c..87e1f56a 100644 --- a/evm/src/cpu/kernel/asm/hash/ripemd/box.asm +++ b/evm/src/cpu/kernel/asm/hash/ripemd/box.asm @@ -72,7 +72,7 @@ post_rol: %macro get_round - // stack: sides, rounds + // stack: sides , rounds %mul_const(5) PUSH 10 SUB diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 52556d3f..e81993b0 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -289,30 +289,30 @@ // given u32 bytestring abcd return dcba %macro reverse_bytes_u32 - // stack: abcd + // stack: abcd DUP1 PUSH 28 BYTE - // stack: a, abcd + // stack: a, abcd DUP2 PUSH 29 BYTE %shl_const(8) - // stack: b0, a, abcd + // stack: b0, a, abcd DUP3 PUSH 30 BYTE %shl_const(16) - // stack: c00, b0, a, abcd + // stack: c00, b0, a, abcd SWAP3 PUSH 31 BYTE %shl_const(24) - // stack: d000, b0, a, c00 + // stack: d000, b0, a, c00 ADD // OR ADD // OR ADD // OR - // stack: dcba + // stack: dcba %endmacro %macro reverse_bytes_u64 diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 843f0a0d..52876c97 100644 --- 
a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -1,12 +1,14 @@ //! An EVM interpreter for testing and debugging purposes. use std::collections::HashMap; +use std::ops::Range; use anyhow::{anyhow, bail, ensure}; use ethereum_types::{U256, U512}; use keccak_hash::keccak; use plonky2::field::goldilocks_field::GoldilocksField; +use crate::bn254_arithmetic::BN_BASE; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::constants::context_metadata::ContextMetadata; use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; @@ -23,14 +25,6 @@ type F = GoldilocksField; /// Halt interpreter execution whenever a jump to this offset is done. const DEFAULT_HALT_OFFSET: usize = 0xdeadbeef; -/// Order of the BN254 base field. -const BN_BASE: U256 = U256([ - 4332616871279656263, - 10917124144477883021, - 13281191951274694749, - 3486998266802970665, -]); - impl MemoryState { pub(crate) fn mload_general(&self, context: usize, segment: Segment, offset: usize) -> U256 { self.get(MemoryAddress::new(context, segment, offset)) @@ -249,6 +243,18 @@ impl<'a> Interpreter<'a> { .content } + pub fn extract_kernel_memory(self, segment: Segment, range: Range) -> Vec { + let mut output: Vec = vec![]; + for i in range { + let term = self + .generation_state + .memory + .get(MemoryAddress::new(0, segment, i)); + output.push(term); + } + output + } + pub(crate) fn push(&mut self, x: U256) { self.stack_mut().push(x); self.generation_state.registers.stack_len += 1; diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs new file mode 100644 index 00000000..badfb472 --- /dev/null +++ b/evm/src/cpu/kernel/tests/bn254.rs @@ -0,0 +1,319 @@ +use anyhow::Result; +use ethereum_types::U256; +use rand::Rng; + +use crate::bn254_arithmetic::{Fp, Fp12, Fp2, Fp6}; +use crate::bn254_pairing::{ + gen_fp12_sparse, invariant_exponent, miller_loop, tate, Curve, TwistedCurve, +}; +use crate::cpu::kernel::interpreter::{ + 
run_interpreter_with_memory, Interpreter, InterpreterMemoryInitialization, +}; +use crate::memory::segments::Segment::BnPairing; + +fn extract_stack(interpreter: Interpreter<'static>) -> Vec { + interpreter + .stack() + .iter() + .rev() + .cloned() + .collect::>() +} + +fn setup_mul_fp6_test(f: Fp6, g: Fp6, label: &str) -> InterpreterMemoryInitialization { + let mut stack = f.on_stack(); + if label == "mul_fp254_6" { + stack.extend(g.on_stack()); + } + stack.push(U256::from(0xdeadbeefu32)); + InterpreterMemoryInitialization { + label: label.to_string(), + stack, + segment: BnPairing, + memory: vec![], + } +} + +#[test] +fn test_mul_fp6() -> Result<()> { + let mut rng = rand::thread_rng(); + let f: Fp6 = rng.gen::(); + let g: Fp6 = rng.gen::(); + + let setup_normal: InterpreterMemoryInitialization = setup_mul_fp6_test(f, g, "mul_fp254_6"); + let setup_square: InterpreterMemoryInitialization = setup_mul_fp6_test(f, f, "square_fp254_6"); + + let intrptr_normal: Interpreter = run_interpreter_with_memory(setup_normal).unwrap(); + let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap(); + + let out_normal: Vec = extract_stack(intrptr_normal); + let out_square: Vec = extract_stack(intrptr_square); + + let exp_normal: Vec = (f * g).on_stack(); + let exp_square: Vec = (f * f).on_stack(); + + assert_eq!(out_normal, exp_normal); + assert_eq!(out_square, exp_square); + + Ok(()) +} + +fn setup_mul_fp12_test( + out: usize, + f: Fp12, + g: Fp12, + label: &str, +) -> InterpreterMemoryInitialization { + let in0: usize = 200; + let in1: usize = 212; + + let mut stack = vec![ + U256::from(in0), + U256::from(in1), + U256::from(out), + U256::from(0xdeadbeefu32), + ]; + if label == "square_fp254_12" { + stack.remove(0); + } + InterpreterMemoryInitialization { + label: label.to_string(), + stack, + segment: BnPairing, + memory: vec![(in0, f.on_stack()), (in1, g.on_stack())], + } +} + +#[test] +fn test_mul_fp12() -> Result<()> { + let out: usize = 224; + + let 
mut rng = rand::thread_rng(); + let f: Fp12 = rng.gen::(); + let g: Fp12 = rng.gen::(); + let h: Fp12 = gen_fp12_sparse(&mut rng); + + let setup_normal: InterpreterMemoryInitialization = + setup_mul_fp12_test(out, f, g, "mul_fp254_12"); + let setup_sparse: InterpreterMemoryInitialization = + setup_mul_fp12_test(out, f, h, "mul_fp254_12_sparse"); + let setup_square: InterpreterMemoryInitialization = + setup_mul_fp12_test(out, f, f, "square_fp254_12"); + + let intrptr_normal: Interpreter = run_interpreter_with_memory(setup_normal).unwrap(); + let intrptr_sparse: Interpreter = run_interpreter_with_memory(setup_sparse).unwrap(); + let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap(); + + let out_normal: Vec = intrptr_normal.extract_kernel_memory(BnPairing, out..out + 12); + let out_sparse: Vec = intrptr_sparse.extract_kernel_memory(BnPairing, out..out + 12); + let out_square: Vec = intrptr_square.extract_kernel_memory(BnPairing, out..out + 12); + + let exp_normal: Vec = (f * g).on_stack(); + let exp_sparse: Vec = (f * h).on_stack(); + let exp_square: Vec = (f * f).on_stack(); + + assert_eq!(out_normal, exp_normal); + assert_eq!(out_sparse, exp_sparse); + assert_eq!(out_square, exp_square); + + Ok(()) +} + +fn setup_frob_fp6_test(f: Fp6, n: usize) -> InterpreterMemoryInitialization { + InterpreterMemoryInitialization { + label: String::from("test_frob_fp254_6_") + &(n.to_string()), + stack: f.on_stack(), + segment: BnPairing, + memory: vec![], + } +} + +#[test] +fn test_frob_fp6() -> Result<()> { + let mut rng = rand::thread_rng(); + let f: Fp6 = rng.gen::(); + for n in 1..4 { + let setup_frob = setup_frob_fp6_test(f, n); + let intrptr_frob: Interpreter = run_interpreter_with_memory(setup_frob).unwrap(); + let out_frob: Vec = extract_stack(intrptr_frob); + let exp_frob: Vec = f.frob(n).on_stack(); + assert_eq!(out_frob, exp_frob); + } + Ok(()) +} + +fn setup_frob_fp12_test(ptr: usize, f: Fp12, n: usize) -> InterpreterMemoryInitialization 
{ + InterpreterMemoryInitialization { + label: String::from("test_frob_fp254_12_") + &(n.to_string()), + stack: vec![U256::from(ptr)], + segment: BnPairing, + memory: vec![(ptr, f.on_stack())], + } +} + +#[test] +fn test_frob_fp12() -> Result<()> { + let ptr: usize = 200; + let mut rng = rand::thread_rng(); + let f: Fp12 = rng.gen::(); + for n in [1, 2, 3, 6] { + let setup_frob = setup_frob_fp12_test(ptr, f, n); + let intrptr_frob: Interpreter = run_interpreter_with_memory(setup_frob).unwrap(); + let out_frob: Vec = intrptr_frob.extract_kernel_memory(BnPairing, ptr..ptr + 12); + let exp_frob: Vec = f.frob(n).on_stack(); + assert_eq!(out_frob, exp_frob); + } + Ok(()) +} + +#[test] +fn test_inv_fp12() -> Result<()> { + let ptr: usize = 200; + let inv: usize = 212; + let mut rng = rand::thread_rng(); + let f: Fp12 = rng.gen::(); + + let setup = InterpreterMemoryInitialization { + label: "inv_fp254_12".to_string(), + stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)], + segment: BnPairing, + memory: vec![(ptr, f.on_stack())], + }; + let interpreter: Interpreter = run_interpreter_with_memory(setup).unwrap(); + let output: Vec = interpreter.extract_kernel_memory(BnPairing, inv..inv + 12); + let expected: Vec = f.inv().on_stack(); + + assert_eq!(output, expected); + + Ok(()) +} + +#[test] +fn test_invariant_exponent() -> Result<()> { + let ptr: usize = 200; + let mut rng = rand::thread_rng(); + let f: Fp12 = rng.gen::(); + + let setup = InterpreterMemoryInitialization { + label: "bn254_invariant_exponent".to_string(), + stack: vec![U256::from(ptr), U256::from(0xdeadbeefu32)], + segment: BnPairing, + memory: vec![(ptr, f.on_stack())], + }; + + let interpreter: Interpreter = run_interpreter_with_memory(setup).unwrap(); + let output: Vec = interpreter.extract_kernel_memory(BnPairing, ptr..ptr + 12); + let expected: Vec = invariant_exponent(f).on_stack(); + + assert_eq!(output, expected); + + Ok(()) +} + +// The curve is cyclic with generator (1, 2) +pub 
const CURVE_GENERATOR: Curve = { + Curve { + x: Fp { val: U256::one() }, + y: Fp { + val: U256([2, 0, 0, 0]), + }, + } +}; + +// The twisted curve is cyclic with generator (x, y) as follows +pub const TWISTED_GENERATOR: TwistedCurve = { + TwistedCurve { + x: Fp2 { + re: Fp { + val: U256([ + 0x46debd5cd992f6ed, + 0x674322d4f75edadd, + 0x426a00665e5c4479, + 0x1800deef121f1e76, + ]), + }, + im: Fp { + val: U256([ + 0x97e485b7aef312c2, + 0xf1aa493335a9e712, + 0x7260bfb731fb5d25, + 0x198e9393920d483a, + ]), + }, + }, + y: Fp2 { + re: Fp { + val: U256([ + 0x4ce6cc0166fa7daa, + 0xe3d1e7690c43d37b, + 0x4aab71808dcb408f, + 0x12c85ea5db8c6deb, + ]), + }, + im: Fp { + val: U256([ + 0x55acdadcd122975b, + 0xbc4b313370b38ef3, + 0xec9e99ad690c3395, + 0x090689d0585ff075, + ]), + }, + }, + } +}; + +#[test] +fn test_miller() -> Result<()> { + let ptr: usize = 200; + let out: usize = 206; + let inputs: Vec = vec![ + CURVE_GENERATOR.x.val, + CURVE_GENERATOR.y.val, + TWISTED_GENERATOR.x.re.val, + TWISTED_GENERATOR.x.im.val, + TWISTED_GENERATOR.y.re.val, + TWISTED_GENERATOR.y.im.val, + ]; + + let setup = InterpreterMemoryInitialization { + label: "bn254_miller".to_string(), + stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)], + segment: BnPairing, + memory: vec![(ptr, inputs)], + }; + let interpreter = run_interpreter_with_memory(setup).unwrap(); + let output: Vec = interpreter.extract_kernel_memory(BnPairing, out..out + 12); + let expected = miller_loop(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack(); + + assert_eq!(output, expected); + + Ok(()) +} + +#[test] +fn test_tate() -> Result<()> { + let ptr: usize = 200; + let out: usize = 206; + let inputs: Vec = vec![ + CURVE_GENERATOR.x.val, + CURVE_GENERATOR.y.val, + TWISTED_GENERATOR.x.re.val, + TWISTED_GENERATOR.x.im.val, + TWISTED_GENERATOR.y.re.val, + TWISTED_GENERATOR.y.im.val, + ]; + + let setup = InterpreterMemoryInitialization { + label: "bn254_tate".to_string(), + stack: vec![U256::from(ptr), 
U256::from(out), U256::from(0xdeadbeefu32)], + segment: BnPairing, + memory: vec![(ptr, inputs)], + }; + let interpreter = run_interpreter_with_memory(setup).unwrap(); + let output: Vec = interpreter.extract_kernel_memory(BnPairing, out..out + 12); + let expected = tate(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack(); + + assert_eq!(output, expected); + + Ok(()) +} diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs deleted file mode 100644 index 83e18dcf..00000000 --- a/evm/src/cpu/kernel/tests/fields.rs +++ /dev/null @@ -1,203 +0,0 @@ -use anyhow::Result; -use ethereum_types::U256; -use rand::{thread_rng, Rng}; - -use crate::cpu::kernel::aggregator::KERNEL; -use crate::cpu::kernel::interpreter::run_interpreter; - -// TODO: 107 is hardcoded as a dummy prime for testing -// should be changed to the proper implementation prime -// once the run_{add, mul, sub}fp254 fns are implemented -const P254: u32 = 107; - -fn add_fp(x: u32, y: u32) -> u32 { - (x + y) % P254 -} - -fn add3_fp(x: u32, y: u32, z: u32) -> u32 { - (x + y + z) % P254 -} - -fn mul_fp(x: u32, y: u32) -> u32 { - (x * y) % P254 -} - -fn sub_fp(x: u32, y: u32) -> u32 { - (P254 + x - y) % P254 -} - -fn add_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] { - let [a, a_] = a; - let [b, b_] = b; - [add_fp(a, b), add_fp(a_, b_)] -} - -fn add3_fp2(a: [u32; 2], b: [u32; 2], c: [u32; 2]) -> [u32; 2] { - let [a, a_] = a; - let [b, b_] = b; - let [c, c_] = c; - [add3_fp(a, b, c), add3_fp(a_, b_, c_)] -} - -// fn sub_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] { -// let [a, a_] = a; -// let [b, b_] = b; -// [sub_fp(a, b), sub_fp(a_, b_)] -// } - -fn mul_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] { - let [a, a_] = a; - let [b, b_] = b; - [ - sub_fp(mul_fp(a, b), mul_fp(a_, b_)), - add_fp(mul_fp(a, b_), mul_fp(a_, b)), - ] -} - -fn i9(a: [u32; 2]) -> [u32; 2] { - let [a, a_] = a; - [sub_fp(mul_fp(9, a), a_), add_fp(a, mul_fp(9, a_))] -} - -// fn add_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> 
[[u32; 2]; 3] { -// let [c0, c1, c2] = c; -// let [d0, d1, d2] = d; - -// let e0 = add_fp2(c0, d0); -// let e1 = add_fp2(c1, d1); -// let e2 = add_fp2(c2, d2); -// [e0, e1, e2] -// } - -// fn sub_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] { -// let [c0, c1, c2] = c; -// let [d0, d1, d2] = d; - -// let e0 = sub_fp2(c0, d0); -// let e1 = sub_fp2(c1, d1); -// let e2 = sub_fp2(c2, d2); -// [e0, e1, e2] -// } - -fn mul_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] { - let [c0, c1, c2] = c; - let [d0, d1, d2] = d; - - let c0d0 = mul_fp2(c0, d0); - let c0d1 = mul_fp2(c0, d1); - let c0d2 = mul_fp2(c0, d2); - let c1d0 = mul_fp2(c1, d0); - let c1d1 = mul_fp2(c1, d1); - let c1d2 = mul_fp2(c1, d2); - let c2d0 = mul_fp2(c2, d0); - let c2d1 = mul_fp2(c2, d1); - let c2d2 = mul_fp2(c2, d2); - let cd12 = add_fp2(c1d2, c2d1); - - [ - add_fp2(c0d0, i9(cd12)), - add3_fp2(c0d1, c1d0, i9(c2d2)), - add3_fp2(c0d2, c1d1, c2d0), - ] -} - -// fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] { -// let [c0, c1, c2] = c; -// [i9(c2), c0, c1] -// } - -// fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] { -// let [f0, f1] = f; -// let [g0, g1] = g; - -// let h0 = mul_fp6(f0, g0); -// let h1 = mul_fp6(f1, g1); -// let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1)); -// [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))] -// } - -fn gen_fp6() -> [[u32; 2]; 3] { - let mut rng = thread_rng(); - [ - [rng.gen_range(0..P254), rng.gen_range(0..P254)], - [rng.gen_range(0..P254), rng.gen_range(0..P254)], - [rng.gen_range(0..P254), rng.gen_range(0..P254)], - ] -} - -fn as_stack(xs: Vec) -> Vec { - xs.iter().map(|&x| U256::from(x)).rev().collect() -} - -#[test] -#[ignore] -fn test_fp6() -> Result<()> { - let c = gen_fp6(); - let d = gen_fp6(); - - let mut input: Vec = [c, d].into_iter().flatten().flatten().collect(); - input.push(0xdeadbeef); - - let initial_offset = KERNEL.global_labels["mul_fp6"]; - let initial_stack: Vec = as_stack(input); - let 
final_stack: Vec = run_interpreter(initial_offset, initial_stack)? - .stack() - .to_vec(); - - let output: Vec = mul_fp6(c, d).into_iter().flatten().collect(); - let expected = as_stack(output); - - assert_eq!(final_stack, expected); - - Ok(()) -} - -// fn make_initial_stack( -// f0: [[u32; 2]; 3], -// f1: [[u32; 2]; 3], -// g0: [[u32; 2]; 3], -// g1: [[u32; 2]; 3], -// ) -> Vec { -// // stack: in0, f, in0', f', in1, g, in1', g', in1, out, in0, out -// let f0: Vec = f0.into_iter().flatten().collect(); -// let f1: Vec = f1.into_iter().flatten().collect(); -// let g0: Vec = g0.into_iter().flatten().collect(); -// let g1: Vec = g1.into_iter().flatten().collect(); - -// let mut input = f0; -// input.extend(vec![0]); -// input.extend(f1); -// input.extend(g0); -// input.extend(vec![12]); -// input.extend(g1); -// input.extend(vec![12, 24, 0, 24]); - -// as_stack(input) -// } - -// #[test] -// fn test_fp12() -> Result<()> { -// let f0 = gen_fp6(); -// let f1 = gen_fp6(); -// let g0 = gen_fp6(); -// let g1 = gen_fp6(); - -// let kernel = combined_kernel(); -// let initial_offset = kernel.global_labels["test_mul_Fp12"]; -// let initial_stack: Vec = make_initial_stack(f0, f1, g0, g1); -// let final_stack: Vec = run_with_kernel(&kernel, initial_offset, initial_stack)? 
-// .stack() -// .to_vec(); - -// let mut output: Vec = mul_fp12([f0, f1], [g0, g1]) -// .into_iter() -// .flatten() -// .flatten() -// .collect(); -// output.extend(vec![24]); -// let expected = as_stack(output); - -// assert_eq!(final_stack, expected); - -// Ok(()) -// } diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs index acf90230..aab8298d 100644 --- a/evm/src/cpu/kernel/tests/mod.rs +++ b/evm/src/cpu/kernel/tests/mod.rs @@ -1,9 +1,9 @@ mod account_code; mod balance; +mod bn254; mod core; mod ecc; mod exp; -mod fields; mod hash; mod mpt; mod packing; diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs index 588454fd..1313de83 100644 --- a/evm/src/generation/prover_input.rs +++ b/evm/src/generation/prover_input.rs @@ -1,15 +1,18 @@ +use std::mem::transmute; use std::str::FromStr; use anyhow::{bail, Error}; use ethereum_types::{BigEndianHash, H256, U256}; use plonky2::field::types::Field; +use crate::bn254_arithmetic::Fp12; use crate::generation::prover_input::EvmField::{ Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar, }; use crate::generation::prover_input::FieldOp::{Inverse, Sqrt}; use crate::generation::state::GenerationState; -use crate::witness::util::stack_peek; +use crate::memory::segments::Segment::BnPairing; +use crate::witness::util::{kernel_peek, stack_peek}; /// Prover input function represented as a scoped function name. /// Example: `PROVER_INPUT(ff::bn254_base::inverse)` is represented as `ProverInputFn([ff, bn254_base, inverse])`. @@ -27,6 +30,7 @@ impl GenerationState { match input_fn.0[0].as_str() { "end_of_txns" => self.run_end_of_txns(), "ff" => self.run_ff(input_fn), + "ffe" => self.run_ffe(input_fn), "mpt" => self.run_mpt(), "rlp" => self.run_rlp(), "account_code" => self.run_account_code(input_fn), @@ -52,6 +56,31 @@ impl GenerationState { field.op(op, x) } + /// Finite field extension operations. 
+ fn run_ffe(&self, input_fn: &ProverInputFn) -> U256 { + let field = EvmField::from_str(input_fn.0[1].as_str()).unwrap(); + let n = input_fn.0[2] + .as_str() + .split('_') + .nth(1) + .unwrap() + .parse::() + .unwrap(); + let ptr = stack_peek(self, 11 - n).expect("Empty stack").as_usize(); + + let f: [U256; 12] = match field { + Bn254Base => { + let mut f: [U256; 12] = [U256::zero(); 12]; + for i in 0..12 { + f[i] = kernel_peek(self, BnPairing, ptr + i); + } + f + } + _ => todo!(), + }; + field.field_extension_inverse(n, f) + } + /// MPT data. fn run_mpt(&mut self) -> U256 { self.mpt_prover_inputs @@ -176,6 +205,12 @@ impl EvmField { ); modexp(x, q, n) } + + fn field_extension_inverse(&self, n: usize, f: [U256; 12]) -> U256 { + let f: Fp12 = unsafe { transmute(f) }; + let f_inv: [U256; 12] = unsafe { transmute(f.inv()) }; + f_inv[n] + } } fn modexp(x: U256, e: U256, n: U256) -> U256 { diff --git a/evm/src/lib.rs b/evm/src/lib.rs index 6ca956c4..b6bb6130 100644 --- a/evm/src/lib.rs +++ b/evm/src/lib.rs @@ -8,6 +8,8 @@ pub mod all_stark; pub mod arithmetic; +pub mod bn254_arithmetic; +pub mod bn254_pairing; pub mod config; pub mod constraint_consumer; pub mod cpu; diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs index 202901e2..ee76cf7a 100644 --- a/evm/src/memory/segments.rs +++ b/evm/src/memory/segments.rs @@ -43,10 +43,11 @@ pub enum Segment { BnWnafA = 19, BnWnafB = 20, BnTableQ = 21, + BnPairing = 22, } impl Segment { - pub(crate) const COUNT: usize = 22; + pub(crate) const COUNT: usize = 23; pub(crate) fn all() -> [Self; Self::COUNT] { [ @@ -72,6 +73,7 @@ impl Segment { Self::BnWnafA, Self::BnWnafB, Self::BnTableQ, + Self::BnPairing, ] } @@ -100,6 +102,7 @@ impl Segment { Segment::BnWnafA => "SEGMENT_KERNEL_BN_WNAF_A", Segment::BnWnafB => "SEGMENT_KERNEL_BN_WNAF_B", Segment::BnTableQ => "SEGMENT_KERNEL_BN_TABLE_Q", + Segment::BnPairing => "SEGMENT_KERNEL_BN_PAIRING", } } @@ -128,6 +131,7 @@ impl Segment { Segment::BnWnafA => 8, 
Segment::BnWnafB => 8, Segment::BnTableQ => 256, + Segment::BnPairing => 256, } } } diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs index 74970553..a5ebf2ac 100644 --- a/evm/src/witness/util.rs +++ b/evm/src/witness/util.rs @@ -27,7 +27,7 @@ fn to_bits_le(n: u8) -> [F; 8] { res } -/// Peak at the stack item `i`th from the top. If `i=0` this gives the tip. +/// Peek at the stack item `i`th from the top. If `i=0` this gives the tip. pub(crate) fn stack_peek(state: &GenerationState, i: usize) -> Option { if i >= state.registers.stack_len { return None; @@ -39,6 +39,17 @@ pub(crate) fn stack_peek(state: &GenerationState, i: usize) -> Opti ))) } +/// Peek at kernel at specified segment and address +pub(crate) fn kernel_peek( + state: &GenerationState, + segment: Segment, + virt: usize, +) -> U256 { + state + .memory + .get(MemoryAddress::new(state.registers.context, segment, virt)) +} + pub(crate) fn mem_read_with_log( channel: MemoryChannel, address: MemoryAddress,