diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 62021123..32aeb8f6 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -55,15 +55,17 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/final_exponent.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/glv.asm"),
-        include_str!("asm/curve/bn254/curve_arithmetic/invariant_exponent.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/msm.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/pairing.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/precomputation.asm"),
-        include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/twisted_curve.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/degree_6_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/degree_12_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/util.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
diff --git a/evm/src/cpu/kernel/asm/core/precompiles/snarkv.asm b/evm/src/cpu/kernel/asm/core/precompiles/snarkv.asm
index fbb46670..433186b5 100644
--- a/evm/src/cpu/kernel/asm/core/precompiles/snarkv.asm
+++ b/evm/src/cpu/kernel/asm/core/precompiles/snarkv.asm
@@ -1,3 +1,118 @@
 global precompile_snarkv:
-    // TODO
-    PANIC
+    // stack: address, retdest, new_ctx, (old stack)
+    %pop2 // address and retdest are discarded; neither is used below
+    // stack: new_ctx, (old stack)
+    DUP1
+    SET_CONTEXT
+    // stack: (empty)
+    PUSH 0x100000000 // = 2^32 (is_kernel = true)
+    // stack: kexit_info
+    // Require calldata_size % 192 == 0, set k = calldata_size / 192, then charge STATIC + k * DYNAMIC gas.
+    PUSH 192 %calldatasize DUP2 DUP2
+    // stack: calldata_size, 192, calldata_size, 192, kexit_info
+    MOD %jumpi(fault_exception) // calldata_size should be a multiple of 192
+    DIV
+    // stack: k, kexit_info  (k = calldata_size / 192)
+    DUP1 %mul_const(@SNARKV_DYNAMIC_GAS) %add_const(@SNARKV_STATIC_GAS)
+    %stack (gas, k, kexit_info) -> (gas, kexit_info, k)
+    %charge_gas
+    SWAP1
+    // stack: k, kexit_info
+    PUSH 0 // i = 0: index of the next 192-byte input to load
+loading_loop:
+    // stack: i, k, kexit_info
+    DUP2 DUP2 EQ %jumpi(loading_done) // leave the loop once i == k
+    // Load six 32-byte words in calldata order x, y, x_im, x_re, y_im, y_re, starting at offset 192 * i.
+    DUP1 %mul_const(192)
+    // stack: px, i, k, kexit_info  (px = 192 * i)
+    GET_CONTEXT
+    %stack (ctx, px) -> (ctx, @SEGMENT_CALLDATA, px, 32, loading_loop_contd, px)
+    %jump(mload_packing)
+loading_loop_contd:
+    // stack: x, px, i, k, kexit_info
+    SWAP1 %add_const(32) // py = px + 32
+    GET_CONTEXT
+    %stack (ctx, py) -> (ctx, @SEGMENT_CALLDATA, py, 32, loading_loop_contd2, py)
+    %jump(mload_packing)
+loading_loop_contd2:
+    // stack: y, py, x, i, k, kexit_info
+    SWAP1 %add_const(32) // px_im = py + 32
+    GET_CONTEXT
+    %stack (ctx, px_im) -> (ctx, @SEGMENT_CALLDATA, px_im, 32, loading_loop_contd3, px_im)
+    %jump(mload_packing)
+loading_loop_contd3:
+    // stack: x_im, px_im, y, x, i, k, kexit_info
+    SWAP1 %add_const(32)
+    // stack: px_re, x_im, y, x, i, k, kexit_info
+    GET_CONTEXT
+    %stack (ctx, px_re) -> (ctx, @SEGMENT_CALLDATA, px_re, 32, loading_loop_contd4, px_re)
+    %jump(mload_packing)
+loading_loop_contd4:
+    // stack: x_re, px_re, x_im, y, x, i, k, kexit_info
+    SWAP1 %add_const(32)
+    // stack: py_im, x_re, x_im, y, x, i, k, kexit_info
+    GET_CONTEXT
+    %stack (ctx, py_im) -> (ctx, @SEGMENT_CALLDATA, py_im, 32, loading_loop_contd5, py_im)
+    %jump(mload_packing)
+loading_loop_contd5:
+    // stack: y_im, py_im, x_re, x_im, y, x, i, k, kexit_info
+    SWAP1 %add_const(32)
+    // stack: py_re, y_im, x_re, x_im, y, x, i, k, kexit_info
+    GET_CONTEXT
+    %stack (ctx, py_re) -> (ctx, @SEGMENT_CALLDATA, py_re, 32, loading_loop_contd6)
+    %jump(mload_packing)
+loading_loop_contd6:
+    // stack: y_re, y_im, x_re, x_im, y, x, i, k, kexit_info
+    SWAP1  // the EVM serializes the imaginary part first
+    // stack: y_im, y_re, x_re, x_im, y, x, i, k, kexit_info
+    DUP7
+    // stack: i, y_im, y_re, x_re, x_im, y, x, i, k, kexit_info
+    %mul_const(6) %add_const(@SNARKV_INP)
+    %add_const(5) // word 5 of the i-th 6-word record: y_im
+    %mstore_kernel_bn254_pairing
+    // stack: y_re, x_re, x_im, y, x, i, k, kexit_info
+    DUP6
+    // stack: i, y_re, x_re, x_im, y, x, i, k, kexit_info
+    %mul_const(6) %add_const(@SNARKV_INP)
+    %add_const(4) // word 4: y_re
+    %mstore_kernel_bn254_pairing // stack afterwards: x_re, x_im, y, x, i, k, kexit_info
+    SWAP1  // the EVM serializes the imaginary part first
+    // stack: x_im, x_re, y, x, i, k, kexit_info
+    DUP5
+    // stack: i, x_im, x_re, y, x, i, k, kexit_info
+    %mul_const(6) %add_const(@SNARKV_INP)
+    %add_const(3) // word 3: x_im
+    %mstore_kernel_bn254_pairing
+    // stack: x_re, y, x, i, k, kexit_info
+    DUP4
+    // stack: i, x_re, y, x, i, k, kexit_info
+    %mul_const(6) %add_const(@SNARKV_INP)
+    %add_const(2) // word 2: x_re
+    %mstore_kernel_bn254_pairing
+    // stack: y, x, i, k, kexit_info
+    DUP3
+    // stack: i, y, x, i, k, kexit_info
+    %mul_const(6) %add_const(@SNARKV_INP)
+    %add_const(1) // word 1: y
+    %mstore_kernel_bn254_pairing
+    // stack: x, i, k, kexit_info
+    DUP2
+    // stack: i, x, i, k, kexit_info
+    %mul_const(6) %add_const(@SNARKV_INP)
+    %mstore_kernel_bn254_pairing // word 0: x
+    // stack: i, k, kexit_info
+    %increment
+    %jump(loading_loop)
+
+loading_done:
+    %stack (i, k) -> (k, @SNARKV_INP, @SNARKV_OUT, got_result)
+    %jump(bn254_pairing) // the loop counter i is dropped; bn254_pairing returns to got_result
+got_result:
+    // stack: result, kexit_info
+    DUP1 %eq_const(@U256_MAX) %jumpi(fault_exception) // result == U256_MAX is treated as a fault
+    // stack: result, kexit_info
+    // Store the result bool (repr. by a U256) to the parent's return data using `mstore_unpacking`.
+    %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32)
+    %mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT)
+    %stack (parent_ctx, result) -> (parent_ctx, @SEGMENT_RETURNDATA, 0, result, 32, pop_and_return_success)
+    %jump(mstore_unpacking)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index 499d88d5..a43c4047 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -201,8 +201,8 @@ global bn_double:
     %jump(bn_add_equal_points)
 
 // Check if (x,y) is a valid curve point.
-// Returns (range & curve) || is_identity
-// where
+// Returns (range & curve) || ident
+//   where
 //     range = (x < N) & (y < N) 
 //     curve = y^2 == (x^3 + 3) 
 //     ident = (x,y) == (0,0)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_exponent.asm
new file mode 100644
index 00000000..f8e48807
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_exponent.asm
@@ -0,0 +1,319 @@
+/// To make the Tate pairing an invariant, the final step is to exponentiate by
+///     (p^12 - 1)/N = (p^6 - 1) * (p^2 + 1) * (p^4 - p^2 + 1)/N
+/// and thus we can exponentiate by each factor sequentially.
+///
+/// def bn254_final_exponent(y: Fp12):
+///     y = first_exp(y)
+///     y = second_exp(y)
+///     return final_exp(y)
+/// y is read from and written back to `out`; k and inp are dropped on entry and offset 0 is used as scratch.
+global bn254_final_exponent:
+
+/// first, exponentiate by (p^6 - 1) via
+///     def first_exp(y):
+///         return y.frob(6) / y
+    // stack:            k, inp, out, retdest  {out: y}
+    %stack (k, inp, out) -> (out, 0, first_exp, out)         
+    // stack: out, 0, first_exp, out, retdest  {out: y}
+    %jump(inv_fp254_12) // [0] <- [out]^-1
+first_exp:
+    // stack:                           out, retdest  {out: y  , 0: y^-1}
+    %frob_fp254_12_6
+    // stack:                           out, retdest  {out: y_6, 0: y^-1}
+    %stack (out) -> (out, 0, out, second_exp, out)
+    // stack:  out, 0, out, second_exp, out, retdest  {out: y_6, 0: y^-1}
+    %jump(mul_fp254_12) // [out] <- [out] * [0]
+
+/// second, exponentiate by (p^2 + 1) via 
+///     def second_exp(y):
+///         return y.frob(2) * y
+second_exp:
+    // stack:                              out, retdest  {out: y}
+    %stack (out) -> (out, 0, out, out, final_exp, out)
+    // stack: out, 0, out, out, final_exp, out, retdest  {out: y}
+    %frob_fp254_12_2_
+    // stack:      0, out, out, final_exp, out, retdest  {out: y, 0: y_2}
+    %jump(mul_fp254_12) // [out] <- [0] * [out]
+
+/// Finally, we must exponentiate by (p^4 - p^2 + 1)/N.
+/// To do so efficiently, we can express this power as
+///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
+/// and simultaneously compute y^a4, y^a2, y^a0 where
+///     a1 = a4 + 2a2 - a0
+/// We first initialize these powers as 1 and then use 
+/// binary algorithms for exponentiation.
+///
+/// def final_exp(y):
+///     y4, y2, y0 = 1, 1, 1
+///     power_loop_4()
+///     power_loop_2()
+///     power_loop_0()
+///     custom_powers()
+///     final_power()
+
+final_exp:
+    // stack:                 val, retdest
+    %stack (val) -> (val, 0, val)
+    // stack:        val, 0, val, retdest
+    %move_fp254_12
+    // stack:             0, val, retdest  {0: sqr}
+    %stack () -> (1, 1, 1)
+    // stack:    1, 1, 1, 0, val, retdest
+    %mstore_kernel_bn254_pairing(12)  // first word of y0 <- 1
+    %mstore_kernel_bn254_pairing(24)  // first word of y2 <- 1
+    %mstore_kernel_bn254_pairing(36)  // first word of y4 <- 1; NOTE(review): the other 11 words of each are presumably already zero - confirm
+    // stack:             0, val, retdest  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (64, 62, 65)
+    // stack: 64, 62, 65, 0, val, retdest  {0: sqr, 12: y0, 24: y2, 36: y4}  (counters i, j, k of the three power loops)
+    %jump(power_loop_4)
+
+/// After computing the powers 
+///     y^a4, y^a2, y^a0
+/// we would like to transform them to
+///     y^a2, y^-a1, y^-a0
+///
+/// def custom_powers():
+///     y0 = y0^{-1}
+///     y1 = y4 * y2^2 * y0
+///     return y2, y1, y0
+///
+/// And finally, upon doing so, compute the final power
+///     y^(p^3) * (y^a2)^(p^2) * (y^-a1)^p * (y^-a0)
+///
+/// def final_power():
+///     y  = y.frob(3)
+///     y2 = y2.frob(2)
+///     y1 = y1.frob(1)
+///     return y * y2 * y1 * y0
+/// NOTE(review): the memory annotations from final_power onward label slots 36 and 48 as y^a1 and y^a0, while the text above says y^-a1 and y^-a0 - confirm the signs.
+custom_powers:
+    // stack:                           val, retdest  {12: y0, 24: y2, 36: y4}
+    %stack () -> (12, 48, make_term_1)
+    // stack:      12, 48, make_term_1, val, retdest  {12: y0, 24: y2, 36: y4}
+    %jump(inv_fp254_12) // [48] <- [12]^-1
+make_term_1:
+    // stack:                           val, retdest  {24: y2, 36: y4, 48: y0^-1}
+    %stack () -> (24, 36, 36, make_term_2)
+    // stack:  24, 36, 36, make_term_2, val, retdest  {24: y2, 36: y4, 48: y0^-1}
+    %jump(mul_fp254_12) // [36] <- [24] * [36]
+make_term_2:
+    // stack:                           val, retdest  {24: y2, 36: y4 * y2, 48: y0^-1}
+    %stack () -> (24, 36, 36, make_term_3)
+    // stack:  24, 36, 36, make_term_3, val, retdest  {24: y2, 36: y4 * y2, 48: y0^-1}
+    %jump(mul_fp254_12) // [36] <- [24] * [36]
+make_term_3:
+    // stack:                           val, retdest  {24: y2, 36: y4 * y2^2, 48: y0^-1}
+    %stack () -> (48, 36, 36, final_power)
+    // stack:  48, 36, 36, final_power, val, retdest  {24: y2, 36: y4 * y2^2, 48: y0^-1}
+    %jump(mul_fp254_12) // [36] <- [48] * [36]
+final_power:
+    // stack:                           val, retdest  {val: y  , 24:  y^a2   , 36:  y^a1   , 48: y^a0}
+    %frob_fp254_12_3
+    // stack:                           val, retdest  {val: y_3, 24:  y^a2   , 36:  y^a1   , 48: y^a0}
+    %stack () -> (24, 24)
+    %frob_fp254_12_2_
+    POP // drop the value %frob_fp254_12_2_ leaves on the stack
+    // stack:                           val, retdest  {val: y_3, 24: (y^a2)_2, 36:  y^a1   , 48: y^a0}
+    PUSH 36
+    %frob_fp254_12_1
+    POP // drop the value %frob_fp254_12_1 leaves on the stack
+    // stack:                           val, retdest  {val: y_3, 24: (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
+    %stack (val) -> (24, val, val, penult_mul, val)
+    // stack: 24, val, val, penult_mul, val, retdest  {val: y_3, 24: (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
+    %jump(mul_fp254_12) // [val] <- [24] * [val]
+penult_mul:
+    // stack:                           val, retdest  {val: y_3 * (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
+    %stack (val) -> (36, val, val, final_mul, val)
+    // stack:  36, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
+    %jump(mul_fp254_12) // [val] <- [36] * [val]
+final_mul: 
+    // stack:                           val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 48: y^a0}
+    %stack (val) -> (48, val, val)
+    // stack:                  48, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 48: y^a0}
+    %jump(mul_fp254_12) // [val] <- [48] * [val]; mul_fp254_12 then returns to retdest
+
+
+/// def power_loop_4():
+///     for i in range(64):  # the asm counts i down from 64 and loads index i-1; acc is called sqr there
+///         abc = load(i, power_data_4)
+///         if a:
+///             y4 *= acc
+///         if b:
+///             y2 *= acc
+///         if c:
+///             y0 *= acc
+///         acc = square_fp254_12(acc)
+///     y4 *= acc
+///
+/// def power_loop_2():
+///     for i in range(62):  # the asm counts j down from 62 and loads index j-1
+///        ab = load(i, power_data_2)
+///        if a:
+///            y2 *= acc
+///        if b:
+///            y0 *= acc
+///        acc = square_fp254_12(acc)
+///     y2 *= acc
+///
+/// def power_loop_0():
+///     for i in range(65):  # the asm counts k down from 65 and loads index k-1
+///         a = load(i, power_data_0)
+///         if a:
+///             y0 *= acc
+///         acc = square_fp254_12(acc)
+///     y0 *= acc
+
+power_loop_4:
+    // stack:                                   i  , j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    ISZERO
+    // stack:                           break?, i  , j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_4_end)
+    // stack:                                   i  , j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %sub_const(1)
+    // stack:                                   i-1, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    %mload_kernel_code(power_data_4)
+    // stack:                              abc, i-1, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    %lt_const(100) // abc packs the flags as decimal digits; a is set iff abc >= 100
+    // stack:                       skip?, abc, i-1, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_4_b)
+    // stack:                              abc, i-1, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %sub_const(100)
+    // stack:                               bc, i-1, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (36, 36, power_loop_4_b)
+    // stack:      36, 36, power_loop_4_b,  bc, i-1, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP8
+    // stack: sqr, 36, 36, power_loop_4_b,  bc, i-1, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(mul_fp254_12) // y4 *= sqr
+power_loop_4_b:
+    // stack:                             bc, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    %lt_const(10) // b is set iff bc >= 10
+    // stack:                      skip?, bc, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_4_c)
+    // stack:                             bc, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %sub_const(10)
+    // stack:                              c, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (24, 24, power_loop_4_c)
+    // stack:      24, 24, power_loop_4_c, c, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP8
+    // stack: sqr, 24, 24, power_loop_4_c, c, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(mul_fp254_12) // y2 *= sqr
+power_loop_4_c:
+    // stack:                            c, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    ISZERO
+    // stack:                        skip?, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_4_sq)
+    // stack:                               i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (12, 12, power_loop_4_sq)
+    // stack:      12, 12, power_loop_4_sq, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP7
+    // stack: sqr, 12, 12, power_loop_4_sq, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(mul_fp254_12) // y0 *= sqr
+power_loop_4_sq:
+    // stack:                         i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    PUSH power_loop_4  
+    // stack:           power_loop_4, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP5  
+    DUP1
+    // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(square_fp254_12) // sqr <- sqr^2, then continue at power_loop_4
+power_loop_4_end:
+    // stack:                         0, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    POP  
+    // stack:                            j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (36, 36, power_loop_2) 
+    // stack:      36, 36, power_loop_2, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP6
+    // stack: sqr, 36, 36, power_loop_2, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(mul_fp254_12) // closing y4 *= sqr, then continue at power_loop_2
+
+power_loop_2:
+    // stack:                                   j  , k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    ISZERO
+    // stack:                         break?, j  , k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_2_end)
+    // stack:                                 j  , k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %sub_const(1)
+    // stack:                                 j-1, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    %mload_kernel_code(power_data_2)
+    // stack:                             ab, j-1, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    %lt_const(10) // ab packs the flags as decimal digits; a is set iff ab >= 10
+    // stack:                      skip?, ab, j-1, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_2_b)
+    // stack:                             ab, j-1, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %sub_const(10)
+    // stack:                              b, j-1, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (24, 24, power_loop_2_b) 
+    // stack:      24, 24, power_loop_2_b, b, j-1, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP7
+    // stack: sqr, 24, 24, power_loop_2_b, b, j-1, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(mul_fp254_12) // y2 *= sqr
+power_loop_2_b:
+    // stack:                            b, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    ISZERO
+    // stack:                        skip?, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_2_sq)
+    // stack:                               j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (12, 12, power_loop_2_sq) 
+    // stack:      12, 12, power_loop_2_sq, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP6
+    // stack: sqr, 12, 12, power_loop_2_sq, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(mul_fp254_12) // y0 *= sqr
+power_loop_2_sq:
+    // stack:                         j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    PUSH power_loop_2  
+    // stack:           power_loop_2, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP4  
+    DUP1
+    // stack: sqr, sqr, power_loop_2, j, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(square_fp254_12) // sqr <- sqr^2, then continue at power_loop_2
+power_loop_2_end:
+    // stack:                         0, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    POP  
+    // stack:                            k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (24, 24, power_loop_0)
+    // stack:      24, 24, power_loop_0, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP5
+    // stack: sqr, 24, 24, power_loop_0, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(mul_fp254_12) // closing y2 *= sqr, then continue at power_loop_0
+
+power_loop_0:
+    // stack:                               k  , sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    ISZERO
+    // stack:                       break?, k  , sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_0_end)
+    // stack:                               k  , sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %sub_const(1)
+    // stack:                               k-1, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP1  
+    %mload_kernel_code(power_data_0)
+    // stack:                            a, k-1, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    ISZERO
+    // stack:                        skip?, k-1, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jumpi(power_loop_0_sq)
+    // stack:                               k-1, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack () -> (12, 12, power_loop_0_sq)  
+    // stack:      12, 12, power_loop_0_sq, k-1, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP5
+    // stack: sqr, 12, 12, power_loop_0_sq, k-1, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(mul_fp254_12) // y0 *= sqr
+power_loop_0_sq:
+    // stack:                         k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    PUSH power_loop_0  
+    // stack:           power_loop_0, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    DUP3  
+    DUP1
+    // stack: sqr, sqr, power_loop_0, k, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %jump(square_fp254_12) // sqr <- sqr^2, then continue at power_loop_0
+power_loop_0_end:
+    // stack:                       0, sqr  {0: sqr, 12: y0, 24: y2, 36: y4}
+    %stack (i, sqr) -> (12, sqr, 12, custom_powers)
+    // stack:   12, sqr, 12, custom_powers  {0: sqr, 12: y0, 24: y2, 36: y4}  (the exhausted counter is dropped)
+    %jump(mul_fp254_12)    
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
deleted file mode 100644
index 2fcd5d2b..00000000
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
+++ /dev/null
@@ -1,319 +0,0 @@
-/// To make the Tate pairing an invariant, the final step is to exponentiate by
-///     (p^12 - 1)/N = (p^6 - 1) * (p^2 + 1) * (p^4 - p^2 + 1)/N
-/// and thus we can exponentiate by each factor sequentially.
-///
-/// def bn254_invariant_exponent(y: Fp12):
-///     y = first_exp(y)
-///     y = second_exp(y)
-///     return final_exp(y)
-
-global bn254_invariant_exponent:
-
-/// first, exponentiate by (p^6 - 1) via
-///     def first_exp(y):
-///         return y.frob(6) / y
-    // stack:                    out, retdest  {out: y}
-    %stack (out) -> (out, 0, first_exp, out)         
-    // stack: out, 0, first_exp, out, retdest  {out: y}
-    %jump(inv_fp254_12)
-first_exp:
-    // stack:                           out, retdest  {out: y  , 0: y^-1}
-    %frob_fp254_12_6
-    // stack:                           out, retdest  {out: y_6, 0: y^-1}
-    %stack (out) -> (out, 0, out, second_exp, out)
-    // stack:  out, 0, out, second_exp, out, retdest  {out: y_6, 0: y^-1}
-    %jump(mul_fp254_12)
-
-/// second, exponentiate by (p^2 + 1) via 
-///     def second_exp(y):
-///         return y.frob(2) * y
-second_exp:
-    // stack:                              out, retdest  {out: y}
-    %stack (out) -> (out, 0, out, out, final_exp, out)
-    // stack: out, 0, out, out, final_exp, out, retdest  {out: y}
-    %frob_fp254_12_2_
-    // stack:      0, out, out, final_exp, out, retdest  {out: y, 0: y_2}
-    %jump(mul_fp254_12)
-
-/// Finally, we must exponentiate by (p^4 - p^2 + 1)/N
-/// To do so efficiently, we can express this power as
-///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
-/// and simultaneously compute y^a4, y^a2, y^a0 where
-///     a1 = a4 + 2a2 - a0
-/// We first initialize these powers as 1 and then use 
-/// binary algorithms for exponentiation.
-///
-/// def final_exp(y):
-///     y4, y2, y0 = 1, 1, 1
-///     power_loop_4()
-///     power_loop_2()
-///     power_loop_0()
-///     custom_powers()
-///     final_power()
-
-final_exp:
-    // stack:                 val, retdest
-    %stack (val) -> (val, 12, val)
-    // stack:        val, 12, val, retdest
-    %move_fp254_12
-    // stack:             12, val, retdest  {12: sqr}
-    %stack () -> (1, 1, 1)
-    // stack:    1, 1, 1, 12, val, retdest
-    %mstore_kernel_bn254_pairing(24)  
-    %mstore_kernel_bn254_pairing(36)  
-    %mstore_kernel_bn254_pairing(48)
-    // stack:             12, val, retdest  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (64, 62, 65)
-    // stack: 64, 62, 65, 12, val, retdest  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(power_loop_4)
-
-/// After computing the powers 
-///     y^a4, y^a2, y^a0
-/// we would like to transform them to
-///     y^a2, y^-a1, y^-a0
-///
-/// def custom_powers()
-///     y0 = y0^{-1}
-///     y1 = y4 * y2^2 * y0
-///     return y2, y1, y0
-///
-/// And finally, upon doing so, compute the final power
-///     y^(p^3) * (y^a2)^(p^2) * (y^-a1)^p * (y^-a0)
-///
-/// def final_power()
-///     y  = y.frob(3)
-///     y2 = y2.frob(2)
-///     y1 = y1.frob(1)
-///     return y * y2 * y1 * y0
-
-custom_powers:
-    // stack:                           val, retdest  {24: y0, 36: y2, 48: y4}
-    %stack () -> (24, 60, make_term_1)
-    // stack:      24, 60, make_term_1, val, retdest  {24: y0, 36: y2, 48: y4}
-    %jump(inv_fp254_12)
-make_term_1:
-    // stack:                           val, retdest  {36: y2, 48: y4, 60: y0^-1}
-    %stack () -> (36, 48, 48, make_term_2)
-    // stack:  36, 48, 48, make_term_2, val, retdest  {36: y2, 48: y4, 60: y0^-1}
-    %jump(mul_fp254_12)
-make_term_2:
-    // stack:                           val, retdest  {36: y2, 48: y4 * y2, 60: y0^-1}
-    %stack () -> (36, 48, 48, make_term_3)
-    // stack:  36, 48, 48, make_term_3, val, retdest  {36: y2, 48: y4 * y2, 60: y0^-1}
-    %jump(mul_fp254_12)
-make_term_3:
-    // stack:                           val, retdest  {36: y2, 48: y4 * y2^2, 60: y0^-1}
-    %stack () -> (60, 48, 48, final_power)
-    // stack:  60, 48, 48, final_power, val, retdest  {36: y2, 48: y4 * y2^2, 60: y0^-1}
-    %jump(mul_fp254_12)
-final_power:
-    // stack:                           val, retdest  {val: y  , 36:  y^a2   , 48:  y^a1   , 60: y^a0}
-    %frob_fp254_12_3
-    // stack:                           val, retdest  {val: y_3, 36:  y^a2   , 48:  y^a1   , 60: y^a0}
-    %stack () -> (36, 36)
-    %frob_fp254_12_2_
-    POP
-    // stack:                           val, retdest  {val: y_3, 36: (y^a2)_2, 48:  y^a1   , 60: y^a0}
-    PUSH 48
-    %frob_fp254_12_1
-    POP
-    // stack:                           val, retdest  {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
-    %stack (val) -> (36, val, val, penult_mul, val)
-    // stack: 36, val, val, penult_mul, val, retdest  {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
-    %jump(mul_fp254_12)
-penult_mul:
-    // stack:                           val, retdest  {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
-    %stack (val) -> (48, val, val, final_mul, val)
-    // stack:  48, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
-    %jump(mul_fp254_12)
-final_mul: 
-    // stack:                           val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0}
-    %stack (val) -> (60, val, val)
-    // stack:                  60, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0}
-    %jump(mul_fp254_12)
-
-
-/// def power_loop_4():
-///     for i in range(64):
-///         abc = load(i, power_data_4)
-///         if a:
-///             y4 *= acc
-///         if b:
-///             y2 *= acc
-///         if c:
-///             y0 *= acc
-///         acc = square_fp254_12(acc)
-///     y4 *= acc
-///
-/// def power_loop_2():
-///     for i in range(62):
-///        ab = load(i, power_data_2)
-///        if a:
-///            y2 *= acc
-///        if b:
-///            y0 *= acc
-///        acc = square_fp254_12(acc)
-///     y2 *= acc
-///
-/// def power_loop_0():
-///     for i in range(65):
-///         a = load(i, power_data_0)
-///         if a:
-///             y0 *= acc
-///         acc = square_fp254_12(acc)
-///     y0 *= acc
-
-power_loop_4:
-    // stack:                                   i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    ISZERO
-    // stack:                           break?, i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_4_end)
-    // stack:                                   i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %sub_const(1)
-    // stack:                                   i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    %mload_kernel_code(power_data_4)
-    // stack:                              abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    %lt_const(100)
-    // stack:                       skip?, abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_4_b)
-    // stack:                              abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %sub_const(100)
-    // stack:                               bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (48, 48, power_loop_4_b)
-    // stack:      48, 48, power_loop_4_b,  bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP8
-    // stack: sqr, 48, 48, power_loop_4_b,  bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)
-power_loop_4_b:
-    // stack:                             bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    %lt_const(10)
-    // stack:                      skip?, bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_4_c)
-    // stack:                             bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %sub_const(10)
-    // stack:                              c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (36, 36, power_loop_4_c)
-    // stack:      36, 36, power_loop_4_c, c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP8
-    // stack: sqr, 36, 36, power_loop_4_c, c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)
-power_loop_4_c:
-    // stack:                            c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    ISZERO
-    // stack:                        skip?, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_4_sq)
-    // stack:                               i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (24, 24, power_loop_4_sq)
-    // stack:      24, 24, power_loop_4_sq, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP7
-    // stack: sqr, 24, 24, power_loop_4_sq, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)
-power_loop_4_sq:
-    // stack:                         i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    PUSH power_loop_4  
-    // stack:           power_loop_4, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP5  
-    DUP1
-    // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(square_fp254_12)
-power_loop_4_end:
-    // stack:                         0, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    POP  
-    // stack:                            j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (48, 48, power_loop_2) 
-    // stack:      48, 48, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP6
-    // stack: sqr, 48, 48, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)
-
-power_loop_2:
-    // stack:                                   j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    ISZERO
-    // stack:                         break?, j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_2_end)
-    // stack:                                 j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %sub_const(1)
-    // stack:                                 j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    %mload_kernel_code(power_data_2)
-    // stack:                             ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    %lt_const(10)
-    // stack:                      skip?, ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_2_b)
-    // stack:                             ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %sub_const(10)
-    // stack:                              b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (36, 36, power_loop_2_b) 
-    // stack:      36, 36, power_loop_2_b, b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP7
-    // stack: sqr, 36, 36, power_loop_2_b, b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)
-power_loop_2_b:
-    // stack:                            b, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    ISZERO
-    // stack:                        skip?, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_2_sq)
-    // stack:                               j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (24, 24, power_loop_2_sq) 
-    // stack:      24, 24, power_loop_2_sq, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP6
-    // stack: sqr, 24, 24, power_loop_2_sq, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)
-power_loop_2_sq:
-    // stack:                         j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    PUSH power_loop_2  
-    // stack:           power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP4  
-    DUP1
-    // stack: sqr, sqr, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(square_fp254_12)
-power_loop_2_end:
-    // stack:                         0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    POP  
-    // stack:                            k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (36, 36, power_loop_0)
-    // stack:      36, 36, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP5
-    // stack: sqr, 36, 36, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)
-
-power_loop_0:
-    // stack:                               k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    ISZERO
-    // stack:                       break?, k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_0_end)
-    // stack:                               k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %sub_const(1)
-    // stack:                               k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP1  
-    %mload_kernel_code(power_data_0)
-    // stack:                            a, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    ISZERO
-    // stack:                        skip?, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jumpi(power_loop_0_sq)
-    // stack:                               k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack () -> (24, 24, power_loop_0_sq)  
-    // stack:      24, 24, power_loop_0_sq, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP5
-    // stack: sqr, 24, 24, power_loop_0_sq, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)
-power_loop_0_sq:
-    // stack:                         k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    PUSH power_loop_0  
-    // stack:           power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    DUP3  
-    DUP1
-    // stack: sqr, sqr, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(square_fp254_12)
-power_loop_0_end:
-    // stack:                       0, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %stack (i, sqr) -> (24, sqr, 24, custom_powers)
-    // stack:   24, sqr, 24, custom_powers  {12: sqr, 24: y0, 36: y2, 48: y4}
-    %jump(mul_fp254_12)    
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
similarity index 77%
rename from evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index f09684bd..120365af 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -1,12 +1,3 @@
-/// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
-///     out = miller_loop(P, Q)
-///     return bn254_invariant_exponent(P, Q)
-global bn254_tate:
-    // stack:                                inp, out, retdest
-    %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out)
-    // stack: inp, out, bn254_invariant_exponent, out, retdest
-    %jump(bn254_miller)
-
 /// def miller(P, Q):
 ///     miller_init()
 ///     miller_loop()
@@ -35,13 +26,13 @@ global bn254_tate:
 ///     mul_tangent()
 
 global bn254_miller:
-    // stack:         ptr, out, retdest
-    %stack (ptr, out) -> (out, 1, ptr, out)
-    // stack: out, 1, ptr, out, retdest
-    %mstore_kernel_bn254_pairing
-    // stack:         ptr, out, retdest
+    // stack:            ptr, out, retdest
+    %stack (ptr, out) -> (out, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ptr, out)
+    // stack: out, unit, ptr, out, retdest
+    %store_fp254_12
+    // stack:            ptr, out, retdest
     %load_fp254_6
-    // stack:        P, Q, out, retdest
+    // stack:           P, Q, out, retdest
     %stack (P: 2) -> (0, 53, P, P)
     // stack: 0, 53, O, P, Q, out, retdest
     // the head 0 lets miller_loop start with POP
@@ -64,6 +55,7 @@ miller_return:
     // stack: times, O, P, Q, out, retdest
     %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest)
     // stack:                      retdest
+    %clear_line
     JUMP 
 
 miller_one:
@@ -109,35 +101,35 @@ mul_tangent:
     // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %jump(square_fp254_12)
 mul_tangent_1:
-    // stack:         out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // stack:          out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     DUP13
     DUP13
     DUP13
     DUP13
-    // stack:      Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // stack:       Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     DUP11  
     DUP11
-    // stack:   O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // stack:    O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %tangent
-    // stack:         out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {0: line}
-    %stack (out) -> (out, 0, out)
-    // stack: out, 0, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {0: line}
+    // stack:          out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {12: line}
+    %stack (out) -> (out, 12, out)
+    // stack: out, 12, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {12: line}
     %jump(mul_fp254_12_sparse)
 mul_tangent_2:
-    // stack:                  retdest, 0xnm, times,   O, P, Q, out  {0: line}
+    // stack:                  retdest, 0xnm, times,   O, P, Q, out  {12: line}
     PUSH after_double
-    // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {0: line}
+    // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {12: line}
     DUP6  
     DUP6
-    // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {0: line}
+    // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {12: line}
     %jump(bn_double)
 after_double:
-    // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {0: line}
+    // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {12: line}
     SWAP5
     POP
     SWAP5
     POP
-    // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {0: line}
+    // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {12: line}
     JUMP
 
 /// def mul_cord()
@@ -146,26 +138,26 @@ after_double:
 ///     O += P
 
 mul_cord:
-    // stack:                          0xnm, times, O, P, Q, out
+    // stack:                           0xnm, times, O, P, Q, out
     PUSH mul_cord_1
-    // stack:              mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:               mul_cord_1, 0xnm, times, O, P, Q, out
     DUP11  
     DUP11  
     DUP11  
     DUP11
-    // stack:           Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:            Q, mul_cord_1, 0xnm, times, O, P, Q, out
     DUP9  
     DUP9
-    // stack:        O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:         O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
     DUP13  
     DUP13
-    // stack:     P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:      P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
     %cord 
-    // stack:              mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
+    // stack:               mul_cord_1, 0xnm, times, O, P, Q, out  {12: line}
     DUP12
-    // stack:         out, mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
-    %stack (out) -> (out, 0, out)
-    // stack: out, 0, out, mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
+    // stack:          out, mul_cord_1, 0xnm, times, O, P, Q, out  {12: line}
+    %stack (out) -> (out, 12, out)
+    // stack: out, 12, out, mul_cord_1, 0xnm, times, O, P, Q, out  {12: line}
     %jump(mul_fp254_12_sparse)
 mul_cord_1:
     // stack:                   0xnm, times, O  , P, Q, out
@@ -202,7 +194,7 @@ after_add:
     // stack:      py^2 , 9, px, py, qx, qx_,  qy, qy_
     SUBFP254
     // stack:      py^2 - 9, px, py, qx, qx_,  qy, qy_
-    %mstore_kernel_bn254_pairing(0)
+    %mstore_kernel_bn254_pairing(12)
     // stack:                px, py, qx, qx_,  qy, qy_
     DUP1  
     MULFP254
@@ -218,7 +210,7 @@ after_add:
     DUP3  
     MULFP254
     // stack:   (-3*px^2)qx, py, -3px^2, qx_,  qy, qy_ 
-    %mstore_kernel_bn254_pairing(2)
+    %mstore_kernel_bn254_pairing(14)
     // stack:                py, -3px^2, qx_,  qy, qy_ 
     PUSH 2  
     MULFP254
@@ -228,15 +220,15 @@ after_add:
     DUP4  
     MULFP254
     // stack:           (2py)qy, -3px^2, qx_, 2py, qy_ 
-    %mstore_kernel_bn254_pairing(8)
+    %mstore_kernel_bn254_pairing(20)
     // stack:                    -3px^2, qx_, 2py, qy_ 
     MULFP254
     // stack:                   (-3px^2)*qx_, 2py, qy_ 
-    %mstore_kernel_bn254_pairing(3)
+    %mstore_kernel_bn254_pairing(15)
     // stack:                                 2py, qy_ 
     MULFP254
     // stack:                                (2py)*qy_ 
-    %mstore_kernel_bn254_pairing(9)
+    %mstore_kernel_bn254_pairing(21)
 %endmacro
 
 /// def cord(p1x, p1y, p2x, p2y, qx, qy):
@@ -258,7 +250,7 @@ after_add:
     // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SUBFP254
     // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    %mstore_kernel_bn254_pairing(0)
+    %mstore_kernel_bn254_pairing(12)
     // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SWAP3
     // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
@@ -273,20 +265,29 @@ after_add:
     DUP5
     MULFP254
     // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    %mstore_kernel_bn254_pairing(8)
+    %mstore_kernel_bn254_pairing(20)
     // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
     SWAP1
     // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
     DUP2
     MULFP254
     // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
-    %mstore_kernel_bn254_pairing(2)
+    %mstore_kernel_bn254_pairing(14)
     // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
     MULFP254
     // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
-    %mstore_kernel_bn254_pairing(3)
+    %mstore_kernel_bn254_pairing(15)
     // stack:                                            p1x - p2x, qy_
     MULFP254
     // stack:                                           (p1x - p2x)*qy_
-    %mstore_kernel_bn254_pairing(9)
+    %mstore_kernel_bn254_pairing(21)
+%endmacro
+
+%macro clear_line // zero the five scratch cells (12, 14, 15, 20, 21) that %tangent and %cord write
+    %stack () -> (0, 0, 0, 0, 0) // one zero for each store below
+    %mstore_kernel_bn254_pairing(12)
+    %mstore_kernel_bn254_pairing(14)
+    %mstore_kernel_bn254_pairing(15)
+    %mstore_kernel_bn254_pairing(20)
+    %mstore_kernel_bn254_pairing(21)
 %endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/pairing.asm
new file mode 100644
index 00000000..4479b965
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/pairing.asm
@@ -0,0 +1,193 @@
+/// The input to the pairing script is a list of points
+///     P_i = n_i*G: Curve, Q_i = m_i*H: TwistedCurve
+/// where G, H are the respective generators, such that
+///     sum_i n_i*m_i = 0
+/// and therefore, due to bilinearity of the pairing:
+///     prod_i e(P_i, Q_i) 
+///   = prod_i e(n_i G, m_i H) 
+///   = prod_i e(G,H)^{n_i * m_i} 
+///   = e(G,H)^{sum_i n_i * m_i}
+///   = e(G,H)^0
+///   = 1: Fp12 
+
+/// def bn254_pairing(pairs: List((Curve, TwistedCurve))) -> Bool:
+///     
+///     for P, Q in pairs:
+///         if not (P.is_valid and Q.is_valid):
+///             return @U256_MAX
+///     
+///     out = 1
+///     for P, Q in pairs:
+///         if P != 0 and Q != 0:
+///             out *= miller_loop(P, Q)
+///
+///     result = bn254_final_exponent(out)
+///     return result == unit_fp12
+
+/// The following is a key to this API
+/// 
+/// - k is the number of inputs
+/// - each input given by a pair of points, one on the curve and one on the twisted curve
+/// - each input consists of 6 stack terms---2 for the curve point and 4 for the twisted curve point
+/// - the inputs are presumed to be stored contiguously in memory, starting at inp
+/// - the output (as defined above) is an Fp12 element
+/// - out and inp are the BnPairing segment offsets for the output element and input
+/// - the assembly code currently uses offsets 0-78 for scratch space
+
+global bn254_pairing:
+    // stack: k, inp, out, retdest 
+    DUP1 // j starts at k and counts down to 0 over the validation pass
+
+bn254_input_check:
+    // stack:       j    , k, inp   (out, retdest stay underneath throughout)
+    DUP1
+    ISZERO
+    // stack: end?, j    , k, inp
+    %jumpi(bn254_pairing_start)
+    // stack:       j    , k, inp
+    %sub_const(1)
+    // stack:       j=j-1, k, inp
+
+    %stack (j, k, inp) -> (j, inp, j, k, inp)
+    // stack:        j, inp, j, k, inp
+    %mul_const(6) // each input pair occupies 6 words: 2 for P, 4 for Q
+    ADD
+    // stack:  inp_j=inp+6j, j, k, inp
+    DUP1
+    // stack:  inp_j, inp_j, j, k, inp
+    %load_fp254_2
+    // stack:    P_j, inp_j, j, k, inp
+    %bn_check
+    // stack: valid?, inp_j, j, k, inp
+    ISZERO
+    %jumpi(bn_pairing_invalid_input) // bail out if P_j fails %bn_check
+    // stack:         inp_j, j, k, inp
+    DUP1
+    // stack: inp_j , inp_j, j, k, inp
+    %add_const(2) // Q_j follows the 2 words of P_j
+    // stack: inp_j', inp_j, j, k, inp
+    %load_fp254_4
+    // stack:    Q_j, inp_j, j, k, inp
+    %bn_check_twisted
+    // stack: valid?, inp_j, j, k, inp
+    ISZERO
+    %jumpi(bn_pairing_invalid_input) // bail out if Q_j fails %bn_check_twisted
+    // stack:         inp_j, j, k, inp
+    POP
+    %jump(bn254_input_check)
+
+bn_pairing_invalid_input: // reached when some P_j or Q_j fails its validity check
+    // stack:  inp_j, j, k, inp, out, retdest
+    %stack (inp_j, j, k, inp, out, retdest) -> (retdest, @U256_MAX) // drop all state; return the documented failure sentinel, not the offset inp_j
+    JUMP
+
+bn254_pairing_start: // every input pair passed validation (j reached 0)
+    // stack:      0, k, inp, out,                   retdest
+    %stack (j, k, inp, out) -> (out, 1, k, inp, out, bn254_pairing_output_validation, out)
+    // stack: out, 1, k, inp, out, bn254_pairing_output_validation, out, retdest
+    %mstore_kernel_bn254_pairing // mem[out] = 1. NOTE(review): only this cell is written; out+1..out+11 are presumably already zero so that out holds the Fp12 unit -- confirm
+    // stack:         k, inp, out, bn254_pairing_output_validation, out, retdest
+
+bn254_pairing_loop:
+    // stack:               k, inp, out, bn254_pairing_output_validation, out, retdest
+    DUP1
+    ISZERO
+    // stack:         end?, k, inp, out, bn254_pairing_output_validation, out, retdest
+    %jumpi(bn254_final_exponent) // all pairs consumed; presumably returns into bn254_pairing_output_validation, which sits in the retdest slot -- confirm
+    // stack:               k, inp, out, bn254_pairing_output_validation, out, retdest
+    %sub_const(1)
+    // stack:           k=k-1, inp, out, bn254_pairing_output_validation, out, retdest
+    %stack (k, inp) -> (k, inp, k, inp)
+    // stack:       k, inp, k, inp, out, bn254_pairing_output_validation, out, retdest
+    %mul_const(6) // 6 words per input pair
+    ADD
+    // stack:        inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest
+    DUP1
+    %load_fp254_6
+    // stack:  P, Q, inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest
+    %neutral_input // 1 if P or Q is all-zero, in which case the pair is skipped
+    // stack: skip?, inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest
+    %jumpi(bn_skip_input)
+    // stack:        inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest
+    %stack (inp_k, k, inp, out) -> (bn254_miller, inp_k, 0, mul_fp254_12, 0, out, out, bn254_pairing_loop, k, inp, out)
+    // stack: bn254_miller,                       inp_k, 0,     <- JUMP target, then its (ptr, out) arguments
+    //        mul_fp254_12,                    0, out, out,     <- miller's retdest, then its (inA, inB, out) arguments
+    //        bn254_pairing_loop,              k, inp, out,     <- mul's retdest, then the loop state
+    //        bn254_pairing_output_validation, out, retdest
+    JUMP
+
+bn_skip_input: // %neutral_input flagged this pair (P or Q all-zero): leave it out of the product
+    // stack: inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest
+    POP
+    // stack:        k, inp, out, bn254_pairing_output_validation, out, retdest
+    %jump(bn254_pairing_loop)
+
+
+bn254_pairing_output_validation:
+    // stack:        out, retdest
+    PUSH 1 // check is accumulated as a product of 0/1 flags
+    // stack: check, out, retdest
+    %check_output_term // mem[out] must equal 1
+    %check_output_term(1) // mem[out+1] .. mem[out+11] must each be 0
+    %check_output_term(2)
+    %check_output_term(3)
+    %check_output_term(4)
+    %check_output_term(5)
+    %check_output_term(6)
+    %check_output_term(7)
+    %check_output_term(8)
+    %check_output_term(9)
+    %check_output_term(10)
+    %check_output_term(11)
+    // stack: check, out, retdest   (check == 1 iff the 12 words at out encode 1, 0, ..., 0)
+    %stack (check, out, retdest) -> (retdest, check)
+    JUMP
+
+%macro check_output_term // check *= (mem[out] == 1); out is left on the stack
+    // stack:          check, out
+    DUP2
+    // stack:    out0, check, out
+    %mload_kernel_bn254_pairing
+    // stack:      f0, check, out
+    %eq_const(1)
+    // stack:  check0, check, out
+    MUL
+    // stack:          check, out
+%endmacro
+
+%macro check_output_term(j) // check *= (mem[out + j] == 0); out is left on the stack
+    // stack:          check, out
+    DUP2
+    %add_const($j)
+    // stack:    outj, check, out
+    %mload_kernel_bn254_pairing
+    // stack:      fj, check, out
+    ISZERO
+    // stack:  checkj, check, out
+    MUL
+    // stack:          check, out
+%endmacro
+
+%macro neutral_input // consumes P (2 words) and Q (4 words); leaves 1 if either is all-zero, else 0
+    // stack: P      , Q
+    ISZERO
+    SWAP1
+    ISZERO
+    MUL
+    // stack: P==0,    Q
+    SWAP4 // bury P==0 beneath Q; Q's words come up permuted, harmless since all four are tested
+    // stack: Q   , P==0
+    ISZERO
+    SWAP1
+    ISZERO
+    MUL
+    SWAP1
+    ISZERO
+    MUL
+    SWAP1
+    ISZERO
+    MUL
+    // stack: Q==0, P==0
+    OR
+    // stack: Q==0||P==0
+%endmacro
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/twisted_curve.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/twisted_curve.asm
new file mode 100644
index 00000000..859c45fe
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/twisted_curve.asm
@@ -0,0 +1,94 @@
+// Check if (X,Y) is a valid twisted curve point, with X = (x, x_) and Y = (y, y_) in Fp254_2.
+// Consumes the four input words and returns (range & curve) || ident
+//   where
+//     range = (x < N) & (x_ < N) & (y < N) & (y_ < N)
+//     curve = Y^2 == X^3 + 3/(9+i)
+//     ident = (X,Y) == (0,0)
+
+%macro bn_check_twisted
+    // stack:                x, x_, y, y_
+    %bn_check_twisted_range
+    // stack:         range, x, x_, y, y_
+    %bn_check_twisted_curve
+    // stack: curve , range, x, x_, y, y_
+    MUL // Cheaper than AND
+    // stack: curve & range, x, x_, y, y_
+    SWAP4 // move the flag beneath the coordinates so the ident check can consume them
+    // stack: y_, x, x_, y, curve & range
+    %bn_check_twisted_ident
+    // stack:     ident ,   curve & range
+    OR
+    // stack:     ident || (curve & range)
+%endmacro
+
+%macro bn_check_twisted_range // pushes range = 1 iff all four words are below N = @BN_BASE; inputs are left in place
+    // stack:                               x, x_, y, y_
+    PUSH @BN_BASE
+    // stack:                            N, x, x_, y, y_
+    %stack (N) -> (N, N, N, N)
+    // stack:                   N, N, N, N, x, x_, y, y_
+    DUP8
+    // stack:              y_ , N, N, N, N, x, x_, y, y_
+    LT  
+    // stack:              y_ < N, N, N, N, x, x_, y, y_
+    SWAP3
+    // stack:              N, N, N, y_ < N, x, x_, y, y_
+    DUP7
+    // stack:          y , N, N, N, y_ < N, x, x_, y, y_
+    LT
+    // stack:          y < N, N, N, y_ < N, x, x_, y, y_
+    SWAP2
+    // stack:          N, N, y < N, y_ < N, x, x_, y, y_
+    DUP6
+    // stack:     x_ , N, N, y < N, y_ < N, x, x_, y, y_
+    LT
+    // stack:     x_ < N, N, y < N, y_ < N, x, x_, y, y_
+    SWAP1
+    // stack:     N, x_ < N, y < N, y_ < N, x, x_, y, y_
+    DUP5 
+    // stack: x , N, x_ < N, y < N, y_ < N, x, x_, y, y_
+    LT
+    // stack: x < N, x_ < N, y < N, y_ < N, x, x_, y, y_
+    MUL // Cheaper than AND 
+    MUL // Cheaper than AND
+    MUL // Cheaper than AND
+    // stack:                        range, x, x_, y, y_
+%endmacro
+
+%macro bn_check_twisted_curve // pushes curve = (Y^2 == X^3 + A) above range; range, X and Y are left in place
+    // stack:                  range, X, Y
+    %stack (range, X: 2, Y: 2) -> (Y, Y, range, X, Y)
+    // stack:            Y, Y, range, X, Y
+    %mul_fp254_2
+    // stack:             Y^2, range, X, Y
+    %stack () -> (@BN_TWISTED_RE, @BN_TWISTED_IM) // A, the curve constant 3/(9+i), as two words
+    // stack:          A, Y^2, range, X, Y
+    %stack (A: 2, Y2: 2, range, X: 2) -> (X, X, X, A, Y2, range, X)
+    // stack: X, X, X, A, Y^2, range, X, Y
+    %mul_fp254_2
+    %mul_fp254_2
+    // stack:    X^3 , A, Y^2, range, X, Y
+    %add_fp254_2
+    // stack:    X^3 + A, Y^2, range, X, Y
+    %eq_fp254_2
+    // stack:           curve, range, X, Y
+%endmacro
+
+%macro bn_check_twisted_ident // consumes the top four words; leaves 1 iff all four are zero
+    SWAP2 // only reorders the words; the result does not depend on their order
+    // stack: a   , b   , c   , d
+    ISZERO
+    SWAP3
+    // stack: d   , b   , c   , a==0
+    ISZERO
+    SWAP2
+    // stack: c   , b   , d==0, a==0
+    ISZERO
+    SWAP1
+    // stack: b   , c==0, d==0, a==0
+    ISZERO
+    // stack: b==0, c==0, d==0, a==0
+    MUL // Cheaper than AND
+    MUL // Cheaper than AND
+    MUL // Cheaper than AND
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index 5fd47e80..ca32a30d 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -2,32 +2,6 @@
 ///// GENERAL FP12 MULTIPLICATION /////
 ///////////////////////////////////////
 
-/// cost: 1063
-
-/// fp254_6 functions:
-///  fn    | num | ops | cost
-///  -------------------------
-///  load  |   8 |  40 |  320
-///  store |   5 |  40 |  200
-///  dup   |   5 |   6 |   30
-///  swap  |   4 |  16 |   64
-///  add   |   4 |  16 |   64
-///  subr  |   1 |  17 |   17
-///  mul   |   3 | 157 |  471
-///  i9    |   1 |   9 |    9
-///
-/// lone stack operations:
-///  op    | num 
-///  ------------
-///  ADD   |   3
-///  SWAP  |   2
-///  DUP   |   6
-///  PUSH  |   6
-///  POP   |   2
-///  JUMP  |   6
-///
-/// TOTAL: 1201
-
 /// inputs:
 ///     F = f + f'z
 ///     G = g + g'z
@@ -66,73 +40,73 @@ mul_fp254_12_1:
     // stack:                   f'g', g'  , f', inA, inB, out 
     %dup_fp254_6_0
     // stack:             f'g', f'g', g'  , f', inA, inB, out 
-    %store_fp254_6_sh(84)                                    
-    // stack:                   f'g', g'  , f', inA, inB, out  {84: sh(f'g')}
-    %store_fp254_6(90)
-    // stack:                         g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %store_fp254_6_sh(60)                                    
+    // stack:                   f'g', g'  , f', inA, inB, out  {60: sh(f'g')}
+    %store_fp254_6(66)
+    // stack:                         g'  , f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     DUP13
-    // stack:                    inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack:                    inA, g'  , f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     DUP15  
-    // stack:               inB, inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack:               inB, inA, g'  , f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     %load_fp254_6
-    // stack:                g , inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack:                g , inA, g'  , f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     %stack (f: 6, x, g: 6) -> (g, x, f)
-    // stack:                g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack:                g', inA, g   , f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     %dup_fp254_6_7
-    // stack:              g,g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack:              g,g', inA, g   , f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     %add_fp254_6
-    // stack:              g+g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack:              g+g', inA, g   , f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     %stack (f: 6, x, g: 6) -> (g, x, f)
-    // stack:                 g, inA, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack:                 g, inA, g+g', f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     PUSH mul_fp254_12_2
-    // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     SWAP7
-    // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     %load_fp254_6
-    // stack:   f, g, mul_fp254_12_2, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    // stack:   f, g, mul_fp254_12_2, g+g', f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
     %jump(mul_fp254_6)
 mul_fp254_12_2:    
-    // stack:                     fg, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
-    %store_fp254_6(96)
-    // stack:                         g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                     fg, g+g', f', inA, inB, out  {60: sh(f'g'), 66: f'g'}
+    %store_fp254_6(72)
+    // stack:                         g+g', f', inA, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %stack (x: 6, y: 6) -> (y, x)
-    // stack:                         f', g+g', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                         f', g+g', inA, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     PUSH mul_fp254_12_3
-    // stack:         mul_fp254_12_3, f', g+g', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:         mul_fp254_12_3, f', g+g', inA, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     SWAP13
-    // stack:         inA, f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:         inA, f', g+g', mul_fp254_12_3, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %load_fp254_6
-    // stack:            f,f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:            f,f', g+g', mul_fp254_12_3, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %add_fp254_6
-    // stack:            f+f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:            f+f', g+g', mul_fp254_12_3, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %jump(mul_fp254_6)
 mul_fp254_12_3:
-    // stack:                          (f+f')(g+g'), inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
-    %load_fp254_6(96)
-    // stack:                      fg, (f+f')(g+g'), inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                          (f+f')(g+g'), inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
+    %load_fp254_6(72)
+    // stack:                      fg, (f+f')(g+g'), inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %stack (x: 6, y: 6) -> (y, x)
-    // stack:                      (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                      (f+f')(g+g'), fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %dup_fp254_6_6
-    // stack:                  fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
-    %load_fp254_6(90)
-    // stack:             f'g',fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                  fg, (f+f')(g+g'), fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
+    %load_fp254_6(66)
+    // stack:             f'g',fg, (f+f')(g+g'), fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %add_fp254_6
-    // stack:             f'g'+fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:             f'g'+fg, (f+f')(g+g'), fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %subr_fp254_6
-    // stack:          (f+f')(g+g') - (f'g'+fg), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}   
+    // stack:          (f+f')(g+g') - (f'g'+fg), fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}   
     DUP14  
     %add_const(6) 
-    // stack:    out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}   
+    // stack:    out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}   
     %store_fp254_6
-    // stack:                                    fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
-    %load_fp254_6(84)
-    // stack:                         sh(f'g') , fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                                    fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
+    %load_fp254_6(60)
+    // stack:                         sh(f'g') , fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %add_fp254_6
-    // stack:                         sh(f'g') + fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                         sh(f'g') + fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     DUP8
-    // stack:                    out, sh(f'g') + fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                    out, sh(f'g') + fg, inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %store_fp254_6
-    // stack:                                        inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    // stack:                                        inB, out  {60: sh(f'g'), 66: f'g', 72: fg}
     %pop2  
     JUMP
 
@@ -141,29 +115,6 @@ mul_fp254_12_3:
 ///// SPARSE FP12 MULTIPLICATION /////
 //////////////////////////////////////
 
-/// cost: 645
-
-/// fp254_6 functions:
-///  fn      | num | ops | cost
-///  ---------------------------
-///  load    |   2 |  40 |   80
-///  store   |   2 |  40 |   80
-///  dup     |   4 |   6 |   24
-///  swap    |   4 |  16 |   64
-///  add     |   4 |  16 |   64
-///  mul_fp254_  |   2 |  21 |   42
-///  mul_fp254_2 |   4 |  59 |  236
-///
-/// lone stack operations:
-///  op    | num 
-///  ------------
-///  ADD   |   6
-///  DUP   |   9
-///  PUSH  |   6
-///  POP   |   5
-///
-/// TOTAL: 618
-
 /// input:
 ///     F = f + f'z
 ///     G = g0 + (G1)t + (G2)tz
@@ -277,32 +228,6 @@ global mul_fp254_12_sparse:
 ///// FP12 SQUARING /////
 /////////////////////////
 
-/// cost: 646
-
-/// fp254_6 functions:
-///  fn    | num | ops | cost
-///  -------------------------
-///  load  |   2 |  40 |   80
-///  store |   2 |  40 |   80
-///  dup   |   2 |   6 |   12
-///  swap  |   2 |  16 |   32
-///  add   |   1 |  16 |   16
-///  mul   |   1 | 157 |  157
-///  sq    |   2 | 101 |  202
-///  dbl   |   1 |  13 |   13
-///
-/// lone stack operations:
-///  op    | num 
-///  ------------
-///  ADD   |   3
-///  SWAP  |   4
-///  DUP   |   5
-///  PUSH  |   6
-///  POP   |   3
-///  JUMP  |   4
-///
-/// TOTAL: 
-
 /// input:
 ///     F = f + f'z
 ///
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 6214f385..ce4602a9 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -32,16 +32,19 @@ global inv_fp254_12:
     // stack:              out, f^-1, inp, out, retdest
     %store_fp254_12
     // stack:                         inp, out, retdest
-    %stack (inp, out) -> (inp, out, 72, check_inv_fp254_12)
-    // stack: inp, out, 72, check_inv_fp254_12, retdest 
+    %stack (inp, out) -> (inp, out, 60, check_inv_fp254_12)
+    // stack: inp, out, 60, check_inv_fp254_12, retdest 
     %jump(mul_fp254_12)
 check_inv_fp254_12:
     // stack:        retdest
-    PUSH 72  
+    PUSH 60  
     %load_fp254_12
     // stack: unit?, retdest
     %assert_eq_unit_fp254_12
     // stack:        retdest
+    PUSH 0
+    // stack:     0, retdest
+    %mstore_kernel_bn254_pairing(60)
     JUMP
 
 %macro prover_inv_fp254_12
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index af074714..82617e8f 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -67,6 +67,31 @@
     // stack:     cx, cy 
 %endmacro
 
+%macro eq_fp254_2 // consumes two Fp254_2 values (x, x_) and (y, y_); leaves 1 iff both components match
+    // stack: x, x_, y, y_
+    SWAP3
+    // stack: y_, x_, y, x
+    EQ
+    // stack: y_==x_, y, x
+    SWAP2
+    // stack: x, y, y_==x_
+    EQ
+    // stack: x==y, y_==x_
+    AND // stack: (x==y) & (y_==x_)
+%endmacro
+
+%macro add_fp254_2 // componentwise ADDFP254 of (x, x_) and (y, y_): z = x + y, z_ = x_ + y_
+    // stack: x, x_, y, y_
+    SWAP3
+    // stack: y_, x_, y, x
+    ADDFP254
+    // stack:     z_, y, x
+    SWAP2
+    // stack:     x, y, z_
+    ADDFP254
+    // stack:        z, z_
+%endmacro
+
 /// Given z = x + iy: Fp254_2, return complex conjugate z': Fp254_2
 /// where input is represented z.re, z.im and output as z'.im, z'.re
 /// cost: 9; note this returns y, x for the output x + yi
@@ -116,6 +141,31 @@
     // stack:    ac - bd, bc + ad
 %endmacro 
 
+// load a twisted curve point: the four consecutive words at ptr, ptr+1, ptr+2, ptr+3
+
+%macro load_fp254_4
+    // stack:                         ptr
+    DUP1  
+    %add_const(2)
+    // stack:                   ind2, ptr
+    %mload_kernel_bn254_pairing
+    // stack:                     x2, ptr
+    DUP2  
+    %add_const(1)
+    // stack:               ind1, x2, ptr
+    %mload_kernel_bn254_pairing
+    // stack:                 x1, x2, ptr
+    DUP3  
+    %add_const(3)
+    // stack:           ind3, x1, x2, ptr
+    %mload_kernel_bn254_pairing
+    // stack:             x3, x1, x2, ptr
+    SWAP3 // bring ptr (= ind0) to the top and drop x3 into last position
+    // stack:            ind0, x1, x2, x3
+    %mload_kernel_bn254_pairing
+    // stack:              x0, x1, x2, x3
+%endmacro
+
 // fp254_6 macros
 
 %macro load_fp254_6
diff --git a/evm/src/cpu/kernel/constants/mod.rs b/evm/src/cpu/kernel/constants/mod.rs
index cf2a1e31..b1486589 100644
--- a/evm/src/cpu/kernel/constants/mod.rs
+++ b/evm/src/cpu/kernel/constants/mod.rs
@@ -44,6 +44,10 @@ pub fn evm_constants() -> HashMap<String, U256> {
         c.insert(name.into(), U256::from(value));
     }
 
+    for (name, value) in SNARKV_POINTERS {
+        c.insert(name.into(), U256::from(value));
+    }
+
     for segment in Segment::all() {
         c.insert(segment.var_name().into(), (segment as u32).into());
     }
@@ -87,7 +91,7 @@ const HASH_CONSTANTS: [(&str, [u8; 32]); 2] = [
     ),
 ];
 
-const EC_CONSTANTS: [(&str, [u8; 32]); 18] = [
+const EC_CONSTANTS: [(&str, [u8; 32]); 20] = [
     (
         "U256_MAX",
         hex!("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"),
@@ -96,6 +100,14 @@ const EC_CONSTANTS: [(&str, [u8; 32]); 18] = [
         "BN_BASE",
         hex!("30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"),
     ),
+    (
+        "BN_TWISTED_RE",
+        hex!("2b149d40ceb8aaae81be18991be06ac3b5b4c5e559dbefa33267e6dc24a138e5"),
+    ),
+    (
+        "BN_TWISTED_IM",
+        hex!("009713b03af0fed4cd2cafadeed8fdf4a74fa084e52d1852e4a2bd0685c315d2"),
+    ),
     (
         "BN_SCALAR",
         hex!("30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001"),
@@ -232,6 +244,8 @@ const PRECOMPILES_GAS: [(&str, u16); 13] = [
     ("BLAKE2_F__GAS", 1),
 ];
 
+const SNARKV_POINTERS: [(&str, u64); 2] = [("SNARKV_INP", 112), ("SNARKV_OUT", 100)];
+
 const CODE_SIZE_LIMIT: [(&str, u64); 3] = [
     ("MAX_CODE_SIZE", 0x6000),
     ("MAX_INITCODE_SIZE", 0xc000),
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 8e71ffd6..548f9789 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -2,12 +2,12 @@ use anyhow::Result;
 use ethereum_types::U256;
 use rand::Rng;
 
-use crate::bn254_pairing::{
-    gen_fp12_sparse, invariant_exponent, miller_loop, tate, Curve, TwistedCurve,
-};
 use crate::cpu::kernel::interpreter::{
     run_interpreter_with_memory, Interpreter, InterpreterMemoryInitialization,
 };
+use crate::curve_pairings::{
+    bn_final_exponent, bn_miller_loop, gen_bn_fp12_sparse, Curve, CyclicGroup,
+};
 use crate::extension_tower::{FieldExt, Fp12, Fp2, Fp6, Stack, BN254};
 use crate::memory::segments::Segment::BnPairing;
 
@@ -20,38 +20,31 @@ fn extract_stack(interpreter: Interpreter<'static>) -> Vec<U256> {
         .collect::<Vec<U256>>()
 }
 
-fn setup_mul_fp6_test(
-    f: Fp6<BN254>,
-    g: Fp6<BN254>,
-    label: &str,
-) -> InterpreterMemoryInitialization {
+fn run_bn_mul_fp6(f: Fp6<BN254>, g: Fp6<BN254>, label: &str) -> Vec<U256> {
     let mut stack = f.on_stack();
     if label == "mul_fp254_6" {
         stack.extend(g.on_stack());
     }
     stack.push(U256::from(0xdeadbeefu32));
-    InterpreterMemoryInitialization {
+
+    let setup = InterpreterMemoryInitialization {
         label: label.to_string(),
         stack,
         segment: BnPairing,
         memory: vec![],
-    }
+    };
+    let interpreter = run_interpreter_with_memory(setup).unwrap();
+    extract_stack(interpreter)
 }
 
 #[test]
-fn test_mul_fp6() -> Result<()> {
+fn test_bn_mul_fp6() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp6<BN254> = rng.gen::<Fp6<BN254>>();
     let g: Fp6<BN254> = rng.gen::<Fp6<BN254>>();
 
-    let setup_normal: InterpreterMemoryInitialization = setup_mul_fp6_test(f, g, "mul_fp254_6");
-    let setup_square: InterpreterMemoryInitialization = setup_mul_fp6_test(f, f, "square_fp254_6");
-
-    let intrptr_normal: Interpreter = run_interpreter_with_memory(setup_normal).unwrap();
-    let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap();
-
-    let out_normal: Vec<U256> = extract_stack(intrptr_normal);
-    let out_square: Vec<U256> = extract_stack(intrptr_square);
+    let out_normal: Vec<U256> = run_bn_mul_fp6(f, g, "mul_fp254_6");
+    let out_square: Vec<U256> = run_bn_mul_fp6(f, f, "square_fp254_6");
 
     let exp_normal: Vec<U256> = (f * g).on_stack();
     let exp_square: Vec<U256> = (f * f).on_stack();
@@ -62,14 +55,10 @@ fn test_mul_fp6() -> Result<()> {
     Ok(())
 }
 
-fn setup_mul_fp12_test(
-    out: usize,
-    f: Fp12<BN254>,
-    g: Fp12<BN254>,
-    label: &str,
-) -> InterpreterMemoryInitialization {
-    let in0: usize = 200;
-    let in1: usize = 212;
+fn run_bn_mul_fp12(f: Fp12<BN254>, g: Fp12<BN254>, label: &str) -> Vec<U256> {
+    let in0: usize = 100;
+    let in1: usize = 112;
+    let out: usize = 124;
 
     let mut stack = vec![
         U256::from(in0),
@@ -80,37 +69,27 @@ fn setup_mul_fp12_test(
     if label == "square_fp254_12" {
         stack.remove(0);
     }
-    InterpreterMemoryInitialization {
+
+    let setup = InterpreterMemoryInitialization {
         label: label.to_string(),
         stack,
         segment: BnPairing,
         memory: vec![(in0, f.on_stack()), (in1, g.on_stack())],
-    }
+    };
+    let interpreter = run_interpreter_with_memory(setup).unwrap();
+    interpreter.extract_kernel_memory(BnPairing, out..out + 12)
 }
 
 #[test]
-fn test_mul_fp12() -> Result<()> {
-    let out: usize = 224;
-
+fn test_bn_mul_fp12() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp12<BN254> = rng.gen::<Fp12<BN254>>();
     let g: Fp12<BN254> = rng.gen::<Fp12<BN254>>();
-    let h: Fp12<BN254> = gen_fp12_sparse(&mut rng);
+    let h: Fp12<BN254> = gen_bn_fp12_sparse(&mut rng);
 
-    let setup_normal: InterpreterMemoryInitialization =
-        setup_mul_fp12_test(out, f, g, "mul_fp254_12");
-    let setup_sparse: InterpreterMemoryInitialization =
-        setup_mul_fp12_test(out, f, h, "mul_fp254_12_sparse");
-    let setup_square: InterpreterMemoryInitialization =
-        setup_mul_fp12_test(out, f, f, "square_fp254_12");
-
-    let intrptr_normal: Interpreter = run_interpreter_with_memory(setup_normal).unwrap();
-    let intrptr_sparse: Interpreter = run_interpreter_with_memory(setup_sparse).unwrap();
-    let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap();
-
-    let out_normal: Vec<U256> = intrptr_normal.extract_kernel_memory(BnPairing, out..out + 12);
-    let out_sparse: Vec<U256> = intrptr_sparse.extract_kernel_memory(BnPairing, out..out + 12);
-    let out_square: Vec<U256> = intrptr_square.extract_kernel_memory(BnPairing, out..out + 12);
+    let out_normal: Vec<U256> = run_bn_mul_fp12(f, g, "mul_fp254_12");
+    let out_sparse: Vec<U256> = run_bn_mul_fp12(f, h, "mul_fp254_12_sparse");
+    let out_square: Vec<U256> = run_bn_mul_fp12(f, f, "square_fp254_12");
 
     let exp_normal: Vec<U256> = (f * g).on_stack();
     let exp_sparse: Vec<U256> = (f * h).on_stack();
@@ -123,57 +102,58 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-fn setup_frob_fp6_test(f: Fp6<BN254>, n: usize) -> InterpreterMemoryInitialization {
-    InterpreterMemoryInitialization {
-        label: String::from("test_frob_fp254_6_") + &(n.to_string()),
+fn run_bn_frob_fp6(f: Fp6<BN254>, n: usize) -> Vec<U256> {
+    let setup = InterpreterMemoryInitialization {
+        label: format!("test_frob_fp254_6_{}", n),
         stack: f.on_stack(),
         segment: BnPairing,
         memory: vec![],
-    }
+    };
+    let interpreter = run_interpreter_with_memory(setup).unwrap();
+    extract_stack(interpreter)
 }
 
 #[test]
-fn test_frob_fp6() -> Result<()> {
+fn test_bn_frob_fp6() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp6<BN254> = rng.gen::<Fp6<BN254>>();
     for n in 1..4 {
-        let setup_frob = setup_frob_fp6_test(f, n);
-        let intrptr_frob: Interpreter = run_interpreter_with_memory(setup_frob).unwrap();
-        let out_frob: Vec<U256> = extract_stack(intrptr_frob);
-        let exp_frob: Vec<U256> = f.frob(n).on_stack();
-        assert_eq!(out_frob, exp_frob);
+        let output: Vec<U256> = run_bn_frob_fp6(f, n);
+        let expected: Vec<U256> = f.frob(n).on_stack();
+        assert_eq!(output, expected);
     }
     Ok(())
 }
 
-fn setup_frob_fp12_test(ptr: usize, f: Fp12<BN254>, n: usize) -> InterpreterMemoryInitialization {
-    InterpreterMemoryInitialization {
-        label: String::from("test_frob_fp254_12_") + &(n.to_string()),
+fn run_bn_frob_fp12(f: Fp12<BN254>, n: usize) -> Vec<U256> {
+    let ptr: usize = 100;
+    let setup = InterpreterMemoryInitialization {
+        label: format!("test_frob_fp254_12_{}", n),
         stack: vec![U256::from(ptr)],
         segment: BnPairing,
         memory: vec![(ptr, f.on_stack())],
-    }
+    };
+    let interpreter = run_interpreter_with_memory(setup).unwrap();
+    interpreter.extract_kernel_memory(BnPairing, ptr..ptr + 12)
 }
 
 #[test]
-fn test_frob_fp12() -> Result<()> {
-    let ptr: usize = 200;
+fn test_bn_frob_fp12() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp12<BN254> = rng.gen::<Fp12<BN254>>();
+
     for n in [1, 2, 3, 6] {
-        let setup_frob = setup_frob_fp12_test(ptr, f, n);
-        let intrptr_frob: Interpreter = run_interpreter_with_memory(setup_frob).unwrap();
-        let out_frob: Vec<U256> = intrptr_frob.extract_kernel_memory(BnPairing, ptr..ptr + 12);
-        let exp_frob: Vec<U256> = f.frob(n).on_stack();
-        assert_eq!(out_frob, exp_frob);
+        let output = run_bn_frob_fp12(f, n);
+        let expected: Vec<U256> = f.frob(n).on_stack();
+        assert_eq!(output, expected);
     }
     Ok(())
 }
 
 #[test]
-fn test_inv_fp12() -> Result<()> {
-    let ptr: usize = 200;
-    let inv: usize = 212;
+fn test_bn_inv_fp12() -> Result<()> {
+    let ptr: usize = 100;
+    let inv: usize = 112;
     let mut rng = rand::thread_rng();
     let f: Fp12<BN254> = rng.gen::<Fp12<BN254>>();
 
@@ -193,101 +173,54 @@ fn test_inv_fp12() -> Result<()> {
 }
 
 #[test]
-fn test_invariant_exponent() -> Result<()> {
-    let ptr: usize = 200;
+fn test_bn_final_exponent() -> Result<()> {
+    let ptr: usize = 100;
+
     let mut rng = rand::thread_rng();
     let f: Fp12<BN254> = rng.gen::<Fp12<BN254>>();
 
     let setup = InterpreterMemoryInitialization {
-        label: "bn254_invariant_exponent".to_string(),
-        stack: vec![U256::from(ptr), U256::from(0xdeadbeefu32)],
+        label: "bn254_final_exponent".to_string(),
+        stack: vec![
+            U256::zero(),
+            U256::zero(),
+            U256::from(ptr),
+            U256::from(0xdeadbeefu32),
+        ],
         segment: BnPairing,
         memory: vec![(ptr, f.on_stack())],
     };
 
     let interpreter: Interpreter = run_interpreter_with_memory(setup).unwrap();
     let output: Vec<U256> = interpreter.extract_kernel_memory(BnPairing, ptr..ptr + 12);
-    let expected: Vec<U256> = invariant_exponent(f).on_stack();
+    let expected: Vec<U256> = bn_final_exponent(f).on_stack();
 
     assert_eq!(output, expected);
 
     Ok(())
 }
 
-// The curve is cyclic with generator (1, 2)
-pub const CURVE_GENERATOR: Curve = {
-    Curve {
-        x: BN254 { val: U256::one() },
-        y: BN254 {
-            val: U256([2, 0, 0, 0]),
-        },
-    }
-};
-
-// The twisted curve is cyclic with generator (x, y) as follows
-pub const TWISTED_GENERATOR: TwistedCurve = {
-    TwistedCurve {
-        x: Fp2 {
-            re: BN254 {
-                val: U256([
-                    0x46debd5cd992f6ed,
-                    0x674322d4f75edadd,
-                    0x426a00665e5c4479,
-                    0x1800deef121f1e76,
-                ]),
-            },
-            im: BN254 {
-                val: U256([
-                    0x97e485b7aef312c2,
-                    0xf1aa493335a9e712,
-                    0x7260bfb731fb5d25,
-                    0x198e9393920d483a,
-                ]),
-            },
-        },
-        y: Fp2 {
-            re: BN254 {
-                val: U256([
-                    0x4ce6cc0166fa7daa,
-                    0xe3d1e7690c43d37b,
-                    0x4aab71808dcb408f,
-                    0x12c85ea5db8c6deb,
-                ]),
-            },
-            im: BN254 {
-                val: U256([
-                    0x55acdadcd122975b,
-                    0xbc4b313370b38ef3,
-                    0xec9e99ad690c3395,
-                    0x090689d0585ff075,
-                ]),
-            },
-        },
-    }
-};
-
 #[test]
-fn test_miller() -> Result<()> {
-    let ptr: usize = 200;
-    let out: usize = 206;
-    let inputs: Vec<U256> = vec![
-        CURVE_GENERATOR.x.val,
-        CURVE_GENERATOR.y.val,
-        TWISTED_GENERATOR.x.re.val,
-        TWISTED_GENERATOR.x.im.val,
-        TWISTED_GENERATOR.y.re.val,
-        TWISTED_GENERATOR.y.im.val,
-    ];
+fn test_bn_miller() -> Result<()> {
+    let ptr: usize = 100;
+    let out: usize = 106;
+
+    let mut rng = rand::thread_rng();
+    let p: Curve<BN254> = rng.gen::<Curve<BN254>>();
+    let q: Curve<Fp2<BN254>> = rng.gen::<Curve<Fp2<BN254>>>();
+
+    let mut input = p.on_stack();
+    input.extend(q.on_stack());
 
     let setup = InterpreterMemoryInitialization {
         label: "bn254_miller".to_string(),
         stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
         segment: BnPairing,
-        memory: vec![(ptr, inputs)],
+        memory: vec![(ptr, input)],
     };
     let interpreter = run_interpreter_with_memory(setup).unwrap();
     let output: Vec<U256> = interpreter.extract_kernel_memory(BnPairing, out..out + 12);
-    let expected = miller_loop(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
+    let expected = bn_miller_loop(p, q).on_stack();
 
     assert_eq!(output, expected);
 
@@ -295,29 +228,41 @@ fn test_miller() -> Result<()> {
 }
 
 #[test]
-fn test_tate() -> Result<()> {
-    let ptr: usize = 200;
-    let out: usize = 206;
-    let inputs: Vec<U256> = vec![
-        CURVE_GENERATOR.x.val,
-        CURVE_GENERATOR.y.val,
-        TWISTED_GENERATOR.x.re.val,
-        TWISTED_GENERATOR.x.im.val,
-        TWISTED_GENERATOR.y.re.val,
-        TWISTED_GENERATOR.y.im.val,
-    ];
+fn test_bn_pairing() -> Result<()> {
+    let out: usize = 100;
+    let ptr: usize = 112;
+
+    let mut rng = rand::thread_rng();
+    let k: usize = rng.gen_range(1..10);
+    let mut acc: i32 = 0;
+    let mut input: Vec<U256> = vec![];
+    for _ in 1..k {
+        let m: i32 = rng.gen_range(-8..8);
+        let n: i32 = rng.gen_range(-8..8);
+        acc -= m * n;
+
+        let p: Curve<BN254> = Curve::<BN254>::int(m);
+        let q: Curve<Fp2<BN254>> = Curve::<Fp2<BN254>>::int(n);
+        input.extend(p.on_stack());
+        input.extend(q.on_stack());
+    }
+    let p: Curve<BN254> = Curve::<BN254>::int(acc);
+    let q: Curve<Fp2<BN254>> = Curve::<Fp2<BN254>>::GENERATOR;
+    input.extend(p.on_stack());
+    input.extend(q.on_stack());
 
     let setup = InterpreterMemoryInitialization {
-        label: "bn254_tate".to_string(),
-        stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
+        label: "bn254_pairing".to_string(),
+        stack: vec![
+            U256::from(k),
+            U256::from(ptr),
+            U256::from(out),
+            U256::from(0xdeadbeefu32),
+        ],
         segment: BnPairing,
-        memory: vec![(ptr, inputs)],
+        memory: vec![(ptr, input)],
     };
     let interpreter = run_interpreter_with_memory(setup).unwrap();
-    let output: Vec<U256> = interpreter.extract_kernel_memory(BnPairing, out..out + 12);
-    let expected = tate(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
-
-    assert_eq!(output, expected);
-
+    assert_eq!(interpreter.stack()[0], U256::one());
     Ok(())
 }
diff --git a/evm/src/bn254_pairing.rs b/evm/src/curve_pairings.rs
similarity index 63%
rename from evm/src/bn254_pairing.rs
rename to evm/src/curve_pairings.rs
index 7277c2a8..708e7fb2 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/curve_pairings.rs
@@ -1,25 +1,75 @@
-use std::ops::Add;
+use std::ops::{Add, Mul, Neg};
 
+use ethereum_types::U256;
+use rand::distributions::Standard;
+use rand::prelude::Distribution;
 use rand::Rng;
 
-use crate::extension_tower::{FieldExt, Fp12, Fp2, Fp6, BN254};
+use crate::extension_tower::{FieldExt, Fp12, Fp2, Fp6, Stack, BN254};
 
-// The curve consists of pairs (x, y): (BN254, BN254) | y^2 = x^3 + 2
 #[derive(Debug, Copy, Clone, PartialEq)]
-pub struct Curve {
-    pub x: BN254,
-    pub y: BN254,
+pub struct Curve<T>
+where
+    T: FieldExt,
+{
+    pub x: T,
+    pub y: T,
+}
+
+impl<T: FieldExt> Curve<T> {
+    pub fn unit() -> Self {
+        Curve {
+            x: T::ZERO,
+            y: T::ZERO,
+        }
+    }
+}
+
+impl<T: FieldExt + Stack> Curve<T> {
+    pub fn on_stack(self) -> Vec<U256> {
+        let mut stack = self.x.on_stack();
+        stack.extend(self.y.on_stack());
+        stack
+    }
+}
+
+impl<T> Curve<T>
+where
+    T: FieldExt,
+    Curve<T>: CyclicGroup,
+{
+    pub fn int(z: i32) -> Self {
+        Curve::<T>::GENERATOR * z
+    }
+}
+
+impl<T> Distribution<Curve<T>> for Standard
+where
+    T: FieldExt,
+    Curve<T>: CyclicGroup,
+{
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Curve<T> {
+        Curve::<T>::GENERATOR * rng.gen::<i32>()
+    }
 }
 
-/// Standard addition formula for elliptic curves, restricted to the cases  
+/// Standard addition formula for elliptic curves (identity handled separately). Source:
-/// where neither inputs nor output would ever be the identity O. source:
 /// https://en.wikipedia.org/wiki/Elliptic_curve#Algebraic_interpretation
-impl Add for Curve {
+impl<T: FieldExt> Add for Curve<T> {
     type Output = Self;
 
     fn add(self, other: Self) -> Self {
+        if self == Curve::<T>::unit() {
+            return other;
+        }
+        if other == Curve::<T>::unit() {
+            return self;
+        }
+        if self == -other {
+            return Curve::<T>::unit();
+        }
         let m = if self == other {
-            BN254::new(3) * self.x * self.x / (BN254::new(2) * self.y)
+            T::new(3) * self.x * self.x / (T::new(2) * self.y)
         } else {
             (other.y - self.y) / (other.x - self.x)
         };
@@ -31,33 +81,130 @@ impl Add for Curve {
     }
 }
 
-// The twisted curve consists of pairs (x, y): (Fp2, Fp2) | y^2 = x^3 + 3/(9 + i)
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub struct TwistedCurve {
-    pub x: Fp2<BN254>,
-    pub y: Fp2<BN254>,
+impl<T: FieldExt> Neg for Curve<T> {
+    type Output = Curve<T>;
+
+    fn neg(self) -> Self {
+        Curve {
+            x: self.x,
+            y: -self.y,
+        }
+    }
+}
+
+pub trait CyclicGroup {
+    const GENERATOR: Self;
+}
+
+/// The BN curve consists of pairs
+///     (x, y): (BN254, BN254) | y^2 = x^3 + 3
+/// with generator given by (1, 2)
+impl CyclicGroup for Curve<BN254> {
+    const GENERATOR: Curve<BN254> = Curve {
+        x: BN254 { val: U256::one() },
+        y: BN254 {
+            val: U256([2, 0, 0, 0]),
+        },
+    };
+}
+
+impl<T> Mul<i32> for Curve<T>
+where
+    T: FieldExt,
+    Curve<T>: CyclicGroup,
+{
+    type Output = Curve<T>;
+
+    fn mul(self, other: i32) -> Self {
+        if other == 0 {
+            return Curve::<T>::unit();
+        }
+        if self == Curve::<T>::unit() {
+            return Curve::<T>::unit();
+        }
+
+        let mut x: Curve<T> = self;
+        if other.is_negative() {
+            x = -x;
+        }
+        let mut result = Curve::<T>::unit();
+
+        let mut exp = other.unsigned_abs() as usize;
+        while exp > 0 {
+            if exp % 2 == 1 {
+                result = result + x;
+            }
+            exp >>= 1;
+            x = x + x;
+        }
+        result
+    }
+}
+
+/// The twisted curve consists of pairs
+///     (x, y): (Fp2<BN254>, Fp2<BN254>) | y^2 = x^3 + 3/(9 + i)
+/// with generator given as follows
+impl CyclicGroup for Curve<Fp2<BN254>> {
+    const GENERATOR: Curve<Fp2<BN254>> = Curve {
+        x: Fp2 {
+            re: BN254 {
+                val: U256([
+                    0x46debd5cd992f6ed,
+                    0x674322d4f75edadd,
+                    0x426a00665e5c4479,
+                    0x1800deef121f1e76,
+                ]),
+            },
+            im: BN254 {
+                val: U256([
+                    0x97e485b7aef312c2,
+                    0xf1aa493335a9e712,
+                    0x7260bfb731fb5d25,
+                    0x198e9393920d483a,
+                ]),
+            },
+        },
+        y: Fp2 {
+            re: BN254 {
+                val: U256([
+                    0x4ce6cc0166fa7daa,
+                    0xe3d1e7690c43d37b,
+                    0x4aab71808dcb408f,
+                    0x12c85ea5db8c6deb,
+                ]),
+            },
+            im: BN254 {
+                val: U256([
+                    0x55acdadcd122975b,
+                    0xbc4b313370b38ef3,
+                    0xec9e99ad690c3395,
+                    0x090689d0585ff075,
+                ]),
+            },
+        },
+    };
 }
 
 // The tate pairing takes a point each from the curve and its twist and outputs an Fp12 element
-pub fn tate(p: Curve, q: TwistedCurve) -> Fp12<BN254> {
-    let miller_output = miller_loop(p, q);
-    invariant_exponent(miller_output)
+pub fn bn_tate(p: Curve<BN254>, q: Curve<Fp2<BN254>>) -> Fp12<BN254> {
+    let miller_output = bn_miller_loop(p, q);
+    bn_final_exponent(miller_output)
 }
 
 /// Standard code for miller loop, can be found on page 99 at this url:
 /// https://static1.squarespace.com/static/5fdbb09f31d71c1227082339/t/5ff394720493bd28278889c6/1609798774687/PairingsForBeginners.pdf#page=107
-/// where EXP is a hardcoding of the array of Booleans that the loop traverses
-pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12<BN254> {
+/// where BN_EXP is a hardcoding of the array of Booleans that the loop traverses
+pub fn bn_miller_loop(p: Curve<BN254>, q: Curve<Fp2<BN254>>) -> Fp12<BN254> {
     let mut r = p;
     let mut acc: Fp12<BN254> = Fp12::<BN254>::UNIT;
     let mut line: Fp12<BN254>;
 
-    for i in EXP {
-        line = tangent(r, q);
+    for i in BN_EXP {
+        line = bn_tangent(r, q);
         r = r + r;
         acc = line * acc * acc;
         if i {
-            line = cord(p, r, q);
+            line = bn_cord(p, r, q);
             r = r + p;
             acc = line * acc;
         }
@@ -66,22 +213,22 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12<BN254> {
 }
 
 /// The sloped line function for doubling a point
-pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12<BN254> {
+pub fn bn_tangent(p: Curve<BN254>, q: Curve<Fp2<BN254>>) -> Fp12<BN254> {
     let cx = -BN254::new(3) * p.x * p.x;
     let cy = BN254::new(2) * p.y;
-    sparse_embed(p.y * p.y - BN254::new(9), q.x * cx, q.y * cy)
+    bn_sparse_embed(p.y * p.y - BN254::new(9), q.x * cx, q.y * cy)
 }
 
 /// The sloped line function for adding two points
-pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12<BN254> {
+pub fn bn_cord(p1: Curve<BN254>, p2: Curve<BN254>, q: Curve<Fp2<BN254>>) -> Fp12<BN254> {
     let cx = p2.y - p1.y;
     let cy = p1.x - p2.x;
-    sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x * cx, q.y * cy)
+    bn_sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x * cx, q.y * cy)
 }
 
 /// The tangent and cord functions output sparse Fp12 elements.
 /// This map embeds the nonzero coefficients into an Fp12.
-pub fn sparse_embed(g000: BN254, g01: Fp2<BN254>, g11: Fp2<BN254>) -> Fp12<BN254> {
+pub fn bn_sparse_embed(g000: BN254, g01: Fp2<BN254>, g11: Fp2<BN254>) -> Fp12<BN254> {
     let g0 = Fp6 {
         t0: Fp2 {
             re: g000,
@@ -100,8 +247,8 @@ pub fn sparse_embed(g000: BN254, g01: Fp2<BN254>, g11: Fp2<BN254>) -> Fp12<BN254
     Fp12 { z0: g0, z1: g1 }
 }
 
-pub fn gen_fp12_sparse<R: Rng + ?Sized>(rng: &mut R) -> Fp12<BN254> {
-    sparse_embed(
+pub fn gen_bn_fp12_sparse<R: Rng + ?Sized>(rng: &mut R) -> Fp12<BN254> {
+    bn_sparse_embed(
         rng.gen::<BN254>(),
         rng.gen::<Fp2<BN254>>(),
         rng.gen::<Fp2<BN254>>(),
@@ -120,10 +267,10 @@ pub fn gen_fp12_sparse<R: Rng + ?Sized>(rng: &mut R) -> Fp12<BN254> {
 ///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
 /// where 0 < a0, a1, a2 < p. Then the final power is given by
 ///     y = y_3 * (y^a2)_2 * (y^-a1)_1 * (y^-a0)
-pub fn invariant_exponent(f: Fp12<BN254>) -> Fp12<BN254> {
+pub fn bn_final_exponent(f: Fp12<BN254>) -> Fp12<BN254> {
     let mut y = f.frob(6) / f;
     y = y.frob(2) * y;
-    let (y_a2, y_a1, y_a0) = get_custom_powers(y);
+    let (y_a2, y_a1, y_a0) = get_bn_custom_powers(y);
     y.frob(3) * y_a2.frob(2) * y_a1.frob(1) * y_a0
 }
 
@@ -135,10 +282,10 @@ pub fn invariant_exponent(f: Fp12<BN254>) -> Fp12<BN254> {
 ///     y^a2, y^a1 = y^a4 * y^a2 * y^a2 * y^(-a0), y^(-a0)
 ///
 /// Representing a4, a2, a0 in *little endian* binary, define
-///     EXPS4 = [(a4[i], a2[i], a0[i]) for i in       0..len(a4)]
-///     EXPS2 = [       (a2[i], a0[i]) for i in len(a4)..len(a2)]
-///     EXPS0 = [               a0[i]  for i in len(a2)..len(a0)]
-fn get_custom_powers(f: Fp12<BN254>) -> (Fp12<BN254>, Fp12<BN254>, Fp12<BN254>) {
+///     BN_EXPS4 = [(a4[i], a2[i], a0[i]) for i in       0..len(a4)]
+///     BN_EXPS2 = [       (a2[i], a0[i]) for i in len(a4)..len(a2)]
+///     BN_EXPS0 = [               a0[i]  for i in len(a2)..len(a0)]
+fn get_bn_custom_powers(f: Fp12<BN254>) -> (Fp12<BN254>, Fp12<BN254>, Fp12<BN254>) {
     let mut sq: Fp12<BN254> = f;
     let mut y0: Fp12<BN254> = Fp12::<BN254>::UNIT;
     let mut y2: Fp12<BN254> = Fp12::<BN254>::UNIT;
@@ -147,7 +294,7 @@ fn get_custom_powers(f: Fp12<BN254>) -> (Fp12<BN254>, Fp12<BN254>, Fp12<BN254>)
     // proceed via standard squaring algorithm for exponentiation
 
     // must keep multiplying all three values: a4, a2, a0
-    for (a, b, c) in EXPS4 {
+    for (a, b, c) in BN_EXPS4 {
         if a {
             y4 = y4 * sq;
         }
@@ -163,7 +310,7 @@ fn get_custom_powers(f: Fp12<BN254>) -> (Fp12<BN254>, Fp12<BN254>, Fp12<BN254>)
     y4 = y4 * sq;
 
     // must keep multiplying remaining two values: a2, a0
-    for (a, b) in EXPS2 {
+    for (a, b) in BN_EXPS2 {
         if a {
             y2 = y2 * sq;
         }
@@ -176,7 +323,7 @@ fn get_custom_powers(f: Fp12<BN254>) -> (Fp12<BN254>, Fp12<BN254>, Fp12<BN254>)
     y2 = y2 * sq;
 
     // must keep multiplying final remaining value: a0
-    for a in EXPS0 {
+    for a in BN_EXPS0 {
         if a {
             y0 = y0 * sq;
         }
@@ -192,7 +339,7 @@ fn get_custom_powers(f: Fp12<BN254>) -> (Fp12<BN254>, Fp12<BN254>, Fp12<BN254>)
     (y2, y4 * y2 * y2 * y0_inv, y0_inv)
 }
 
-const EXP: [bool; 253] = [
+const BN_EXP: [bool; 253] = [
     true, false, false, false, false, false, true, true, false, false, true, false, false, false,
     true, false, false, true, true, true, false, false, true, true, true, false, false, true,
     false, true, true, true, false, false, false, false, true, false, false, true, true, false,
@@ -216,7 +363,7 @@ const EXP: [bool; 253] = [
 
-// The folowing constants are defined above get_custom_powers
+// The following constants are defined above get_bn_custom_powers
 
-const EXPS4: [(bool, bool, bool); 64] = [
+const BN_EXPS4: [(bool, bool, bool); 64] = [
     (true, true, false),
     (true, true, true),
     (true, true, true),
@@ -283,7 +430,7 @@ const EXPS4: [(bool, bool, bool); 64] = [
     (true, true, true),
 ];
 
-const EXPS2: [(bool, bool); 62] = [
+const BN_EXPS2: [(bool, bool); 62] = [
     (true, false),
     (true, true),
     (false, false),
@@ -348,7 +495,7 @@ const EXPS2: [(bool, bool); 62] = [
     (true, true),
 ];
 
-const EXPS0: [bool; 65] = [
+const BN_EXPS0: [bool; 65] = [
     false, false, true, false, false, true, true, false, true, false, true, true, true, false,
     true, false, false, false, true, false, false, true, false, true, false, true, true, false,
     false, false, false, false, true, false, true, false, true, true, true, false, false, true,
diff --git a/evm/src/extension_tower.rs b/evm/src/extension_tower.rs
index ddcfe254..0e654c88 100644
--- a/evm/src/extension_tower.rs
+++ b/evm/src/extension_tower.rs
@@ -1,4 +1,4 @@
-use std::mem::transmute;
+use std::fmt::Debug;
 use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use ethereum_types::{U256, U512};
@@ -7,6 +7,8 @@ use rand::Rng;
 
 pub trait FieldExt:
     Copy
+    + std::fmt::Debug
+    + std::cmp::PartialEq
     + std::ops::Add<Output = Self>
     + std::ops::Neg<Output = Self>
     + std::ops::Sub<Output = Self>
@@ -15,6 +17,7 @@ pub trait FieldExt:
 {
     const ZERO: Self;
     const UNIT: Self;
+    fn new(val: usize) -> Self;
     fn inv(self) -> Self;
 }
 
@@ -30,14 +33,6 @@ pub struct BN254 {
     pub val: U256,
 }
 
-impl BN254 {
-    pub fn new(val: usize) -> BN254 {
-        BN254 {
-            val: U256::from(val),
-        }
-    }
-}
-
 impl Distribution<BN254> for Standard {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> BN254 {
         let xs = rng.gen::<[u64; 4]>();
@@ -91,6 +86,11 @@ impl Mul for BN254 {
 impl FieldExt for BN254 {
     const ZERO: Self = BN254 { val: U256::zero() };
     const UNIT: Self = BN254 { val: U256::one() };
+    fn new(val: usize) -> BN254 {
+        BN254 {
+            val: U256::from(val),
+        }
+    }
     fn inv(self) -> BN254 {
         let exp = BN_BASE - 2;
         let mut current = self;
@@ -131,12 +131,6 @@ pub struct BLS381 {
 }
 
 impl BLS381 {
-    pub fn new(val: usize) -> BLS381 {
-        BLS381 {
-            val: U512::from(val),
-        }
-    }
-
     pub fn lo(self) -> U256 {
         U256(self.val.0[..4].try_into().unwrap())
     }
@@ -234,6 +228,11 @@ impl Mul for BLS381 {
 impl FieldExt for BLS381 {
     const ZERO: Self = BLS381 { val: U512::zero() };
     const UNIT: Self = BLS381 { val: U512::one() };
+    fn new(val: usize) -> BLS381 {
+        BLS381 {
+            val: U512::from(val),
+        }
+    }
     fn inv(self) -> BLS381 {
         let exp = BLS_BASE - 2;
         let mut current = self;
@@ -365,6 +364,14 @@ impl<T: FieldExt> FieldExt for Fp2<T> {
         re: T::UNIT,
         im: T::ZERO,
     };
+
+    fn new(val: usize) -> Fp2<T> {
+        Fp2 {
+            re: T::new(val),
+            im: T::ZERO,
+        }
+    }
+
     /// The inverse of z is given by z'/||z||^2 since ||z||^2 = zz'
     fn inv(self) -> Fp2<T> {
         let norm_sq = self.norm_sq();
@@ -975,6 +982,14 @@ where
         t2: Fp2::<T>::ZERO,
     };
 
+    fn new(val: usize) -> Fp6<T> {
+        Fp6 {
+            t0: Fp2::<T>::new(val),
+            t1: Fp2::<T>::ZERO,
+            t2: Fp2::<T>::ZERO,
+        }
+    }
+
     /// Let x_n = x^(p^n) and note that
     ///     x_0 = x^(p^0) = x^1 = x
     ///     (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m}
@@ -1040,6 +1055,13 @@ where
         z1: Fp6::<T>::ZERO,
     };
 
+    fn new(val: usize) -> Fp12<T> {
+        Fp12 {
+            z0: Fp6::<T>::new(val),
+            z1: Fp6::<T>::ZERO,
+        }
+    }
+
     /// By Galois Theory, given x: Fp12, the product
     ///     phi = Prod_{i=0}^11 x_i
     /// lands in BN254, and hence the inverse of x is given by
@@ -1204,22 +1226,51 @@ pub trait Stack {
     fn on_stack(self) -> Vec<U256>;
 }
 
+impl Stack for BN254 {
+    fn on_stack(self) -> Vec<U256> {
+        vec![self.val]
+    }
+}
+
 impl Stack for BLS381 {
     fn on_stack(self) -> Vec<U256> {
         vec![self.lo(), self.hi()]
     }
 }
 
-impl Stack for Fp6<BN254> {
+impl<T> Stack for Fp2<T>
+where
+    T: FieldExt + Stack,
+{
     fn on_stack(self) -> Vec<U256> {
-        let f: [U256; 6] = unsafe { transmute(self) };
-        f.into_iter().collect()
+        let mut stack = self.re.on_stack();
+        stack.extend(self.im.on_stack());
+        stack
     }
 }
 
-impl Stack for Fp12<BN254> {
+impl<T> Stack for Fp6<T>
+where
+    T: FieldExt,
+    Fp2<T>: Adj + Stack,
+{
     fn on_stack(self) -> Vec<U256> {
-        let f: [U256; 12] = unsafe { transmute(self) };
-        f.into_iter().collect()
+        let mut stack = self.t0.on_stack();
+        stack.extend(self.t1.on_stack());
+        stack.extend(self.t2.on_stack());
+        stack
+    }
+}
+
+impl<T> Stack for Fp12<T>
+where
+    T: FieldExt,
+    Fp2<T>: Adj,
+    Fp6<T>: Stack,
+{
+    fn on_stack(self) -> Vec<U256> {
+        let mut stack = self.z0.on_stack();
+        stack.extend(self.z1.on_stack());
+        stack
     }
 }
diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index dc07d233..02730321 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -9,11 +9,11 @@
 
 pub mod all_stark;
 pub mod arithmetic;
-pub mod bn254_pairing;
 pub mod config;
 pub mod constraint_consumer;
 pub mod cpu;
 pub mod cross_table_lookup;
+pub mod curve_pairings;
 pub mod extension_tower;
 pub mod fixed_recursive_verifier;
 pub mod generation;
diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs
index a5ebf2ac..94e13e50 100644
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@@ -45,9 +45,7 @@ pub(crate) fn kernel_peek<F: Field>(
     segment: Segment,
     virt: usize,
 ) -> U256 {
-    state
-        .memory
-        .get(MemoryAddress::new(state.registers.context, segment, virt))
+    state.memory.get(MemoryAddress::new(0, segment, virt))
 }
 
 pub(crate) fn mem_read_with_log<F: Field>(