diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 8fb72d03..d1050560 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -330,7 +330,7 @@ impl Fp6 {
     /// while the values of
     ///     t^(p^n) and t^(2p^n)
     /// are precomputed in the constant arrays FROB_T1 and FROB_T2
-    fn frob(self, n: usize) -> Fp6 {
+    pub fn frob(self, n: usize) -> Fp6 {
         let n = n % 6;
         let frob_t1 = FROB_T1[n];
         let frob_t2 = FROB_T2[n];
@@ -373,6 +373,11 @@ impl Fp6 {
         let prod_24 = prod_13.frob(1);
         prod_24.scale(prod_odds_over_phi)
     }
+
+    pub fn on_stack(self) -> Vec<U256> {
+        // SAFETY (assumed): Fp6 is laid out as six contiguous U256 words -- TODO confirm.
+        Vec::from(unsafe { transmute::<Fp6, [U256; 6]>(self) })
+    }
 }
 
 #[allow(clippy::suspicious_arithmetic_impl)]
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 0663ba1c..c17117ab 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -2,7 +2,7 @@
 ///     out = miller_loop(P, Q)
 ///     return bn254_invariant_exponent(P, Q)
 global bn254_tate:
-    // stack:                      inp, out, retdest
+    // stack:                                inp, out, retdest
     %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out)
     // stack: inp, out, bn254_invariant_exponent, out, retdest
     %jump(bn254_miller)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index ff6fb72c..cd81e6e7 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -46,12 +46,12 @@
 global mul_fp254_12:
     // stack:                                   inA, inB, out 
     DUP1  
-    %offset_fp254_6 
+    %add_const(6) 
     // stack:                             inA', inA, inB, out 
     %load_fp254_6
     // stack:                               f', inA, inB, out 
     DUP8  
-    %offset_fp254_6
+    %add_const(6)
     // stack:                         inB', f', inA, inB, out 
     %load_fp254_6
     // stack:                           g', f', inA, inB, out 
@@ -121,7 +121,7 @@ mul_fp254_12_3:
     %subr_fp254_6
     // stack:          (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     DUP14  
-    %offset_fp254_6 
+    %add_const(6) 
     // stack:    out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     %store_fp254_6
     // stack:                                    fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
@@ -183,7 +183,7 @@ mul_fp254_12_3:
 global mul_fp254_12_sparse:
     // stack:                                                                    inA, inB, out
     DUP1  
-    %offset_fp254_6
+    %add_const(6)
     // stack:                                                              inA', inA, inB, out
     %load_fp254_6
     // stack:                                                                f', inA, inB, out
@@ -265,7 +265,7 @@ global mul_fp254_12_sparse:
     %add_fp254_6_hole
     // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     DUP9
-    %offset_fp254_6
+    %add_const(6)
     // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     %store_fp254_6
     // stack:                                                                    inA, inB, out
@@ -317,9 +317,6 @@ global mul_fp254_12_sparse:
 ///
 /// f, f' consist of six elements on the stack
 
-global square_fp254_12_test:
-    POP
-
 global square_fp254_12:
     // stack:                                                                               inp, out
     DUP1
@@ -335,12 +332,12 @@ global square_fp254_12:
     %dup_fp254_6_2
     // stack:                                  f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP16
-    %offset_fp254_6
+    %add_const(6)
     // stack:                            out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     PUSH square_fp254_12_1
     // stack:         square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP10
-    %offset_fp254_6
+    %add_const(6)
     // stack:   inp', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     %load_fp254_6
     // stack:     f', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 8c062b2a..ed282696 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -1,3 +1,5 @@
+// frob_fp12 tests
+
 global test_frob_fp254_12_1:
     // stack:         ptr
     %frob_fp254_12_1
@@ -42,13 +44,13 @@ global test_frob_fp254_12_6:
     // stack:   ptr, g, ptr
     %store_fp254_6
     // stack:           ptr
-    DUP1  %offset_fp254_6
+    DUP1  %add_const(6)
     // stack:     ptr', ptr
     %load_fp254_6
     // stack:       f', ptr
     %frobz_1
     // stack:       g', ptr
-    DUP7  %offset_fp254_6
+    DUP7  %add_const(6)
     // stack: ptr', g', ptr
     %store_fp254_6
     // stack:           ptr
@@ -67,13 +69,13 @@ global test_frob_fp254_12_6:
     // stack:   out, g, ptr , out
     %store_fp254_6 
     // stack:           ptr , out
-    %offset_fp254_6
+    %add_const(6)
     // stack:           ptr', out
     %load_fp254_6
     // stack:             f', out
     %frobz_2
     // stack:             g', out
-    DUP7  %offset_fp254_6
+    DUP7  %add_const(6)
     // stack:       out', g', out
     %store_fp254_6
     // stack:                 out
@@ -91,13 +93,13 @@ global test_frob_fp254_12_6:
     // stack:   ptr, g, ptr
     %store_fp254_6
     // stack:           ptr
-    DUP1  %offset_fp254_6
+    DUP1  %add_const(6)
     // stack:     ptr', ptr
     %load_fp254_6
     // stack:       f', ptr
     %frobz_3
     // stack:       g', ptr
-    DUP7  %offset_fp254_6
+    DUP7  %add_const(6)
     // stack: ptr', g', ptr
     %store_fp254_6
     // stack:           ptr
@@ -105,18 +107,38 @@ global test_frob_fp254_12_6:
 
 %macro frob_fp254_12_6
     // stack:           ptr
-    DUP1  %offset_fp254_6
+    DUP1  %add_const(6)
     // stack:     ptr', ptr
     %load_fp254_6
     // stack:       f', ptr
     %frobz_6
     // stack:       g', ptr
-    DUP7  %offset_fp254_6
+    DUP7  %add_const(6)
     // stack: ptr', g', ptr
     %store_fp254_6
     // stack:           ptr
 %endmacro
 
+// frob_fp6 tests
+
+global test_frob_fp254_6_1:
+    // stack:         f  (Fp6 operand held directly on the stack, not a pointer)
+    %frob_fp254_6_1
+    // stack:         g  (output of %frob_fp254_6_1)
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_6_2:
+    // stack:         f  (Fp6 operand held directly on the stack, not a pointer)
+    %frob_fp254_6_2
+    // stack:         g  (output of %frob_fp254_6_2)
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_6_3:
+    // stack:         f  (Fp6 operand held directly on the stack, not a pointer)
+    %frob_fp254_6_3
+    // stack:         g  (output of %frob_fp254_6_3)
+    %jump(0xdeadbeef)
+
 
 /// let Z` denote the complex conjugate of Z
 
@@ -131,37 +153,37 @@ global test_frob_fp254_12_6:
     // stack: C0 , C1 , C2
     %conj_fp254_2
     // stack: D0 , C1 , C2
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: C2 , C1 , D0
     %conj_fp254_2
     // stack: C2`, C1 , D0
     %frobt2_1
     // stack: D2 , C1 , D0
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: D0 , C1 , D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: C1 , D0 , D2
     %conj_fp254_2
     // stack: C1`, D0 , D2
     %frobt1_1
     // stack: D1 , D0 , D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: D0 , D1 , D2
 %endmacro
 
 %macro frob_fp254_6_2
     // stack: C0, C1, C2
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: C2, C1, C0
     %frobt2_2
     // stack: D2, C1, C0
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: C0, C1, D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: C1, C0, D2
     %frobt1_2
     // stack: D1, C0, D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: D0, D1, D2
 %endmacro
 
@@ -169,21 +191,21 @@ global test_frob_fp254_12_6:
     // stack: C0 , C1 , C2
     %conj_fp254_2
     // stack: D0 , C1 , C2
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: C2 , C1 , D0
     %conj_fp254_2
     // stack: C2`, C1 , D0
     %frobt2_3
     // stack: D2 , C1 , D0
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: D0 , C1 , D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: C1 , D0 , D2
     %conj_fp254_2
     // stack: C1`, D0 , D2
     %frobt1_3
     // stack: D1 , D0 , D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: D0 , D1 , D2
 %endmacro
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index e2bed9bf..c5262afd 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -1,10 +1,5 @@
-%macro offset_fp254_6
-    %add_const(6)
-%endmacro
-
 // fp254_2 macros
 
-// cost: 2 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp254_2
     // stack:       ptr
     DUP1  
@@ -28,30 +23,6 @@
     // stack: a, -b 
 %endmacro
 
-%macro swap_fp254_2
-    // stack: a , a_, b , b_
-    SWAP2
-    // stack: b , a_, a , b_
-    SWAP1
-    // stack: a_, b , a , b_
-    SWAP3
-    // stack: b_, b , a , a_
-    SWAP1 
-    // stack: b , b_, a , a_
-%endmacro
-
-%macro swap_fp254_2_hole_2
-    // stack: a , a_, X, b , b_
-    SWAP4
-    // stack: b , a_, X, a , b_
-    SWAP1
-    // stack: a_, b , X, a , b_
-    SWAP5
-    // stack: b_, b , X, a , a_
-    SWAP1 
-    // stack: b , b_, X, a , a_
-%endmacro
-
 %macro mul_fp254__fp254_2
     // stack:    c, x, y
     SWAP2
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 24a84906..3b6734c3 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,7 +4,7 @@ use anyhow::Result;
 use ethereum_types::U256;
 use rand::Rng;
 
-use crate::bn254_arithmetic::{Fp, Fp12, Fp2};
+use crate::bn254_arithmetic::{Fp, Fp12, Fp2, Fp6};
 use crate::bn254_pairing::{
     gen_fp12_sparse, invariant_exponent, miller_loop, tate, Curve, TwistedCurve,
 };
@@ -51,17 +51,64 @@ fn extract_kernel_memory(range: Range<usize>, interpreter: Interpreter<'static>)
     output
 }
 
-fn setup_mul_test(out: usize, f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
-    let in0: usize = 64;
-    let in1: usize = 76;
+fn extract_stack(interpreter: Interpreter<'static>) -> Vec<U256> {
+    // Copies the interpreter's stack out in reversed order.
+    interpreter.stack().iter().rev().cloned().collect()
+}
+
+fn setup_mul_fp6_test(f: Fp6, g: Fp6, label: &str) -> InterpreterSetup {
+    // Stack: f, then g (only for "mul_fp254_6"), then the 0xdeadbeef sentinel.
+    let takes_second_operand = label == "mul_fp254_6";
+    let mut stack = f.on_stack();
+    stack.extend(takes_second_operand.then(|| g.on_stack()).unwrap_or_default());
+    stack.push(U256::from(0xdeadbeefu32));
     InterpreterSetup {
         label: label.to_string(),
-        stack: vec![
-            U256::from(in0),
-            U256::from(in1),
-            U256::from(out),
-            U256::from(0xdeadbeefu32),
-        ],
+        stack,
+        memory: vec![],
+    }
+}
+
+#[test]
+fn test_mul_fp6() -> Result<()> {
+    // Random operands: the kernel product and square are checked against the Rust Fp6 `*`.
+    let mut rng = rand::thread_rng();
+    let f: Fp6 = rng.gen();
+    let g: Fp6 = rng.gen();
+
+    // Kernel run of f * g.
+    let product_run = setup_mul_fp6_test(f, g, "mul_fp254_6").run().unwrap();
+    let product = extract_stack(product_run);
+
+    // Kernel run of f * f.
+    let square_run = setup_mul_fp6_test(f, f, "square_fp254_6").run().unwrap();
+    let square = extract_stack(square_run);
+
+    let expected_product = (f * g).on_stack();
+    let expected_square = (f * f).on_stack();
+
+    assert_eq!(product, expected_product);
+    assert_eq!(square, expected_square);
+
+    Ok(())
+}
+
+fn setup_mul_fp12_test(out: usize, f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
+    // Kernel memory offsets where the two operands are preloaded.
+    let (in0, in1): (usize, usize) = (64, 76);
+
+    // Squaring reads one input pointer, so `in0` is left off and `in1` (holding `g`) is used.
+    let inputs = if label == "square_fp254_12" {
+        vec![in1]
+    } else {
+        vec![in0, in1]
+    };
+    let tail = [U256::from(out), U256::from(0xdeadbeefu32)];
+    let stack: Vec<U256> = inputs.into_iter().map(U256::from).chain(tail).collect();
+
+    InterpreterSetup {
+        label: label.to_string(),
+        stack,
         memory: vec![(in0, f.on_stack()), (in1, g.on_stack())],
     }
 }
@@ -75,9 +122,9 @@ fn test_mul_fp12() -> Result<()> {
     let g: Fp12 = rng.gen::<Fp12>();
     let h: Fp12 = gen_fp12_sparse(&mut rng);
 
-    let setup_normal: InterpreterSetup = setup_mul_test(out, f, g, "mul_fp254_12");
-    let setup_sparse: InterpreterSetup = setup_mul_test(out, f, h, "mul_fp254_12_sparse");
-    let setup_square: InterpreterSetup = setup_mul_test(out, f, f, "square_fp254_12_test");
+    let setup_normal: InterpreterSetup = setup_mul_fp12_test(out, f, g, "mul_fp254_12");
+    let setup_sparse: InterpreterSetup = setup_mul_fp12_test(out, f, h, "mul_fp254_12_sparse");
+    let setup_square: InterpreterSetup = setup_mul_fp12_test(out, f, f, "square_fp254_12");
 
     let intrptr_normal: Interpreter = setup_normal.run().unwrap();
     let intrptr_sparse: Interpreter = setup_sparse.run().unwrap();
@@ -98,7 +145,43 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
+fn setup_frob_fp6_test(f: Fp6, label: &str) -> InterpreterSetup {
+    // The operand goes on the stack itself; no kernel memory is preloaded.
+    let label = String::from(label);
+    let stack = f.on_stack();
+    let memory = Vec::new();
+    InterpreterSetup { label, stack, memory }
+}
+
+#[test]
+fn test_frob_fp6() -> Result<()> {
+    let mut rng = rand::thread_rng();
+    let f: Fp6 = rng.gen();
+
+    // Kernel test label paired with the Frobenius power it is expected to apply.
+    let cases: [(&str, usize); 3] = [
+        ("test_frob_fp254_6_1", 1),
+        ("test_frob_fp254_6_2", 2),
+        ("test_frob_fp254_6_3", 3),
+    ];
+
+    // Run every kernel routine before comparing anything.
+    let outputs: Vec<Vec<U256>> = cases
+        .iter()
+        .map(|(label, _)| extract_stack(setup_frob_fp6_test(f, label).run().unwrap()))
+        .collect();
+
+    // Reference values come from the Rust Fp6 implementation.
+    let expected: Vec<Vec<U256>> = cases.iter().map(|(_, n)| f.frob(*n).on_stack()).collect();
+
+    for (out, exp) in outputs.iter().zip(&expected) {
+        assert_eq!(out, exp);
+    }
+
+    Ok(())
+}
+
+fn setup_frob_fp12_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
     InterpreterSetup {
         label: label.to_string(),
         stack: vec![U256::from(ptr)],
@@ -113,10 +196,10 @@ fn test_frob_fp12() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
 
-    let setup_frob_1 = setup_frob_test(ptr, f, "test_frob_fp254_12_1");
-    let setup_frob_2 = setup_frob_test(ptr, f, "test_frob_fp254_12_2");
-    let setup_frob_3 = setup_frob_test(ptr, f, "test_frob_fp254_12_3");
-    let setup_frob_6 = setup_frob_test(ptr, f, "test_frob_fp254_12_6");
+    let setup_frob_1 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_1");
+    let setup_frob_2 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_2");
+    let setup_frob_3 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_3");
+    let setup_frob_6 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_6");
 
     let intrptr_frob_1: Interpreter = setup_frob_1.run().unwrap();
     let intrptr_frob_2: Interpreter = setup_frob_2.run().unwrap();