From 5d2a9b3d5341c7d21bee9f65a16b70240c865204 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 9 Nov 2022 16:17:07 -0800
Subject: [PATCH 001/201] sparse mul finished

---
 .../{fp6_macros.asm => field_macros.asm}      | 145 +++++-
 evm/src/cpu/kernel/asm/fields/fp12_mul.asm    | 411 +++++++++++++++---
 evm/src/cpu/kernel/asm/fields/fp6_mul.asm     |   4 +
 evm/src/cpu/kernel/asm/fields/frobenius.asm   |   0
 4 files changed, 483 insertions(+), 77 deletions(-)
 rename evm/src/cpu/kernel/asm/fields/{fp6_macros.asm => field_macros.asm} (68%)
 create mode 100644 evm/src/cpu/kernel/asm/fields/frobenius.asm

diff --git a/evm/src/cpu/kernel/asm/fields/fp6_macros.asm b/evm/src/cpu/kernel/asm/fields/field_macros.asm
similarity index 68%
rename from evm/src/cpu/kernel/asm/fields/fp6_macros.asm
rename to evm/src/cpu/kernel/asm/fields/field_macros.asm
index b575c234..02651a1b 100644
--- a/evm/src/cpu/kernel/asm/fields/fp6_macros.asm
+++ b/evm/src/cpu/kernel/asm/fields/field_macros.asm
@@ -1,3 +1,7 @@
+%macro offset_fp6
+    %add_const(6)
+%endmacro
+
 // cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp6
     // stack: ptr
@@ -87,6 +91,51 @@
     // stack:
 %endmacro
 
+// cost: 6 stores + 7 swaps/dups + 5 adds + 6 doubles = 6*4 + 7*1 + 5*2 + 6*2 = 53
+%macro store_fp6_double
+    // stack:        ptr, x0, x1, x2, x3, x4, x5
+    SWAP6
+    // stack:         x5, x0, x1, x2, x3, x4, ptr
+    PUSH 2  MULFP254
+    // stack:       2*x5, x0, x1, x2, x3, x4, ptr
+    DUP7  %add_const(5)
+    // stack: ind5, 2*x5, x0, x1, x2, x3, x4, ptr
+    %mstore_kernel_general
+    // stack:             x0, x1, x2, x3, x4, ptr
+    PUSH 2  MULFP254
+    // stack:           2*x0, x1, x2, x3, x4, ptr
+    DUP6
+    // stack:     ind0, 2*x0, x1, x2, x3, x4, ptr
+    %mstore_kernel_general
+    // stack:                 x1, x2, x3, x4, ptr
+    PUSH 2  MULFP254
+    // stack:               2*x1, x2, x3, x4, ptr
+    DUP5  %add_const(1)
+    // stack:         ind1, 2*x1, x2, x3, x4, ptr
+    %mstore_kernel_general
+    // stack:                     x2, x3, x4, ptr
+    PUSH 2  MULFP254
+    // stack:                   2*x2, x3, x4, ptr
+    DUP4  %add_const(2)
+    // stack:             ind2, 2*x2, x3, x4, ptr
+    %mstore_kernel_general
+    // stack:                         x3, x4, ptr
+    PUSH 2  MULFP254
+    // stack:                       2*x3, x4, ptr
+    DUP3  %add_const(3)
+    // stack:                 ind3, 2*x3, x4, ptr
+    %mstore_kernel_general
+    // stack:                             x4, ptr
+    PUSH 2  MULFP254
+    // stack:                           2*x4, ptr
+    SWAP1
+    // stack:                           ptr, 2*x4
+    %add_const(4)
+    // stack:                          ind4, 2*x4
+    %mstore_kernel_general
+    // stack:
+%endmacro
+
 // cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro store_fp6(ptr)
     // stack:       x0, x1, x2, x3, x4, x5
@@ -175,31 +224,55 @@
     DUP6
     DUP6
     DUP6
-    // stack: f: 6, g: 6
+    // stack: f: 6, f: 6
+%endmacro 
+
+// cost: 6
+%macro dup_fp6_2
+    // stack:       X: 2, f: 6
+    DUP8
+    DUP8
+    DUP8
+    DUP8
+    DUP8
+    DUP8
+    // stack: f: 6, X: 2, f: 6
 %endmacro 
 
 // cost: 6
 %macro dup_fp6_6
-    // stack:       f: 6, g: 6
+    // stack:       X: 6, f: 6
     DUP12
     DUP12
     DUP12
     DUP12
     DUP12
     DUP12
-    // stack: g: 6, f: 6, g: 6
+    // stack: f: 6, X: 6, f: 6
 %endmacro
 
 // cost: 6
 %macro dup_fp6_7
-    // stack:       f: 6, g: 6
+    // stack:       X: 7, f: 6
     DUP13
     DUP13
     DUP13
     DUP13
     DUP13
     DUP13
-    // stack: g: 6, f: 6, g: 6
+    // stack: f: 6, X: 7, f: 6
+%endmacro
+
+// cost: 6
+%macro dup_fp6_8
+    // stack:       X: 8, f: 6
+    DUP14
+    DUP14
+    DUP14
+    DUP14
+    DUP14
+    DUP14
+    // stack: f: 6, X: 8, f: 6
 %endmacro
 
 // cost: 16
@@ -231,7 +304,7 @@
 
 // cost: 16
 // swap two fp6 elements with a stack term separating them
-//    (f: 6, x, g: 6) -> (g: 6, x, f: 6)
+//    (f: 6, X, g: 6) -> (g: 6, X, f: 6)
 %macro swap_fp6_hole
     // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
     SWAP7
@@ -258,6 +331,35 @@
     // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
 %endmacro
 
+// cost: 16
+// swap two fp6 elements with two stack terms separating them
+//    (f: 6, X: 2, g: 6) -> (g: 6, X: 2, f: 6)
+%macro swap_fp6_hole_2
+    // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
+    SWAP8
+    // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
+    SWAP1
+    SWAP9
+    SWAP1
+    // stack: g0, g1, f2, f3, f4, f5, X, f0, f1, g2, g3, g4, g5
+    SWAP2
+    SWAP10
+    SWAP2
+    // stack: g0, g1, g2, f3, f4, f5, X, f0, f1, f2, g3, g4, g5
+    SWAP3
+    SWAP11
+    SWAP3    
+    // stack: g0, g1, g2, g3, f4, f5, X, f0, f1, f2, f3, g4, g5
+    SWAP4
+    SWAP12
+    SWAP4
+    // stack: g0, g1, g2, g3, g4, f5, X, f0, f1, f2, f3, f4, g5
+    SWAP5
+    SWAP13
+    SWAP5
+    // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
+%endmacro
+
 // cost: 16
 %macro add_fp6
     // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
@@ -285,6 +387,37 @@
     // stack:                         h0, h1, h2, h3, h4, h5
 %endmacro
 
+// cost: 18
+// add two fp6 elements with a to-be-popped stack term separating them
+//    (f: 6, X, g: 6) -> (f + g: 6)
+%macro add_fp6_hole
+    // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0,     f2, f3, f4, f5, X, g0, h1, g2, g3, g4, g5 
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0,         f3, f4, f5, X, g0, h1, h2, g3, g4, g5 
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0,             f4, f5, X, g0, h1, h2, h3, g4, g5
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0,                 f5, X, g0, h1, h2, h3, h4, g5
+    SWAP8
+    ADDFP254
+    SWAP7
+    // stack: f0,                     X, g0, h1, h2, h3, h4, h5
+    SWAP1
+    POP
+    ADDFP254
+    // stack:                            h0, h1, h2, h3, h4, h5
+%endmacro
+
 // *reversed argument subtraction* cost: 17
 %macro subr_fp6
     // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index 2f4b9024..253103da 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -1,31 +1,31 @@
 /// Note: uncomment this to test
 
-/// global test_mul_Fp12:
-///     // stack:      f, in0 , f', g, in1 , g', in1, out, in0,       out
+/// global test_mul_fp12:
+///     // stack:      f, inA , f', g, inB , g', inB, out, inA,       out
 ///     DUP7
-///     // stack: in0, f, in0 , f', g, in1 , g', in1, out, in0,       out
+///     // stack: inA, f, inA , f', g, inB , g', inB, out, inA,       out
 ///     %store_fp6
-///     // stack:         in0 , f', g, in1 , g', in1, out, in0,       out
-///     %add_const(6)
-///     // stack:         in0', f', g, in1 , g', in1, out, in0,       out
+///     // stack:         inA , f', g, inB , g', inB, out, inA,       out
+///     %offset_fp6
+///     // stack:         inA', f', g, inB , g', inB, out, inA,       out
 ///     %store_fp6
-///     // stack:                   g, in1 , g', in1, out, in0,       out
+///     // stack:                   g, inB , g', inB, out, inA,       out
 ///     DUP7
-///     // stack:              in1, g, in1 , g', in1, out, in0,       out
+///     // stack:              inB, g, inB , g', inB, out, inA,       out
 ///     %store_fp6
-///     // stack:                      in1 , g', in1, out, in0,       out
-///     %add_const(6)
-///     // stack:                      in1', g', in1, out, in0,       out
+///     // stack:                      inB , g', inB, out, inA,       out
+///     %offset_fp6
+///     // stack:                      inB', g', inB, out, inA,       out
 ///     %store_fp6
-///     // stack:                                in1, out, in0,       out
+///     // stack:                                inB, out, inA,       out
 ///     PUSH ret_stack
-///     // stack:                     ret_stack, in1, out, in0,       out
+///     // stack:                     ret_stack, inB, out, inA,       out
 ///     SWAP3
-///     // stack:                           in0, in1, out, ret_stack, out
-///     %jump(mul_Fp12)
+///     // stack:                           inA, inB, out, ret_stack, out
+///     %jump(mul_fp12)
 /// ret_stack:
 ///     // stack:          out
-///     DUP1  %add_const(6)
+///     DUP1  %offset_fp6
 ///     // stack:    out', out
 ///     %load_fp6
 ///     // stack:      h', out
@@ -56,9 +56,9 @@
 ///  DUP   |   6
 ///  PUSH  |   6
 ///  POP   |   2
-///  JUMP  |   1
+///  JUMP  |   6
 ///
-/// TOTAL: 1196
+/// TOTAL: 1201
 
 /// inputs:
 ///     F = f + f'z
@@ -71,96 +71,365 @@
 ///     h' = (f+f')(g+g') - fg - f'g'
 ///
 /// memory pointers [ind' = ind+6]
-///     {in0: f, in0: f', in1: g, in1':g', out: h, out': h'}
+///     {inA: f, inA': f', inB: g, inB':g', out: h, out': h'}
 ///
 /// f, f', g, g' consist of six elements on the stack
 
-global mul_Fp12:
-    // stack:                                in0, in1, out 
-    DUP1  %add_const(6) 
-    // stack:                          in0', in0, in1, out 
+global mul_fp12:
+    // stack:                                inA, inB, out 
+    DUP1  %offset_fp6 
+    // stack:                          inA', inA, inB, out 
     %load_fp6
-    // stack:                            f', in0, in1, out 
-    DUP8  %add_const(6)
-    // stack:                      in1', f', in0, in1, out 
+    // stack:                            f', inA, inB, out 
+    DUP8  %offset_fp6
+    // stack:                      inB', f', inA, inB, out 
     %load_fp6
-    // stack:                        g', f', in0, in1, out 
+    // stack:                        g', f', inA, inB, out 
     PUSH ret_1
-    // stack:                 ret_1, g', f', in0, in1, out 
+    // stack:                 ret_1, g', f', inA, inB, out 
     %dup_fp6_7
-    // stack:             f', ret_1, g', f', in0, in1, out 
+    // stack:             f', ret_1, g', f', inA, inB, out 
     %dup_fp6_7
-    // stack:         g', f', ret_1, g', f', in0, in1, out 
+    // stack:         g', f', ret_1, g', f', inA, inB, out 
     %jump(mul_fp6)
 ret_1:
-    // stack:                f'g', g'  , f', in0, in1, out 
+    // stack:                f'g', g'  , f', inA, inB, out 
     %dup_fp6_0
-    // stack:          f'g', f'g', g'  , f', in0, in1, out 
-    %store_fp6_sh(100)                                    
-    // stack:                f'g', g'  , f', in0, in1, out  {100: sh(f'g')}
-    %store_fp6(106)
-    // stack:                      g'  , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:          f'g', f'g', g'  , f', inA, inB, out 
+    %store_fp6_sh(0)                                    
+    // stack:                f'g', g'  , f', inA, inB, out  {0: sh(f'g')}
+    %store_fp6(6)
+    // stack:                      g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     DUP13
-    // stack:                 in0, g'  , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:                 inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     DUP15  
-    // stack:            in1, in0, g'  , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:            inB, inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %load_fp6
-    // stack:             g , in0, g'  , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:             g , inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %swap_fp6_hole
-    // stack:             g', in0, g   , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:             g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %dup_fp6_7
-    // stack:           g,g', in0, g   , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:           g,g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %add_fp6
-    // stack:           g+g', in0, g   , f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:           g+g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %swap_fp6_hole
-    // stack:              g, in0, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:              g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     PUSH ret_2
-    // stack:       ret_2, g, in0, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:       ret_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     SWAP7
-    // stack:       in0, g, ret_2, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:       inA, g, ret_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %load_fp6
-    // stack:         f, g, ret_2, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
+    // stack:         f, g, ret_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %jump(mul_fp6)
 ret_2:    
-    // stack:                  fg, g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g'}
-    %store_fp6(112)
-    // stack:                      g+g', f', in0, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                  fg, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    %store_fp6(12)
+    // stack:                      g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %swap_fp6
-    // stack:                      f', g+g', in0, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                      f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     PUSH ret_3
-    // stack:               ret_3, f', g+g', in0, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:               ret_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     SWAP13
-    // stack:               in0, f', g+g', ret_3, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:               inA, f', g+g', ret_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp6
-    // stack:                  f,f', g+g', ret_3, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                  f,f', g+g', ret_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp6
-    // stack:                  f+f', g+g', ret_3, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                  f+f', g+g', ret_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %jump(mul_fp6)
 ret_3:
-    // stack:                       (f+f')(g+g'), in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(112)
-    // stack:                   fg, (f+f')(g+g'), in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                       (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    %load_fp6(12)
+    // stack:                   fg, (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %swap_fp6
-    // stack:                   (f+f')(g+g'), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                   (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %dup_fp6_6
-    // stack:               fg, (f+f')(g+g'), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(106)
-    // stack:          f'g',fg, (f+f')(g+g'), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:               fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    %load_fp6(6)
+    // stack:          f'g',fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp6
-    // stack:          f'g'+fg, (f+f')(g+g'), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:          f'g'+fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %subr_fp6
-    // stack:       (f+f')(g+g') - (f'g'+fg), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}   
-    DUP14  %add_const(6) 
-    // stack: out', (f+f')(g+g') - (f'g'+fg), fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}   
+    // stack:       (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
+    DUP14  %offset_fp6 
+    // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     %store_fp6
-    // stack:                                 fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
-    %load_fp6(100)
-    // stack:                      sh(f'g') , fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                                 fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    %load_fp6(0)
+    // stack:                      sh(f'g') , fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp6
-    // stack:                      sh(f'g') + fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                      sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     DUP8
-    // stack:                 out, sh(f'g') + fg, in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                 out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %store_fp6
-    // stack:                                     in1, out  {100: sh(f'g'), 106: f'g', 112: fg}
+    // stack:                                     inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %pop2  
     JUMP
+
+
+/// fp6 functions:
+///  fn    | num | ops | cost
+///  -------------------------
+///  load  |   2 |  40 |   80
+///  store |   2 |  40 |   80
+///  dup   |   2 |   6 |   12
+///  swap  |   2 |  16 |   32
+///  add   |   1 |  16 |   16
+///  mul   |   1 | 157 |  157
+///  sq    |   2 |     |
+///  dbl   |   1 |  13 |   13
+///
+/// lone stack operations:
+///  op    | num 
+///  ------------
+///  ADD   |   3
+///  SWAP  |   4
+///  DUP   |   5
+///  PUSH  |   6
+///  POP   |   3
+///  JUMP  |   4
+///
+/// TOTAL: 
+
+/// input:
+///     F = f + f'z
+///
+/// output:
+///     H = h + h'z = FF
+///
+///     h  = ff + sh(f'f')
+///     h' = 2ff'
+///
+/// memory pointers [ind' = ind+6]
+///     {inp: f, inp': f', out: h, out': h'}
+///
+/// f, f' consist of six elements on the stack
+
+global square_fp12:
+    // stack:                                                    inp, out
+    DUP1  %offset_fp6
+    // stack:                                              inp', inp, out
+    %load_fp6 
+    // stack:                                                 f, inp, out
+    PUSH post_sq2
+    // stack:                                       post_sq2, f, inp, out
+    SWAP7
+    // stack:                                       inp, f, post_sq2, out
+    PUSH post_sq1
+    // stack:                             post_sq1, inp, f, post_sq2, out 
+    %dup_fp6_2
+    // stack:                         f , post_sq1, inp, f, post_sq2, out
+    DUP16  %offset_fp6
+    // stack:                   out', f , post_sq1, inp, f, post_sq2, out
+    PUSH post_mul
+    // stack:         post_mul, out', f , post_sq1, inp, f, post_sq2, out
+    DUP10  %offset_fp6
+    // stack:   inp', post_mul, out', f , post_sq1, inp, f, post_sq2, out
+    %load_fp6
+    // stack:     f', post_mul, out', f , post_sq1, inp, f, post_sq2, out
+    %swap_fp6_hole_2
+    // stack:     f , post_mul, out', f', post_sq1, inp, f, post_sq2, out
+    %dup_fp6_8
+    // stack: f', f , post_mul, out', f', post_sq1, inp, f, post_sq2, out
+    %jump(mul_fp6)
+post_mul:
+    // stack:              f'f, out', f', post_sq1, inp, f, post_sq2, out
+    DUP7
+    // stack:        out', f'f, out', f', post_sq1, inp, f, post_sq2, out
+    %store_fp6_double
+    // stack:                   out', f', post_sq1, inp, f, post_sq2, out
+    POP
+    // stack:                         f', post_sq1, inp, f, post_sq2, out
+    %jump(square_fp6)
+post_sq1:
+    // stack:                                 f'f', inp, f, post_sq2, out
+    %swap_fp6_hole
+    // stack:                                 f, inp, f'f', post_sq2, out
+    SWAP6  SWAP13  SWAP6
+    // stack:                                 f, post_sq2, f'f', inp, out
+    %jump(square_fp6)
+post_sq2:
+    // stack:                                         ff , f'f', inp, out
+    %add_fp6
+    // stack:                                         ff + f'f', inp, out
+    DUP8
+    // stack:                                    out, ff + f'f', inp, out
+    %store_fp6
+    // stack:                                                    inp, out
+    %pop2
+    JUMP
+
+/// fp6 functions:
+///  fn    | num | ops | cost
+///  -------------------------
+///  load  |   2 |  40 |   80
+///  store |   2 |  40 |   80
+///  dup   |   2 |   6 |   12
+///  swap  |   2 |  16 |   32
+///  add   |   1 |  16 |   16
+///  mul   |   1 | 157 |  157
+///  sq    |   2 |     |
+///  dbl   |   1 |  13 |   13
+///
+/// lone stack operations:
+///  op    | num 
+///  ------------
+///  ADD   |   3
+///  SWAP  |   4
+///  DUP   |   5
+///  PUSH  |   6
+///  POP   |   3
+///  JUMP  |   4
+///
+/// TOTAL: 
+
+/// input:
+///     F = f + f'z
+///     G = g0 + (G1)t + (G2)tz
+///
+/// output:
+///     H = h + h'z = FG
+///       = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
+///     
+///     h  = g0 * f  + G1 * sh(f ) + G2 * sh2(f') 
+///     h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
+///
+/// memory pointers [ind' = ind+6, inB2 = inB1 + 2 = inB + 3]
+///     { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
+///
+/// f, f' consist of six elements; G1, G2 consist of two elements; and g0 of one element
+
+
+global mul_fp12_sparse:
+    // stack:                                                                    inA, inB, out
+    DUP1  %offset_fp6
+    // stack:                                                              inA', inA, inB, out
+    %load_fp6
+    // stack:                                                                f', inA, inB, out
+    DUP8 
+    // stack:                                                           inB, f', inA, inB, out
+    DUP8
+    // stack:                                                      inA, inB, f', inA, inB, out
+    %load_fp6
+    // stack:                                                        f, inB, f', inA, inB, out
+    DUP16
+    // stack:                                                   out, f, inB, f', inA, inB, out
+    %dup_fp6_8 
+    // stack:                                               f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack:                                          inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_8
+    // stack:                                       f, inB, f', out, f, inB, f', inA, inB, out
+    DUP7
+    // stack:                                  inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_8
+    // stack:                              f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_7
+    // stack:                           f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    DUP13 
+    // stack:                      inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mload_kernel_general
+    // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp_fp12
+    // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %swap_fp6
+    // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    DUP13  %add_const(3)
+    // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %load_fp2
+    // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp2_fp12_sh2
+    // stack:           G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %add_fp6
+    // stack:           G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %swap_fp6_hole
+    // stack:          f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    DUP7  %add_const(1)
+    // stack: inB1,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %load_fp2
+    // stack:  G1 ,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp2_fp12_sh
+    // stack:  G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %add_fp6_hole
+    // stack:      G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, out, f', f, inB, f', inA, inB, out
+    %store_fp6
+    // stack:                                          inB, out, f', f, inB, f', inA, inB, out
+    %pop2
+    // stack:                                                    f', f, inB, f', inA, inB, out
+    DUP13
+    // stack:                                               inB, f', f, inB, f', inA, inB, out
+    %mload_kernel_general
+    // stack:                                               g0 , f', f, inB, f', inA, inB, out
+    %mul_fp_fp12
+    // stack:                                               g0 * f', f, inB, f', inA, inB, out
+    %swap_fp6
+    // stack:                                             f  , g0 * f', inB, f', inA, inB, out
+    DUP13  %add_const(3)
+    // stack:                                    inB2,    f  , g0 * f', inB, f', inA, inB, out
+    %load_fp2
+    // stack:                                     G2 ,    f  , g0 * f', inB, f', inA, inB, out
+    %mul_fp2_fp12_sh
+    // stack:                                     G2 * sh(f) , g0 * f', inB, f', inA, inB, out
+    %add_fp6
+    // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
+    %swap_fp6_hole
+    // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    DUP7  %add_const(1)
+    // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %load_fp2
+    // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %mul_fp2_fp12_sh
+    // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %add_fp6_hole
+    // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
+    DUP9  %offset_fp6
+    // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
+    %store_fp6
+    // stack:                                                                    inA, inB, out
+    %pop3
+
+
+/// global mul_fp12_sparse_fast:
+///    // stack:                                                            inA, inB, out
+///    DUP2
+///    // stack:                                                       inB, inA, inB, out
+///    %load_fp12_sparse
+///    // stack:                                               g0, G1, G1', inA, inB, out
+///    DUP6  %offset_fp6
+///    // stack:                                         inA', g0, G1, G1', inA, inB, out
+///    %load_fp6
+///    // stack:                                           f', g0, G1, G1', inA, inB, out
+///    DUP12
+///    // stack:                                      inA, f', g0, G1, G1', inA, inB, out
+///    %load_fp6
+///    // stack:                                        f, f', g0, G1, G1', inA, inB, out
+///    %clone_mul_fp_fp6
+///    // stack:                                 (g0)f, f, f', g0, G1, G1', inA, inB, out
+///    %clone_mul_fp2_fp6_sh
+///    // stack:                     (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
+///    %add_fp6
+///    // stack:                     (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
+///    %clone_mul_fp2_fp6_sh2
+///    // stack:      (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
+///    %add_fp6
+///    // stack:      (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
+///    DUP26
+///    // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
+///    %store_fp6
+///    // stack:                                        f, f', g0, G1, G1', inA, inB, out 
+///    %semiclone_mul_fp2_fp6_sh
+///    // stack:                               (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %clone_mul_fp2_fp6_sh
+///    // stack:                  (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %add_fp6
+///    // stack:                  (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %clone_mul_fp_fp6
+///    // stack:         (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %add_fp6
+///    // stack:         (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    DUP20  %offset_fp6
+///    // stack:   out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %store_fp6
+///    // stack:                                           f', g0, G1, G1', inA, inB, out 
+///    %pop14
diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
index 0fc6dbdf..9be87aac 100644
--- a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
@@ -256,3 +256,7 @@ global mul_fp6:
 
     // stack: retdest, e0, e0_, e1, e1_, e2, e2_
     JUMP
+
+
+global square_fp6:
+    
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/fields/frobenius.asm b/evm/src/cpu/kernel/asm/fields/frobenius.asm
new file mode 100644
index 00000000..e69de29b

From 784a4c0869c2400c8a15f1eecaef9d9f15704950 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 9 Nov 2022 19:39:49 -0800
Subject: [PATCH 002/201] scaling macros

---
 .../cpu/kernel/asm/fields/field_macros.asm    | 273 ++++++++++++++++--
 evm/src/cpu/kernel/asm/fields/fp12_mul.asm    |  12 +-
 2 files changed, 259 insertions(+), 26 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/field_macros.asm b/evm/src/cpu/kernel/asm/fields/field_macros.asm
index 02651a1b..2d12aead 100644
--- a/evm/src/cpu/kernel/asm/fields/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/fields/field_macros.asm
@@ -2,9 +2,22 @@
     %add_const(6)
 %endmacro
 
+// cost: 2 loads + 2 dup/swaps + 1 add = 2*4 + 2*1 + 1*2 = 12
+%macro load_fp2
+    // stack:       ptr
+    DUP1  %add_const(1)
+    // stack: ind1, ptr
+    %mload_kernel_general
+    // stack:   x1, ptr
+    SWAP1
+    // stack: ind0, x1
+    %mload_kernel_general
+    // stack:   x0, x1
+%endmacro
+
 // cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp6
-    // stack: ptr
+    // stack:                         ptr
     DUP1  %add_const(4)
     // stack:                   ind4, ptr
     %mload_kernel_general
@@ -196,25 +209,6 @@
     // stack:
 %endmacro
 
-// cost: 9; note this returns y, x for the output x + yi
-%macro i9
-    // stack:          a , b
-    DUP2
-    // stack:      b,  a,  b
-    DUP2
-    // stack:  a , b,  a , b
-    PUSH 9  MULFP254
-    // stack: 9a , b,  a , b
-    SUBFP254
-    // stack: 9a - b,  a , b
-    SWAP2 
-    // stack:  b , a, 9a - b
-    PUSH 9  MULFP254
-    // stack  9b , a, 9a - b
-    ADDFP254
-    // stack: 9b + a, 9a - b 
-%endmacro
-
 // cost: 6
 %macro dup_fp6_0
     // stack:       f: 6
@@ -445,3 +439,242 @@
     SUBFP254
     // stack:                         h0, h1, h2, h3, h4, h5
 %endmacro
+
+// cost: 21
+%macro mul_fp_fp6
+    // stack: c, f0,      f1,     f2,     f3,     f4,     f5
+    SWAP6
+    DUP7
+    MULFP254
+    SWAP6
+    // stack: c, f0,      f1,     f2,     f3,     f4, c * f5
+    SWAP5
+    DUP6
+    MULFP254
+    SWAP5
+    // stack: c , f0,     f1,     f2,     f3, c * f4, c * f5
+    SWAP4
+    DUP5
+    MULFP254
+    SWAP4
+    // stack: c , f0,     f1,     f2, c * f3, c * f4, c * f5
+    SWAP3 
+    DUP4 
+    MULFP254
+    SWAP3 
+    // stack: c , f0,     f1, c * f2, c * f3, c * f4, c * f5
+    SWAP2  
+    DUP3  
+    MULFP254
+    SWAP2  
+    // stack: c , f0, c * f1, c * f2, c * f3, c * f4, c * f5
+    MULFP254
+    // stack: c * f0, c * f1, c * f2, c * f3, c * f4, c * f5
+%endmacro
+
+/// cost: 1 i9 (9) + 16 dups + 15 swaps + 12 muls + 6 adds/subs = 58
+///
+/// G0 + G1t + G2t^2 = (a+bi)t * (F0 + F1t + F2t^2) 
+///                  = (c+di)F2 + (a+bi)F0t + (a+bi)F1t^2
+/// where c+di = (a+bi)(9+i) = (9a-b) + (a+9b)i 
+///
+/// G0 = (c+di)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i
+/// G1 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
+/// G2 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i
+
+%macro mul_fp2_fp6_sh
+    // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
+    DUP6
+    DUP3
+    MULFP254
+    // stack:       bf1_, a, b, f0, f0_, f1, f1_, f2, f2_
+    DUP6 
+    DUP3
+    MULFP254
+    // stack: af1 , bf1_, a, b, f0, f0_, f1, f1_, f2, f2_
+    SUBFP254
+    // stack:         g2, a, b, f0, f0_, f1, f1_, f2, f2_
+    SWAP7
+    // stack:         f2, a, b, f0, f0_, f1, f1_, g2, f2_
+    SWAP5
+    // stack:         f1, a, b, f0, f0_, f2, f1_, g2, f2_
+    DUP3
+    MULFP254
+    // stack:        bf1, a, b, f0, f0_, f2, f1_, g2, f2_
+    SWAP1
+    SWAP6
+    // stack:        f1_, bf1, b, f0, f0_, f2, a, g2, f2_
+    DUP7
+    MULFP254
+    // stack:       af1_, bf1, b, f0, f0_, f2, a, g2, f2_
+    ADDFP254
+    // stack:             g2_, b, f0, f0_, f2, a, g2, f2_
+    SWAP7
+    // stack:             f2_, b, f0, f0_, f2, a, g2, g2_
+    DUP4
+    DUP3
+    MULFP254
+    // stack:       bf0_, f2_, b, f0, f0_, f2, a, g2, g2_
+    DUP4
+    DUP8
+    MULFP254
+    // stack:  af0, bf0_, f2_, b, f0, f0_, f2, a, g2, g2_
+    SUBFP254 
+    // stack:         g1, f2_, b, f0, f0_, f2, a, g2, g2_
+    SWAP5
+    // stack:         f2, f2_, b, f0, f0_, g1, a, g2, g2_
+    SWAP3
+    // stack:         f0, f2_, b, f2, f0_, g1, a, g2, g2_
+    DUP3
+    MULFP254
+    // stack:        bf0, f2_, b, f2, f0_, g1, a, g2, g2_
+    SWAP1
+    SWAP4
+    // stack:        f0_, bf0, b, f2, f2_, g1, a, g2, g2_
+    DUP7
+    MULFP254
+    // stack:       af0_, bf0, b, f2, f2_, g1, a, g2, g2_
+    ADDFP254
+    // stack:             g1_, b, f2, f2_, g1, a, g2, g2_
+    SWAP5 
+    // stack:             a, b, f2, f2_, g1, g1_, g2, g2_
+    %i9
+    // stack:             d, c, f2, f2_, g1, g1_, g2, g2_
+    DUP4
+    DUP2
+    MULFP254
+    // stack:       df2_, d, c, f2, f2_, g1, g1_, g2, g2_
+    DUP4
+    DUP4
+    MULFP254
+    // stack:  cf2, df2_, d, c, f2, f2_, g1, g1_, g2, g2_
+    SUBFP254
+    // stack:         g0, d, c, f2, f2_, g1, g1_, g2, g2_
+    SWAP3 
+    // stack:         f2, d, c, g0, f2_, g1, g1_, g2, g2_
+    MULFP254
+    // stack:           df2, c, g0, f2_, g1, g1_, g2, g2_
+    SWAP3
+    MULFP254
+    // stack:             cf2_, g0, df2, g1, g1_, g2, g2_
+    SWAP1 
+    SWAP2
+    // stack:             df2, cf2_, g0, g1, g1_, g2, g2_
+    ADDFP254
+    // stack:                   g0_, g0, g1, g1_, g2, g2_
+    SWAP1
+    // stack:                   g0, g0_, g1, g1_, g2, g2_
+%endmacro
+
+/// cost: 1 i9 (9) + 16 dups + 17 swaps + 12 muls + 6 adds/subs = 60
+///
+/// G0 + G1t + G2t^2 = (a+bi)t^2 * (F0 + F1t + F2t^2) 
+///                  = (c+di)F1 + (c+di)F2t + (a+bi)F0t^2
+/// where c+di = (a+bi)(9+i) = (9a-b) + (a+9b)i 
+///
+/// G0 = (c+di)(f1+f1_i) = (cf1 - df1_) + (df1 + cf1_)i
+/// G1 = (c+di)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i
+/// G2 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
+
+%macro mul_fp2_fp6_sh2
+    // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
+    DUP4
+    DUP3 
+    MULFP254
+    // stack:       bf0_, a, b, f0, f0_, f1, f1_, f2, f2_
+    DUP4
+    DUP3
+    MULFP254
+    // stack:  af0, bf0_, a, b, f0, f0_, f1, f1_, f2, f2_
+    SUBFP254
+    // stack:         g2, a, b, f0, f0_, f1, f1_, f2, f2_
+    SWAP7
+    SWAP3
+    // stack:         f0, a, b, f2, f0_, f1, f1_, g2, f2_
+    DUP3
+    MULFP254
+    // stack:        bf0, a, b, f2, f0_, f1, f1_, g2, f2_
+    SWAP1
+    SWAP4
+    // stack:        f0_, bf0, b, f2, a, f1, f1_, g2, f2_
+    DUP5 
+    MULFP254
+    // stack:       af0_, bf0, b, f2, a, f1, f1_, g2, f2_
+    ADDFP254 
+    // stack:             g2_, b, f2, a, f1, f1_, g2, f2_
+    SWAP7
+    SWAP3
+    // stack:             a, b, f2, f2_, f1, f1_, g2, g2_
+    %i9
+    // stack:             d, c, f2, f2_, f1, f1_, g2, g2_
+    DUP4
+    DUP2
+    MULFP254
+    // stack:       df2_, d, c, f2, f2_, f1, f1_, g2, g2_
+    DUP4
+    DUP4
+    MULFP254
+    // stack:  cf2, df2_, d, c, f2, f2_, f1, f1_, g2, g2_
+    SUBFP254
+    // stack:         g1, d, c, f2, f2_, f1, f1_, g2, g2_
+    SWAP5
+    SWAP3
+    // stack:         f2, d, c, f1, f2_, g1, f1_, g2, g2_
+    DUP2
+    MULFP254
+    // stack:        df2, d, c, f1, f2_, g1, f1_, g2, g2_
+    SWAP1
+    SWAP4
+    // stack:        f2_, df2, c, f1, d, g1, f1_, g2, g2_
+    DUP3
+    MULFP254
+    // stack:       cf2_, df2, c, f1, d, g1, f1_, g2, g2_
+    ADDFP254
+    // stack:             g1_, c, f1, d, g1, f1_, g2, g2_
+    SWAP5 
+    // stack:             f1_, c, f1, d, g1, g1_, g2, g2_
+    DUP1
+    DUP5 
+    MULFP254
+    // stack:       df1_, f1_, c, f1, d, g1, g1_, g2, g2_
+    DUP4
+    DUP4
+    MULFP254
+    // stack:  cf1, df1_, f1_, c, f1, d, g1, g1_, g2, g2_
+    SUBFP254
+    // stack: g0 = cf1 - df1_, f1_, c, f1, d, g1, g1_, g2, g2_
+    SWAP3
+    // stack:         f1, f1_, c, g0, d, g1, g1_, g2, g2_
+    SWAP2
+    MULFP254
+    // stack:           cf1_, f1, g0, d, g1, g1_, g2, g2_
+    SWAP3 
+    MULFP254
+    // stack:             df1, g0, cf1_, g1, g1_, g2, g2_
+    SWAP1
+    SWAP2
+    // stack:             cf1_, df1, g0, g1, g1_, g2, g2_
+    ADDFP254
+    // stack:                   g0_, g0, g1, g1_, g2, g2_
+    SWAP1
+    // stack:                   g0, g0_, g1, g1_, g2, g2_
+%endmacro
+
+// cost: 9; note this returns y, x for the output x + yi
+%macro i9
+    // stack:          a , b
+    DUP2
+    // stack:      b,  a , b
+    DUP2
+    // stack:  a , b,  a , b
+    PUSH 9  MULFP254
+    // stack: 9a , b,  a , b
+    SUBFP254
+    // stack: 9a - b,  a , b
+    SWAP2 
+    // stack:  b , a, 9a - b
+    PUSH 9  MULFP254
+    // stack: 9b , a, 9a - b
+    ADDFP254
+    // stack: 9b + a, 9a - b 
+%endmacro
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index 253103da..8f220e5b 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -329,7 +329,7 @@ global mul_fp12_sparse:
     // stack:                      inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %mload_kernel_general
     // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp_fp12
+    %mul_fp_fp6
     // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %swap_fp6
     // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
@@ -337,7 +337,7 @@ global mul_fp12_sparse:
     // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     %load_fp2
     // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp2_fp12_sh2
+    %mul_fp2_fp6_sh2
     // stack:           G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     %add_fp6
     // stack:           G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
@@ -347,7 +347,7 @@ global mul_fp12_sparse:
     // stack: inB1,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     %load_fp2
     // stack:  G1 ,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp2_fp12_sh
+    %mul_fp2_fp6_sh
     // stack:  G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     %add_fp6_hole
     // stack:      G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
@@ -361,7 +361,7 @@ global mul_fp12_sparse:
     // stack:                                               inB, f', f, inB, f', inA, inB, out
     %mload_kernel_general
     // stack:                                               g0 , f', f, inB, f', inA, inB, out
-    %mul_fp_fp12
+    %mul_fp_fp6
     // stack:                                               g0 * f', f, inB, f', inA, inB, out
     %swap_fp6
     // stack:                                             f  , g0 * f', inB, f', inA, inB, out
@@ -369,7 +369,7 @@ global mul_fp12_sparse:
     // stack:                                    inB2,    f  , g0 * f', inB, f', inA, inB, out
     %load_fp2
     // stack:                                     G2 ,    f  , g0 * f', inB, f', inA, inB, out
-    %mul_fp2_fp12_sh
+    %mul_fp2_fp6_sh
     // stack:                                     G2 * sh(f) , g0 * f', inB, f', inA, inB, out
     %add_fp6
     // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
@@ -379,7 +379,7 @@ global mul_fp12_sparse:
     // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
     %load_fp2
     // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    %mul_fp2_fp12_sh
+    %mul_fp2_fp6_sh
     // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
     %add_fp6_hole
     // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out

From ad645ece17fe7c7d683472c6091036a87951b517 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 10 Nov 2022 10:51:40 -0800
Subject: [PATCH 003/201] aggregator

---
 evm/src/cpu/kernel/aggregator.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 194a93c8..8b032e42 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -32,7 +32,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/secp256k1/lift_x.asm"),
         include_str!("asm/curve/secp256k1/moddiv.asm"),
         include_str!("asm/exp.asm"),
-        include_str!("asm/fields/fp6_macros.asm"),
+        include_str!("asm/fields/field_macros.asm"),
         include_str!("asm/fields/fp6_mul.asm"),
         include_str!("asm/fields/fp12_mul.asm"),
         include_str!("asm/halt.asm"),

From e0ee489d38ec5bc572b5552c3ca80a92a578961f Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 14 Nov 2022 15:58:37 -0800
Subject: [PATCH 004/201] fix fp12 test

---
 evm/src/cpu/kernel/asm/fields/fp12_mul.asm |  97 +++++++------
 evm/src/cpu/kernel/tests/fields.rs         | 156 +++++++++++----------
 2 files changed, 128 insertions(+), 125 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index 8f220e5b..249bf252 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -1,39 +1,39 @@
 /// Note: uncomment this to test
 
-/// global test_mul_fp12:
-///     // stack:      f, inA , f', g, inB , g', inB, out, inA,       out
-///     DUP7
-///     // stack: inA, f, inA , f', g, inB , g', inB, out, inA,       out
-///     %store_fp6
-///     // stack:         inA , f', g, inB , g', inB, out, inA,       out
-///     %offset_fp6
-///     // stack:         inA', f', g, inB , g', inB, out, inA,       out
-///     %store_fp6
-///     // stack:                   g, inB , g', inB, out, inA,       out
-///     DUP7
-///     // stack:              inB, g, inB , g', inB, out, inA,       out
-///     %store_fp6
-///     // stack:                      inB , g', inB, out, inA,       out
-///     %offset_fp6
-///     // stack:                      inB', g', inB, out, inA,       out
-///     %store_fp6
-///     // stack:                                inB, out, inA,       out
-///     PUSH ret_stack
-///     // stack:                     ret_stack, inB, out, inA,       out
-///     SWAP3
-///     // stack:                           inA, inB, out, ret_stack, out
-///     %jump(mul_fp12)
-/// ret_stack:
-///     // stack:          out
-///     DUP1  %offset_fp6
-///     // stack:    out', out
-///     %load_fp6
-///     // stack:      h', out
-///     DUP7
-///     // stack: out, h', out
-///     %load_fp6
-///     // stack:   h, h', out
-///     %jump(0xdeadbeef)
+global test_mul_fp12:
+    // stack:      f, inA , f', g, inB , g', inB, out, inA,       out
+    DUP7
+    // stack: inA, f, inA , f', g, inB , g', inB, out, inA,       out
+    %store_fp6
+    // stack:         inA , f', g, inB , g', inB, out, inA,       out
+    %offset_fp6
+    // stack:         inA', f', g, inB , g', inB, out, inA,       out
+    %store_fp6
+    // stack:                   g, inB , g', inB, out, inA,       out
+    DUP7
+    // stack:              inB, g, inB , g', inB, out, inA,       out
+    %store_fp6
+    // stack:                      inB , g', inB, out, inA,       out
+    %offset_fp6
+    // stack:                      inB', g', inB, out, inA,       out
+    %store_fp6
+    // stack:                                inB, out, inA,       out
+    PUSH ret_stack
+    // stack:                     ret_stack, inB, out, inA,       out
+    SWAP3
+    // stack:                           inA, inB, out, ret_stack, out
+    %jump(mul_fp12)
+ret_stack:
+    // stack:          out
+    DUP1  %offset_fp6
+    // stack:    out', out
+    %load_fp6
+    // stack:      h', out
+    DUP7
+    // stack: out, h', out
+    %load_fp6
+    // stack:   h, h', out
+    %jump(0xdeadbeef)
 
 
 /// fp6 functions:
@@ -259,28 +259,25 @@ post_sq2:
     JUMP
 
 /// fp6 functions:
-///  fn    | num | ops | cost
-///  -------------------------
-///  load  |   2 |  40 |   80
-///  store |   2 |  40 |   80
-///  dup   |   2 |   6 |   12
-///  swap  |   2 |  16 |   32
-///  add   |   1 |  16 |   16
-///  mul   |   1 | 157 |  157
-///  sq    |   2 |     |
-///  dbl   |   1 |  13 |   13
+///  fn      | num | ops | cost
+///  ---------------------------
+///  load    |   2 |  40 |   80
+///  store   |   2 |  40 |   80
+///  dup     |   4 |   6 |   24
+///  swap    |   4 |  16 |   64
+///  add     |   4 |  16 |   64
+///  mul_fp  |   2 |  21 |   42
+///  mul_fp2 |   4 |  59 |  236
 ///
 /// lone stack operations:
 ///  op    | num 
 ///  ------------
-///  ADD   |   3
-///  SWAP  |   4
-///  DUP   |   5
+///  ADD   |   6
+///  DUP   |   9
 ///  PUSH  |   6
-///  POP   |   3
-///  JUMP  |   4
+///  POP   |   5
 ///
-/// TOTAL: 
+/// TOTAL: 618
 
 /// input:
 ///     F = f + f'z
diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 289a8598..80155202 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -39,11 +39,11 @@ fn add3_fp2(a: [u32; 2], b: [u32; 2], c: [u32; 2]) -> [u32; 2] {
     [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
 }
 
-// fn sub_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
-//     let [a, a_] = a;
-//     let [b, b_] = b;
-//     [sub_fp(a, b), sub_fp(a_, b_)]
-// }
+fn sub_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
+    let [a, a_] = a;
+    let [b, b_] = b;
+    [sub_fp(a, b), sub_fp(a_, b_)]
+}
 
 fn mul_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
     let [a, a_] = a;
@@ -59,25 +59,25 @@ fn i9(a: [u32; 2]) -> [u32; 2] {
     [sub_fp(mul_fp(9, a), a_), add_fp(a, mul_fp(9, a_))]
 }
 
-// fn add_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
-//     let [c0, c1, c2] = c;
-//     let [d0, d1, d2] = d;
+fn add_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+    let [c0, c1, c2] = c;
+    let [d0, d1, d2] = d;
 
-//     let e0 = add_fp2(c0, d0);
-//     let e1 = add_fp2(c1, d1);
-//     let e2 = add_fp2(c2, d2);
-//     [e0, e1, e2]
-// }
+    let e0 = add_fp2(c0, d0);
+    let e1 = add_fp2(c1, d1);
+    let e2 = add_fp2(c2, d2);
+    [e0, e1, e2]
+}
 
-// fn sub_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
-//     let [c0, c1, c2] = c;
-//     let [d0, d1, d2] = d;
+fn sub_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+    let [c0, c1, c2] = c;
+    let [d0, d1, d2] = d;
 
-//     let e0 = sub_fp2(c0, d0);
-//     let e1 = sub_fp2(c1, d1);
-//     let e2 = sub_fp2(c2, d2);
-//     [e0, e1, e2]
-// }
+    let e0 = sub_fp2(c0, d0);
+    let e1 = sub_fp2(c1, d1);
+    let e2 = sub_fp2(c2, d2);
+    [e0, e1, e2]
+}
 
 fn mul_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
     let [c0, c1, c2] = c;
@@ -101,20 +101,20 @@ fn mul_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
     ]
 }
 
-// fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
-//     let [c0, c1, c2] = c;
-//     [i9(c2), c0, c1]
-// }
+fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+    let [c0, c1, c2] = c;
+    [i9(c2), c0, c1]
+}
 
-// fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] {
-//     let [f0, f1] = f;
-//     let [g0, g1] = g;
+fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] {
+    let [f0, f1] = f;
+    let [g0, g1] = g;
 
-//     let h0 = mul_fp6(f0, g0);
-//     let h1 = mul_fp6(f1, g1);
-//     let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
-//     [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
-// }
+    let h0 = mul_fp6(f0, g0);
+    let h1 = mul_fp6(f1, g1);
+    let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
+    [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
+}
 
 fn gen_fp6() -> [[u32; 2]; 3] {
     let mut rng = thread_rng();
@@ -151,52 +151,58 @@ fn test_fp6() -> Result<()> {
     Ok(())
 }
 
-// fn make_initial_stack(
-//     f0: [[u32; 2]; 3],
-//     f1: [[u32; 2]; 3],
-//     g0: [[u32; 2]; 3],
-//     g1: [[u32; 2]; 3],
-// ) -> Vec<U256> {
-//     // stack: in0, f, in0', f', in1, g, in1', g', in1, out, in0, out
-//     let f0: Vec<u32> = f0.into_iter().flatten().collect();
-//     let f1: Vec<u32> = f1.into_iter().flatten().collect();
-//     let g0: Vec<u32> = g0.into_iter().flatten().collect();
-//     let g1: Vec<u32> = g1.into_iter().flatten().collect();
+fn make_initial_stack(
+    in1: u32,
+    in2: u32,
+    out: u32,
+    f0: [[u32; 2]; 3],
+    f1: [[u32; 2]; 3],
+    g0: [[u32; 2]; 3],
+    g1: [[u32; 2]; 3],
+) -> Vec<U256> {
+    // stack: f0, in1, f1, g0, in2, g1, in2, out, in1, out
+    let f0: Vec<u32> = f0.into_iter().flatten().collect();
+    let f1: Vec<u32> = f1.into_iter().flatten().collect();
+    let g0: Vec<u32> = g0.into_iter().flatten().collect();
+    let g1: Vec<u32> = g1.into_iter().flatten().collect();
 
-//     let mut input = f0;
-//     input.extend(vec![0]);
-//     input.extend(f1);
-//     input.extend(g0);
-//     input.extend(vec![12]);
-//     input.extend(g1);
-//     input.extend(vec![12, 24, 0, 24]);
+    let mut input = f0;
+    input.extend(vec![in1]);
+    input.extend(f1);
+    input.extend(g0);
+    input.extend(vec![in2]);
+    input.extend(g1);
+    input.extend(vec![in2, out, in1, out]);
 
-//     as_stack(input)
-// }
+    as_stack(input)
+}
 
-// #[test]
-// fn test_fp12() -> Result<()> {
-//     let f0 = gen_fp6();
-//     let f1 = gen_fp6();
-//     let g0 = gen_fp6();
-//     let g1 = gen_fp6();
+#[test]
+fn test_fp12() -> Result<()> {
+    let in1 = 64;
+    let in2 = 76;
+    let out = 88;
 
-//     let kernel = combined_kernel();
-//     let initial_offset = kernel.global_labels["test_mul_Fp12"];
-//     let initial_stack: Vec<U256> = make_initial_stack(f0, f1, g0, g1);
-//     let final_stack: Vec<U256> = run_with_kernel(&kernel, initial_offset, initial_stack)?
-//         .stack()
-//         .to_vec();
+    let f0 = gen_fp6();
+    let f1 = gen_fp6();
+    let g0 = gen_fp6();
+    let g1 = gen_fp6();
 
-//     let mut output: Vec<u32> = mul_fp12([f0, f1], [g0, g1])
-//         .into_iter()
-//         .flatten()
-//         .flatten()
-//         .collect();
-//     output.extend(vec![24]);
-//     let expected = as_stack(output);
+    let initial_offset = KERNEL.global_labels["test_mul_fp12"];
+    let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, g0, g1);
+    let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
+        .stack()
+        .to_vec();
 
-//     assert_eq!(final_stack, expected);
+    let mut output: Vec<u32> = mul_fp12([f0, f1], [g0, g1])
+        .into_iter()
+        .flatten()
+        .flatten()
+        .collect();
+    output.extend(vec![out]);
+    let expected = as_stack(output);
 
-//     Ok(())
-// }
+    assert_eq!(final_stack, expected);
+
+    Ok(())
+}

From 69ce4f99fe18348858252cf1cf49afb0889a2659 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 14 Nov 2022 16:41:02 -0800
Subject: [PATCH 005/201] add total count

---
 evm/src/cpu/kernel/interpreter.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 6e3f9e5d..98900923 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -140,11 +140,14 @@ impl<'a> Interpreter<'a> {
             self.run_opcode()?;
         }
         println!("Opcode count:");
+        let mut tot = 0;
         for i in 0..0x100 {
             if self.opcode_count[i] > 0 {
+                tot = tot + self.opcode_count[i];
                 println!("{}: {}", get_mnemonic(i as u8), self.opcode_count[i])
             }
         }
+        println!("TOTAL: {}", tot);
         Ok(())
     }
 

From a3dfea9cf82dcb15c38620eaffff4bbdc6723b64 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 14 Nov 2022 16:41:36 -0800
Subject: [PATCH 006/201] update fp12

---
 evm/src/cpu/kernel/asm/fields/fp12_mul.asm | 364 +++++++++++----------
 evm/src/cpu/kernel/tests/fields.rs         |  47 ++-
 2 files changed, 236 insertions(+), 175 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index 249bf252..32a8fe0e 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -1,5 +1,6 @@
 /// Note: uncomment this to test
 
+/// cost: 220
 global test_mul_fp12:
     // stack:      f, inA , f', g, inB , g', inB, out, inA,       out
     DUP7
@@ -22,7 +23,7 @@ global test_mul_fp12:
     // stack:                     ret_stack, inB, out, inA,       out
     SWAP3
     // stack:                           inA, inB, out, ret_stack, out
-    %jump(mul_fp12)
+    %jump(mul_fp12_sparse)
 ret_stack:
     // stack:          out
     DUP1  %offset_fp6
@@ -35,6 +36,11 @@ ret_stack:
     // stack:   h, h', out
     %jump(0xdeadbeef)
 
+///////////////////////////////////////
+///// GENERAL FP12 MULTIPLICATION /////
+///////////////////////////////////////
+
+/// cost: 1063
 
 /// fp6 functions:
 ///  fn    | num | ops | cost
@@ -166,6 +172,189 @@ ret_3:
     JUMP
 
 
+//////////////////////////////////////
+///// SPARSE FP12 MULTIPLICATION /////
+//////////////////////////////////////
+
+/// cost: 646
+
+/// fp6 functions:
+///  fn      | num | ops | cost
+///  ---------------------------
+///  load    |   2 |  40 |   80
+///  store   |   2 |  40 |   80
+///  dup     |   4 |   6 |   24
+///  swap    |   4 |  16 |   64
+///  add     |   4 |  16 |   64
+///  mul_fp  |   2 |  21 |   42
+///  mul_fp2 |   4 |  59 |  236
+///
+/// lone stack operations:
+///  op    | num 
+///  ------------
+///  ADD   |   6
+///  DUP   |   9
+///  PUSH  |   6
+///  POP   |   5
+///
+/// TOTAL: 618
+
+/// input:
+///     F = f + f'z
+///     G = g0 + (G1)t + (G2)tz
+///
+/// output:
+///     H = h + h'z = FG
+///       = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
+///     
+///     h  = g0 * f  + G1 * sh(f ) + G2 * sh2(f') 
+///     h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
+///
+/// memory pointers [ind' = ind+6, inB1 = inB + 2, inB2 = inB1 + 6 = inB + 8]
+///     { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
+///
+/// f, f' consist of six elements; G1, G2 consist of two elements; and g0 of one element 
+
+global mul_fp12_sparse:
+    // stack:                                                                    inA, inB, out
+    DUP1  %offset_fp6
+    // stack:                                                              inA', inA, inB, out
+    %load_fp6
+    // stack:                                                                f', inA, inB, out
+    DUP8 
+    // stack:                                                           inB, f', inA, inB, out
+    DUP8
+    // stack:                                                      inA, inB, f', inA, inB, out
+    %load_fp6
+    // stack:                                                        f, inB, f', inA, inB, out
+    DUP16
+    // stack:                                                   out, f, inB, f', inA, inB, out
+    %dup_fp6_8 
+    // stack:                                               f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack:                                          inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_8
+    // stack:                                       f, inB, f', out, f, inB, f', inA, inB, out
+    DUP7
+    // stack:                                  inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_8
+    // stack:                              f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %dup_fp6_7
+    // stack:                           f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    DUP13 
+    // stack:                      inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mload_kernel_general
+    // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp_fp6
+    // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %swap_fp6
+    // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    DUP13  %add_const(8)
+    // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %load_fp2
+    // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp2_fp6_sh2
+    // stack:           G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %add_fp6
+    // stack:           G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
+    %swap_fp6_hole
+    // stack:          f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    DUP7  %add_const(2)
+    // stack: inB1,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %load_fp2
+    // stack:  G1 ,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %mul_fp2_fp6_sh
+    // stack:  G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    %add_fp6_hole
+    // stack:      G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
+    DUP14
+    // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, out, f', f, inB, f', inA, inB, out
+    %store_fp6
+    // stack:                                          inB, out, f', f, inB, f', inA, inB, out
+    %pop2
+    // stack:                                                    f', f, inB, f', inA, inB, out
+    DUP13
+    // stack:                                               inB, f', f, inB, f', inA, inB, out
+    %mload_kernel_general
+    // stack:                                               g0 , f', f, inB, f', inA, inB, out
+    %mul_fp_fp6
+    // stack:                                               g0 * f', f, inB, f', inA, inB, out
+    %swap_fp6
+    // stack:                                             f  , g0 * f', inB, f', inA, inB, out
+    DUP13  %add_const(8)
+    // stack:                                    inB2,    f  , g0 * f', inB, f', inA, inB, out
+    %load_fp2
+    // stack:                                     G2 ,    f  , g0 * f', inB, f', inA, inB, out
+    %mul_fp2_fp6_sh
+    // stack:                                     G2 * sh(f) , g0 * f', inB, f', inA, inB, out
+    %add_fp6
+    // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
+    %swap_fp6_hole
+    // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    DUP7  %add_const(2)
+    // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %load_fp2
+    // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %mul_fp2_fp6_sh
+    // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
+    %add_fp6_hole
+    // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
+    DUP9  %offset_fp6
+    // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
+    %store_fp6
+    // stack:                                                                    inA, inB, out
+    %pop3
+    JUMP
+
+/// global mul_fp12_sparse_fast:
+///    // stack:                                                            inA, inB, out
+///    DUP2
+///    // stack:                                                       inB, inA, inB, out
+///    %load_fp12_sparse
+///    // stack:                                               g0, G1, G1', inA, inB, out
+///    DUP6  %offset_fp6
+///    // stack:                                         inA', g0, G1, G1', inA, inB, out
+///    %load_fp6
+///    // stack:                                           f', g0, G1, G1', inA, inB, out
+///    DUP12
+///    // stack:                                      inA, f', g0, G1, G1', inA, inB, out
+///    %load_fp6
+///    // stack:                                        f, f', g0, G1, G1', inA, inB, out
+///    %clone_mul_fp_fp6
+///    // stack:                                 (g0)f, f, f', g0, G1, G1', inA, inB, out
+///    %clone_mul_fp2_fp6_sh
+///    // stack:                     (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
+///    %add_fp6
+///    // stack:                     (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
+///    %clone_mul_fp2_fp6_sh2
+///    // stack:      (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
+///    %add_fp6
+///    // stack:      (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
+///    DUP26
+///    // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
+///    %store_fp6
+///    // stack:                                        f, f', g0, G1, G1', inA, inB, out 
+///    %semiclone_mul_fp2_fp6_sh
+///    // stack:                               (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %clone_mul_fp2_fp6_sh
+///    // stack:                  (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %add_fp6
+///    // stack:                  (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %clone_mul_fp_fp6
+///    // stack:         (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %add_fp6
+///    // stack:         (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    DUP20  %offset_fp6
+///    // stack:   out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
+///    %store_fp6
+///    // stack:                                           f', g0, G1, G1', inA, inB, out 
+///    %pop14
+
+
+/////////////////////////
+///// FP12 SQUARING /////
+/////////////////////////
+
 /// fp6 functions:
 ///  fn    | num | ops | cost
 ///  -------------------------
@@ -257,176 +446,3 @@ post_sq2:
     // stack:                                                    inp, out
     %pop2
     JUMP
-
-/// fp6 functions:
-///  fn      | num | ops | cost
-///  ---------------------------
-///  load    |   2 |  40 |   80
-///  store   |   2 |  40 |   80
-///  dup     |   4 |   6 |   24
-///  swap    |   4 |  16 |   64
-///  add     |   4 |  16 |   64
-///  mul_fp  |   2 |  21 |   42
-///  mul_fp2 |   4 |  59 |  236
-///
-/// lone stack operations:
-///  op    | num 
-///  ------------
-///  ADD   |   6
-///  DUP   |   9
-///  PUSH  |   6
-///  POP   |   5
-///
-/// TOTAL: 618
-
-/// input:
-///     F = f + f'z
-///     G = g0 + (G1)t + (G2)tz
-///
-/// output:
-///     H = h + h'z = FG
-///       = g0 * [f + f'z] + G1 * [sh(f) + sh(f')z] + G2 * [sh2(f') + sh(f)z]
-///     
-///     h  = g0 * f  + G1 * sh(f ) + G2 * sh2(f') 
-///     h' = g0 * f' + G1 * sh(f') + G2 * sh (f )
-///
-/// memory pointers [ind' = ind+6, inB2 = inB1 + 2 = inB + 3]
-///     { inA: f, inA': f', inB: g0, inB1: G1, inB2: G2, out: h, out': h'}
-///
-/// f, f' consist of six elements; G1, G1' consist of two elements; and g0 of one element 
-
-
-global mul_fp12_sparse:
-    // stack:                                                                    inA, inB, out
-    DUP1  %offset_fp6
-    // stack:                                                              inA', inA, inB, out
-    %load_fp6
-    // stack:                                                                f', inA, inB, out
-    DUP8 
-    // stack:                                                           inB, f', inA, inB, out
-    DUP8
-    // stack:                                                      inA, inB, f', inA, inB, out
-    %load_fp6
-    // stack:                                                        f, inB, f', inA, inB, out
-    DUP16
-    // stack:                                                   out, f, inB, f', inA, inB, out
-    %dup_fp6_8 
-    // stack:                                               f', out, f, inB, f', inA, inB, out
-    DUP14
-    // stack:                                          inB, f', out, f, inB, f', inA, inB, out
-    %dup_fp6_8
-    // stack:                                       f, inB, f', out, f, inB, f', inA, inB, out
-    DUP7
-    // stack:                                  inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %dup_fp6_8
-    // stack:                              f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %dup_fp6_7
-    // stack:                           f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    DUP13 
-    // stack:                      inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mload_kernel_general
-    // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp_fp6
-    // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %swap_fp6
-    // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    DUP13  %add_const(3)
-    // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %load_fp2
-    // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp2_fp6_sh2
-    // stack:           G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %add_fp6
-    // stack:           G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %swap_fp6_hole
-    // stack:          f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    DUP7  %add_const(1)
-    // stack: inB1,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %load_fp2
-    // stack:  G1 ,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp2_fp6_sh
-    // stack:  G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %add_fp6_hole
-    // stack:      G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    DUP14
-    // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, out, f', f, inB, f', inA, inB, out
-    %store_fp6
-    // stack:                                          inB, out, f', f, inB, f', inA, inB, out
-    %pop2
-    // stack:                                                    f', f, inB, f', inA, inB, out
-    DUP13
-    // stack:                                               inB, f', f, inB, f', inA, inB, out
-    %mload_kernel_general
-    // stack:                                               g0 , f', f, inB, f', inA, inB, out
-    %mul_fp_fp6
-    // stack:                                               g0 * f', f, inB, f', inA, inB, out
-    %swap_fp6
-    // stack:                                             f  , g0 * f', inB, f', inA, inB, out
-    DUP13  %add_const(3)
-    // stack:                                    inB2,    f  , g0 * f', inB, f', inA, inB, out
-    %load_fp2
-    // stack:                                     G2 ,    f  , g0 * f', inB, f', inA, inB, out
-    %mul_fp2_fp6_sh
-    // stack:                                     G2 * sh(f) , g0 * f', inB, f', inA, inB, out
-    %add_fp6
-    // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
-    %swap_fp6_hole
-    // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    DUP7  %add_const(1)
-    // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    %load_fp2
-    // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    %mul_fp2_fp6_sh
-    // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
-    %add_fp6_hole
-    // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
-    DUP9  %offset_fp6
-    // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
-    %store_fp6
-    // stack:                                                                    inA, inB, out
-    %pop3
-
-
-/// global mul_fp12_sparse_fast:
-///    // stack:                                                            inA, inB, out
-///    DUP2
-///    // stack:                                                       inB, inA, inB, out
-///    %load_fp12_sparse
-///    // stack:                                               g0, G1, G1', inA, inB, out
-///    DUP6  %offset_fp6
-///    // stack:                                         inA', g0, G1, G1', inA, inB, out
-///    %load_fp6
-///    // stack:                                           f', g0, G1, G1', inA, inB, out
-///    DUP12
-///    // stack:                                      inA, f', g0, G1, G1', inA, inB, out
-///    %load_fp6
-///    // stack:                                        f, f', g0, G1, G1', inA, inB, out
-///    %clone_mul_fp_fp6
-///    // stack:                                 (g0)f, f, f', g0, G1, G1', inA, inB, out
-///    %clone_mul_fp2_fp6_sh
-///    // stack:                     (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
-///    %add_fp6
-///    // stack:                     (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
-///    %clone_mul_fp2_fp6_sh2
-///    // stack:      (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
-///    %add_fp6
-///    // stack:      (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
-///    DUP26
-///    // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
-///    %store_fp6
-///    // stack:                                        f, f', g0, G1, G1', inA, inB, out 
-///    %semiclone_mul_fp2_fp6_sh
-///    // stack:                               (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %clone_mul_fp2_fp6_sh
-///    // stack:                  (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %add_fp6
-///    // stack:                  (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %clone_mul_fp_fp6
-///    // stack:         (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %add_fp6
-///    // stack:         (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    DUP20  offset_fp6
-///    // stack:   out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %store_fp6
-///    // stack:                                           f', g0, G1, G1', inA, inB, out 
-///    %pop14
diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 80155202..693a12d0 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -106,6 +106,11 @@ fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
     [i9(c2), c0, c1]
 }
 
+fn sparse_embed(x: [u32; 5]) -> [[[u32; 2]; 3]; 2] {
+    let [g0, g1, g1_, g2, g2_] = x;
+    [[[g0, 0], [g1, g1_], [0, 0]], [[0, 0], [g2, g2_], [0, 0]]]
+}
+
 fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] {
     let [f0, f1] = f;
     let [g0, g1] = g;
@@ -125,6 +130,17 @@ fn gen_fp6() -> [[u32; 2]; 3] {
     ]
 }
 
+fn gen_fp12_sparse() -> [[[u32; 2]; 3]; 2] {
+    let mut rng = thread_rng();
+    sparse_embed([
+        rng.gen_range(0..P254),
+        rng.gen_range(0..P254),
+        rng.gen_range(0..P254),
+        rng.gen_range(0..P254),
+        rng.gen_range(0..P254),
+    ])
+}
+
 fn as_stack(xs: Vec<u32>) -> Vec<U256> {
     xs.iter().map(|&x| U256::from(x)).rev().collect()
 }
@@ -177,7 +193,7 @@ fn make_initial_stack(
     as_stack(input)
 }
 
-#[test]
+// #[test]
 fn test_fp12() -> Result<()> {
     let in1 = 64;
     let in2 = 76;
@@ -206,3 +222,32 @@ fn test_fp12() -> Result<()> {
 
     Ok(())
 }
+
+#[test]
+fn test_fp12_sparse() -> Result<()> {
+    let in1 = 64;
+    let in2 = 76;
+    let out = 88;
+
+    let f0 = gen_fp6();
+    let f1 = gen_fp6();
+    let [g0, g1] = gen_fp12_sparse();
+
+    let initial_offset = KERNEL.global_labels["test_mul_fp12"];
+    let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, g0, g1);
+    let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
+        .stack()
+        .to_vec();
+
+    let mut output: Vec<u32> = mul_fp12([f0, f1], [g0, g1])
+        .into_iter()
+        .flatten()
+        .flatten()
+        .collect();
+    output.extend(vec![out]);
+    let expected = as_stack(output);
+
+    assert_eq!(final_stack, expected);
+
+    Ok(())
+}

From f6e45ea47d6320a63f14f7722ff6a6dec174695c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 14 Nov 2022 17:39:33 -0800
Subject: [PATCH 007/201] fix mul_fp2_fp6_sh2

---
 evm/src/cpu/kernel/asm/fields/field_macros.asm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/field_macros.asm b/evm/src/cpu/kernel/asm/fields/field_macros.asm
index 2d12aead..a24771fe 100644
--- a/evm/src/cpu/kernel/asm/fields/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/fields/field_macros.asm
@@ -442,12 +442,12 @@
 
 // cost: 21
 %macro mul_fp_fp6
-    // stack: c, f0,      f1,     f2,     f3,     f4,     f5
+    // stack: c , f0,      f1,    f2,     f3,     f4,     f5
     SWAP6
     DUP7
     MULFP254
     SWAP6
-    // stack: c, f0,      f1,     f2,     f3,     f4, c * f5
+    // stack: c , f0,      f1,    f2,     f3,     f4, c * f5
     SWAP5
     DUP6
     MULFP254
@@ -641,7 +641,7 @@
     DUP4
     MULFP254
     // stack:  cf1, df1_, f1_, c, f1, d, g1, g1_, g2, g2_
-    ADDFP254
+    SUBFP254
     // stack:         g0, f1_, c, f1, d, g1, g1_, g2, g2_
     SWAP3
     // stack:         f1, f1_, c, g0, d, g1, g1_, g2, g2_

From a8a852f3ec498ca74f09194321d828557757d4b0 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 14 Nov 2022 17:51:25 -0800
Subject: [PATCH 008/201] sparse mul works!

---
 evm/src/cpu/kernel/asm/fields/fp12_mul.asm | 26 +++++++++-------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index 32a8fe0e..d689c498 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -268,26 +268,22 @@ global mul_fp12_sparse:
     %add_fp6_hole
     // stack:      G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     DUP14
-    // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, out, f', f, inB, f', inA, inB, out
+    // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     %store_fp6
-    // stack:                                          inB, out, f', f, inB, f', inA, inB, out
-    %pop2
-    // stack:                                                    f', f, inB, f', inA, inB, out
-    DUP13
-    // stack:                                               inB, f', f, inB, f', inA, inB, out
+    // stack:                                          inB, f', out, f, inB, f', inA, inB, out
     %mload_kernel_general
-    // stack:                                               g0 , f', f, inB, f', inA, inB, out
+    // stack:                                          g0 , f', out, f, inB, f', inA, inB, out
     %mul_fp_fp6
-    // stack:                                               g0 * f', f, inB, f', inA, inB, out
-    %swap_fp6
-    // stack:                                             f  , g0 * f', inB, f', inA, inB, out
-    DUP13  %add_const(8)
-    // stack:                                    inB2,    f  , g0 * f', inB, f', inA, inB, out
+    // stack:                                          g0 * f', out, f, inB, f', inA, inB, out
+    %swap_fp6_hole
+    // stack:                                        f  , out, g0 * f', inB, f', inA, inB, out
+    DUP14  %add_const(8)
+    // stack:                               inB2,    f  , out, g0 * f', inB, f', inA, inB, out
     %load_fp2
-    // stack:                                     G2 ,    f  , g0 * f', inB, f', inA, inB, out
+    // stack:                                G2 ,    f  , out, g0 * f', inB, f', inA, inB, out
     %mul_fp2_fp6_sh
-    // stack:                                     G2 * sh(f) , g0 * f', inB, f', inA, inB, out
-    %add_fp6
+    // stack:                                G2 * sh(f) , out, g0 * f', inB, f', inA, inB, out
+    %add_fp6_hole
     // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
     %swap_fp6_hole
     // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out

From 56da8c02b011b20f31cb54a47919d081b493e172 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 14 Nov 2022 17:53:22 -0800
Subject: [PATCH 009/201] correct ops

---
 evm/src/cpu/kernel/asm/fields/fp12_mul.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index d689c498..491c20b0 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -176,7 +176,7 @@ ret_3:
 ///// SPARSE FP12 MULTIPLICATION /////
 //////////////////////////////////////
 
-/// cost: 646
+/// cost: 645
 
 /// fp6 functions:
 ///  fn      | num | ops | cost

From 77820b0f23573f3f15607a8ec0a9fad21b81566c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 15 Nov 2022 12:17:25 -0800
Subject: [PATCH 010/201] fp6 sq

---
 evm/src/cpu/kernel/asm/fields/fp6_mul.asm | 175 +++++++++++++++++++++-
 1 file changed, 174 insertions(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
index 9be87aac..402ee970 100644
--- a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
@@ -1,3 +1,7 @@
+//////////////////////////////////////
+///// GENERAL FP6 MULFP254TIPLICATION /////
+//////////////////////////////////////
+
 /// inputs:
 ///     C = C0 + C1t + C2t^2 
 ///       = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2
@@ -258,5 +262,174 @@ global mul_fp6:
     JUMP
 
 
+////////////////////////
+///// FP6 SQUARING /////
+////////////////////////
+
+/// inputs:
+///     C = C0 + C1t + C2t^2 
+///       = (c0 + c0_i) + (c1 + c1_i)t + (c2 + c2_i)t^2
+///
+/// output:
+///     E = E0 + E1t + E2t^2 = C^2
+///       = (e0 + e0_i) + (e1 + e1_i)t + (e2 + e2_i)t^2
+///
+/// initial stack: c0, c0_, c1, c1_, c2, c2_, retdest
+/// final   stack: e0, e0_, e1, e1_, e2, e2_
+
+/// computations:
+///
+/// E0 = C0C0 + i9(2C1C2) = (c0+c0_i)^2 + i9(2(c1+c1_i)(c2+c2_i))
+///    = (c0^2 - c0_^2) + (2c0c0_)i + i9[2(c1c2 - c1_c2_) + 2(c1_c2 + c1c2_)i]
+///
+/// E1 = 2*C0C1 + i9(C2C2) = 2(c0+c0_i)(c1+c1_i) + i9((c2+c2_i)(c2+c2_i))
+///    = 2(c0c1 - c0_c1_) + 2(c0c1_ + c0_c1)i + i9[(c2^2 - c2_^2) + (2c2c2_)i]
+///
+/// E2 = 2*C0C2 + C1C1
+///    = 2(c0c2 - c0_c2_) + 2(c0c2_ + c0_c2)i + (c1^2 - c1_^2) + (2c1c1_)i
+///
+/// e0  = (c0^2 - c0_^2) + x0
+/// e0_ = 2c0c0_ + x0_
+///     where x0_, x0 = %i9 2(c1c2 - c1_c2_), 2(c1_c2 + c1c2_)
+///
+/// e1  = 2(c0c1 - c0_c1_) + x1
+/// e1_ = 2(c0c1_ + c0_c1) + x1_
+///     where x1_, x1 = %i9 c2^2 - c2_^2, 2c2c2_
+///
+/// e2  = 2(c0c2 - c0_c2_) + (c1^2 - c1_^2)
+/// e2_ = 2(c0_c2 + c2c0_) + 2c1c1_
+
 global square_fp6:
-    
\ No newline at end of file
+    /// e0  = (c0^2 - c0_^2) + x0
+    /// e0_ = 2c0c0_ + x0_
+    ///     where x0_, x0 = %i9 2(c1c2 - c1_c2_), 2(c1_c2 + c1c2_)
+    DUP6
+    DUP4
+    MULFP254
+    DUP6
+    DUP6
+    MULFP254
+    ADDFP254
+    PUSH 2
+    MULFP254
+    DUP7
+    DUP6
+    MULFP254
+    DUP7
+    DUP6
+    MULFP254
+    SUBFP254
+    PUSH 2
+    MULFP254
+    %i9
+    // stack:          x0_, x0
+    DUP3
+    DUP5
+    MULFP254
+    PUSH 2
+    MULFP254
+    // stack:  2c0c0_, x0_, x0
+    ADDFP254
+    // stack:          e0_, x0
+    SWAP4
+    SWAP1
+    // stack:               x0
+    DUP4
+    DUP1
+    MULFP254
+    DUP4
+    DUP1
+    MULFP254
+    SUBFP254
+    // stack: c0^2 - c0_^2, x0
+    ADDFP254
+    // stack:               e0
+    SWAP3
+
+    /// e1  = 2(c0c1  - c0_c1_) + x1
+    /// e1_ = 2(c0c1_ + c0_c1 ) + x1_
+    ///     where x1_, x1 = %i9 c2^2 - c2_^2, 2c2c2_
+    DUP7
+    DUP9
+    MULFP254
+    PUSH 2
+    MULFP254
+    DUP9
+    DUP1
+    MULFP254
+    DUP9
+    DUP1
+    MULFP254
+    SUBFP254
+    %i9
+    // stack:                    x1_, x1
+    DUP4
+    DUP4
+    MULFP254
+    DUP9
+    DUP7
+    MULFP254
+    ADDFP254
+    PUSH 2
+    MULFP254
+    // stack:  2(c0c1_ + c0_c1), x1_, x1
+    ADDFP254
+    // stack:                    e1_, x1
+    SWAP8
+    SWAP1
+    // stack:                         x1
+    DUP8
+    DUP4
+    MULFP254
+    DUP5
+    DUP7
+    MULFP254
+    SUBFP254
+    PUSH 2
+    MULFP254
+    // stack:      2(c0c1  - c0_c1_), x1
+    ADDFP254
+    SWAP7
+
+    /// e2  = 2(c0c2 - c0_c2_) + (c1^2 - c1_^2)
+    /// e2_ = 2(c0c2_ + c0_c2 + c1c1_)
+    DUP1
+    DUP1
+    MULFP254
+    DUP5
+    DUP1
+    MULFP254
+    SUBFP254
+    DUP11
+    DUP5
+    MULFP254
+    DUP4
+    DUP8
+    MULFP254
+    SUBFP254
+    PUSH 2
+    MULFP254
+    ADDFP254
+    // stack: e2
+    SWAP10
+    // stack: c2_, c1_, c2, c0_, c1, c0
+    SWAP4
+    MULFP254
+    // stack:   c1c1_, c2, c0_, c2_, c0
+    SWAP2
+    MULFP254
+    // stack:    c0_c2 , c1c1_, c2_, c0
+    ADDFP254
+    // stack:    c0_c2 + c1c1_, c2_, c0
+    SWAP2
+    MULFP254
+    // stack:     c0c2_ , c0_c2 + c1c1_
+    ADDFP254
+    // stack:     c0c2_ + c0_c2 + c1c1_
+    PUSH 2
+    MULFP254
+    // stack:                       e2_
+    SWAP6
+
+    // stack: retdest, e0, e0_, e1, e1_, e2, e2_
+    JUMP

From 3dc66a259fb89c19f79376f25a18b5472e7e74b3 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 15 Nov 2022 13:34:47 -0800
Subject: [PATCH 011/201] fp12 sq works

---
 .../cpu/kernel/asm/fields/field_macros.asm    | 22 +++++++++++++-
 evm/src/cpu/kernel/asm/fields/fp12_mul.asm    | 23 +++++++++-----
 evm/src/cpu/kernel/asm/fields/fp6_mul.asm     |  2 +-
 evm/src/cpu/kernel/tests/fields.rs            | 30 ++++++++++++++++++-
 4 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/field_macros.asm b/evm/src/cpu/kernel/asm/fields/field_macros.asm
index a24771fe..d34b6ee6 100644
--- a/evm/src/cpu/kernel/asm/fields/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/fields/field_macros.asm
@@ -677,4 +677,24 @@
     // stack  9b , a, 9a - b
     ADDFP254
     // stack: 9b + a, 9a - b 
-%endmacro
\ No newline at end of file
+%endmacro
+
+%macro sh
+    // stack: f0 , f0_, f1,  f1_, f2 , f2_
+    SWAP2
+    // stack: f1 , f0_, g0 , f1_, f2 , f2_
+    SWAP4
+    // stack: f2 , f0_, g0 , f1_, g1 , f2_
+    SWAP1
+    // stack: f0_, f2 , g0 , f1_, g1 , f2_
+    SWAP3
+    // stack: f1_, f2 , g0 , g0_, g1 , f2_
+    SWAP5
+    // stack: f2_, f2 , g0 , g0_, g1 , g1_
+    SWAP1 
+    // stack: f2 , f2_, g0 , g0_, g1 , g1_
+    %i9
+    // stack: g2_, g2 , g0 , g0_, g1 , g1_
+    SWAP1
+    // stack: g2 , g2_, g0 , g0_, g1 , g1_
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index 491c20b0..690b1124 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -23,7 +23,7 @@ global test_mul_fp12:
     // stack:                     ret_stack, inB, out, inA,       out
     SWAP3
     // stack:                           inA, inB, out, ret_stack, out
-    %jump(mul_fp12_sparse)
+    %jump(square_fp12_test)
 ret_stack:
     // stack:          out
     DUP1  %offset_fp6
@@ -36,6 +36,11 @@ ret_stack:
     // stack:   h, h', out
     %jump(0xdeadbeef)
 
+square_fp12_test:
+    POP
+    %jump(square_fp12)
+
+
 ///////////////////////////////////////
 ///// GENERAL FP12 MULTIPLICATION /////
 ///////////////////////////////////////
@@ -360,7 +365,7 @@ global mul_fp12_sparse:
 ///  swap  |   2 |  16 |   32
 ///  add   |   1 |  16 |   16
 ///  mul   |   1 | 157 |  157
-///  sq    |   2 |     |
+///  sq    |   2 | 101 |  202
 ///  dbl   |   1 |  13 |   13
 ///
 /// lone stack operations:
@@ -391,7 +396,7 @@ global mul_fp12_sparse:
 
 global square_fp12:
     // stack:                                                    inp, out
-    DUP1  %offset_fp6
+    DUP1
     // stack:                                               inp, inp, out
     %load_fp6 
     // stack:                                                 f, inp, out
@@ -427,17 +432,19 @@ post_mul:
     %jump(square_fp6)
 post_sq1:
     // stack:                                 f'f', inp, f, post_sq2, out
+    %sh
+    // stack:                             sh(f'f'), inp, f, post_sq2, out
     %swap_fp6_hole
-    // stack:                                 f, inp, f'f', post_sq2, out
+    // stack:                             f, inp, sh(f'f'), post_sq2, out
     SWAP6  SWAP13  SWAP6
-    // stack:                                 f, post_sq2, f'f', inp, out
+    // stack:                             f, post_sq2, sh(f'f'), inp, out
     %jump(square_fp6)
 post_sq2:
-    // stack:                                         ff , f'f', inp, out
+    // stack:                                     ff , sh(f'f'), inp, out
     %add_fp6
-    // stack:                                         ff + f'f', inp, out
+    // stack:                                     ff + sh(f'f'), inp, out
     DUP8
-    // stack:                                    out, ff + f'f', inp, out
+    // stack:                                out, ff + sh(f'f'), inp, out
     %store_fp6
     // stack:                                                    inp, out
     %pop2
diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
index 402ee970..384b7944 100644
--- a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
@@ -58,7 +58,6 @@
 /// e2  = c0d2  + c1d1  + c2d0  - (c0_d2_ + c1_d1_ + c2_d0_)
 /// e2_ = c0d2_ + c0_d2 + c1d1_ +  c1_d1  + c2d0_  + c2_d0
 
-
 // cost: 157
 global mul_fp6:
     // e2
@@ -299,6 +298,7 @@ global mul_fp6:
 /// e2  = 2(c0c2 - c0_c2_) + (c1^2 - c1_^2)
 /// e2_ = 2(c0_c2 + c2c0_) + 2c1c1_
 
+// cost: 101
 global square_fp6:
     /// e0  = (c0^2 - c0_^2) + x0
     /// e0_ = 2c0c0_ + x0_
diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 693a12d0..89be577e 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -223,7 +223,7 @@ fn test_fp12() -> Result<()> {
     Ok(())
 }
 
-#[test]
+// #[test]
 fn test_fp12_sparse() -> Result<()> {
     let in1 = 64;
     let in2 = 76;
@@ -251,3 +251,31 @@ fn test_fp12_sparse() -> Result<()> {
 
     Ok(())
 }
+
+#[test]
+fn test_fp12_square() -> Result<()> {
+    let in1 = 64;
+    let in2 = 76;
+    let out = 88;
+
+    let f0 = gen_fp6();
+    let f1 = gen_fp6();
+
+    let initial_offset = KERNEL.global_labels["test_mul_fp12"];
+    let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, f0, f1);
+    let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
+        .stack()
+        .to_vec();
+
+    let mut output: Vec<u32> = mul_fp12([f0, f1], [f0, f1])
+        .into_iter()
+        .flatten()
+        .flatten()
+        .collect();
+    output.extend(vec![out]);
+    let expected = as_stack(output);
+
+    assert_eq!(final_stack, expected);
+
+    Ok(())
+}
\ No newline at end of file

From c1f7d1ce5a21723cf4958dc86dd01c458f920acd Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 15 Nov 2022 13:40:14 -0800
Subject: [PATCH 012/201] comments

---
 evm/src/cpu/kernel/asm/fields/fp12_mul.asm | 2 ++
 evm/src/cpu/kernel/asm/fields/fp6_mul.asm  | 2 +-
 evm/src/cpu/kernel/tests/fields.rs         | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
index 690b1124..b90f93d9 100644
--- a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp12_mul.asm
@@ -356,6 +356,8 @@ global mul_fp12_sparse:
 ///// FP12 SQUARING /////
 /////////////////////////
 
+/// cost: 646
+
 /// fp6 functions:
 ///  fn    | num | ops | cost
 ///  -------------------------
diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
index 384b7944..3cc563c8 100644
--- a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
+++ b/evm/src/cpu/kernel/asm/fields/fp6_mul.asm
@@ -1,5 +1,5 @@
 //////////////////////////////////////
-///// GENERAL FP6 MULFP254TIPLICATION /////
+///// GENERAL FP6 MULTIPLICATION /////
 //////////////////////////////////////
 
 /// inputs:
diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 89be577e..1eb61c16 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -278,4 +278,4 @@ fn test_fp12_square() -> Result<()> {
     assert_eq!(final_stack, expected);
 
     Ok(())
-}
\ No newline at end of file
+}

From b779b82512eae7c4e5ce12fb681caff9dba7da57 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 16 Nov 2022 15:12:42 -0800
Subject: [PATCH 013/201] mul_fp2

---
 .../cpu/kernel/asm/fields/field_macros.asm    | 113 ++++++++++++------
 1 file changed, 77 insertions(+), 36 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/field_macros.asm b/evm/src/cpu/kernel/asm/fields/field_macros.asm
index d34b6ee6..c65e1170 100644
--- a/evm/src/cpu/kernel/asm/fields/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/fields/field_macros.asm
@@ -13,7 +13,33 @@
     // stack: ind0, x1
     %mload_kernel_general
     // stack:   x0, x1
-%endmacro
+%endmacro 
+
+%macro mul_fp2
+    // stack:          a, b, c, d
+    DUP4
+    DUP3
+    MULFP254
+    // stack:      bd, a, b, c, d
+    DUP4 
+    DUP3
+    MULFP254
+    // stack: ac , bd, a, b, c, d 
+    SUBFP254
+    // stack: ac - bd, a, b, c, d 
+    SWAP4
+    // stack: d, a, b, c, ac - bd
+    MULFP254
+    // stack:   ad, b, c, ac - bd
+    SWAP2
+    // stack:   c, b, ad, ac - bd
+    MULFP254
+    // stack:    bc , ad, ac - bd
+    ADDFP254
+    // stack:    bc + ad, ac - bd
+    SWAP1
+    // stack:    ac - bd, bc + ad
+%endmacro 
 
 // cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp6
@@ -354,6 +380,26 @@
     // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
 %endmacro
 
+%macro sh
+    // stack: f0 , f0_, f1,  f1_, f2 , f2_
+    SWAP2
+    // stack: f1 , f0_, g0 , f1_, f2 , f2_
+    SWAP4
+    // stack: f2 , f0_, g0 , f1_, g1 , f2_
+    SWAP1
+    // stack: f0_, f2 , g0 , f1_, g1 , f2_
+    SWAP3
+    // stack: f1_, f2 , g0 , g0_, g1 , f2_
+    SWAP5
+    // stack: f2_, f2 , g0 , g0_, g1 , g1_
+    SWAP1 
+    // stack: f2 , f2_, g0 , g0_, g1 , g1_
+    %i9
+    // stack: g2_, g2 , g0 , g0_, g1 , g1_
+    SWAP1
+    // stack: g2 , g2_, g0 , g0_, g1 , g1_
+%endmacro
+
 // cost: 16
 %macro add_fp6
     // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
@@ -472,6 +518,25 @@
     // stack: c * f0, c * f1, c * f2, c * f3, c * f4, c * f5
 %endmacro
 
+// cost: 9; note this returns y, x for the output x + yi
+%macro i9
+    // stack:          a , b
+    DUP2
+    // stack:      b,  a , b
+    DUP2
+    // stack:  a , b,  a , b
+    PUSH 9  MULFP254
+    // stack: 9a , b,  a , b
+    SUBFP254
+    // stack: 9a - b,  a , b
+    SWAP2 
+    // stack:  b , a, 9a - b
+    PUSH 9  MULFP254
+    // stack  9b , a, 9a - b
+    ADDFP254
+    // stack: 9b + a, 9a - b 
+%endmacro
+
 /// cost: 1 i9 (9) + 16 dups + 15 swaps + 12 muls + 6 adds/subs = 58
 ///
 /// G0 + G1t + G2t^2 = (a+bi)t * (F0 + F1t + F2t^2) 
@@ -660,41 +725,17 @@
     // stack:                   g0, g0_, g1, g1_, g2, g2_
 %endmacro
 
-// cost: 9; note this returns y, x for the output x + yi
-%macro i9
-    // stack:          a , b
-    DUP2
-    // stack:      b,  a , b
-    DUP2
-    // stack:  a , b,  a , b
-    PUSH 9  MULFP254
-    // stack: 9a , b,  a , b
+%macro frob_fp2
+    // stack:     n  , a, b
+    PUSH 2
+    MULFP254
+    PUSH 1
     SUBFP254
-    // stack: 9a - b,  a , b
-    SWAP2 
-    // stack:  b , a, 9a - b
-    PUSH 9  MULFP254
-    // stack  9b , a, 9a - b
-    ADDFP254
-    // stack: 9b + a, 9a - b 
-%endmacro
-
-%macro sh
-    // stack: f0 , f0_, f1,  f1_, f2 , f2_
+    // stack:  1-2n  , a, b 
+    DUP3
+    MULFP254
+    // stack: (1-2n)b, a, b
     SWAP2
-    // stack: f1 , f0_, g0 , f1_, f2 , f2_
-    SWAP4
-    // stack: f2 , f0_, g0 , f1_, g1 , f2_
-    SWAP1
-    // stack: f0_, f2 , g0 , f1_, g1 , f2_
-    SWAP3
-    // stack: f1_, f2 , g0 , g0_, g1 , f2_
-    SWAP5
-    // stack: f2_, f2 , g0 , g0_, g1 , g1_
-    SWAP1 
-    // stack: f2 , f2_, g0 , g0_, g1 , g1_
-    %i9
-    // stack: g2_, g2 , g0 , g0_, g1 , g1_
-    SWAP1
-    // stack: g2 , g2_, g0 , g0_, g1 , g1_
+    POP
+    // stack:          a, (1-2n)b
 %endmacro

From ca92057b01dc4287655880e3309689881e2acca7 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 16 Nov 2022 16:28:36 -0800
Subject: [PATCH 014/201] frob_fp6

---
 .../cpu/kernel/asm/fields/field_macros.asm    | 104 ++++++++++++------
 evm/src/cpu/kernel/asm/fields/frobenius.asm   |  93 ++++++++++++++++
 2 files changed, 163 insertions(+), 34 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/field_macros.asm b/evm/src/cpu/kernel/asm/fields/field_macros.asm
index c65e1170..73736194 100644
--- a/evm/src/cpu/kernel/asm/fields/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/fields/field_macros.asm
@@ -2,6 +2,8 @@
     %add_const(6)
 %endmacro
 
+// fp2 macros
+
 // cost: 2 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp2
     // stack:       ptr
@@ -15,6 +17,72 @@
     // stack:   x0, x1
 %endmacro 
 
+%macro conj
+    // stack: a,  b
+    SWAP1 
+    PUSH 0
+    SUBFP254
+    SWAP1
+    // stack: a, -b 
+%endmacro
+
+%macro swap_fp2
+    // stack: a , a_, b , b_
+    SWAP2
+    // stack: b , a_, a , b_
+    SWAP1
+    // stack: a_, b , a , b_
+    SWAP3
+    // stack: b_, b , a , a_
+    SWAP1 
+    // stack: b , b_, a , a_
+%endmacro
+
+%macro swap_fp2_hole_2
+    // stack: a , a_, X, b , b_
+    SWAP4
+    // stack: b , a_, X, a , b_
+    SWAP1
+    // stack: a_, b , X, a , b_
+    SWAP5
+    // stack: b_, b , X, a , a_
+    SWAP1 
+    // stack: b , b_, X, a , a_
+%endmacro
+
+%macro mul_fp_fp2
+    // stack:    c, x, y
+    SWAP2
+    // stack:    y, x, c 
+    DUP3
+    // stack: c, y, x, c
+    MULFP254
+    // stack:   cy, x, c
+    SWAP2
+    // stack:   c, x, cy
+    MULFP254
+    // stack:     cx, cy 
+%endmacro
+
+// cost: 9; note this returns y, x for the output x + yi
+%macro i9
+    // stack:          a , b
+    DUP2
+    // stack:      b,  a , b
+    DUP2
+    // stack:  a , b,  a , b
+    PUSH 9  MULFP254
+    // stack: 9a , b,  a , b
+    SUBFP254
+    // stack: 9a - b,  a , b
+    SWAP2 
+    // stack:  b , a, 9a - b
+    PUSH 9  MULFP254
+    // stack  9b , a, 9a - b
+    ADDFP254
+    // stack: 9b + a, 9a - b 
+%endmacro
+
 %macro mul_fp2
     // stack:          a, b, c, d
     DUP4
@@ -41,6 +109,8 @@
     // stack:    ac - bd, bc + ad
 %endmacro 
 
+// fp6 macros
+
 // cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp6
     // stack:                         ptr
@@ -518,25 +588,6 @@
     // stack: c * f0, c * f1, c * f2, c * f3, c * f4, c * f5
 %endmacro
 
-// cost: 9; note this returns y, x for the output x + yi
-%macro i9
-    // stack:          a , b
-    DUP2
-    // stack:      b,  a , b
-    DUP2
-    // stack:  a , b,  a , b
-    PUSH 9  MULFP254
-    // stack: 9a , b,  a , b
-    SUBFP254
-    // stack: 9a - b,  a , b
-    SWAP2 
-    // stack:  b , a, 9a - b
-    PUSH 9  MULFP254
-    // stack  9b , a, 9a - b
-    ADDFP254
-    // stack: 9b + a, 9a - b 
-%endmacro
-
 /// cost: 1 i9 (9) + 16 dups + 15 swaps + 12 muls + 6 adds/subs = 58
 ///
 /// G0 + G1t + G2t^2 = (a+bi)t * (F0 + F1t + F2t^2) 
@@ -724,18 +775,3 @@
     SWAP1
     // stack:                   g0, g0_, g1, g1_, g2, g2_
 %endmacro
-
-%macro frob_fp2
-    // stack:     n  , a, b
-    PUSH 2
-    MULFP254
-    PUSH 1
-    SUBFP254
-    // stack:  1-2n  , a, b 
-    DUP3
-    MULFP254
-    // stack: (1-2n)b, a, b
-    SWAP2
-    POP
-    // stack:          a, (1-2n)b
-%endmacro
diff --git a/evm/src/cpu/kernel/asm/fields/frobenius.asm b/evm/src/cpu/kernel/asm/fields/frobenius.asm
index e69de29b..1080dac4 100644
--- a/evm/src/cpu/kernel/asm/fields/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/fields/frobenius.asm
@@ -0,0 +1,93 @@
+/// def frob_fp6(n, C0, C1, C2):
+///     if n%2:
+///         D0, D1, D2 = C0`, FROB_t1[n] * C1`, FROB_t2[n] * C2`
+///     else: 
+///         D0, D1, D2 = C0 , FROB_t1[n] * C1 , FROB_t2[n] * C2
+///     return D0, D1, D2 
+
+%macro frob_fp6_1
+    // stack: C0 , C1 , C2
+    %conj
+    // stack: D0 , C1 , C2
+    %swap_fp2_hole_2
+    // stack: C2 , C1 , D0
+    %conj
+    // stack: C2`, C1 , D0
+    PUSH 0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126
+    PUSH 0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762
+    %mul_fp2
+    // stack: D2 , C1 , D0
+    %swap_fp2_hole_2
+    // stack: D0 , C1 , D2
+    %swap_fp2
+    // stack: C1 , D0 , D2
+    %conj
+    // stack: C1`, D0 , D2
+    PUSH 0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2
+    PUSH 0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d
+    %mul_fp2
+    // stack: D1 , D0 , D2
+    %swap_fp2
+    // stack: D0 , D1 , D2
+%endmacro
+
+%macro frob_fp6_2
+    // stack: C0, C1, C2
+    %swap_fp2_hole_2
+    // stack: C2, C1, C0
+    PUSH 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe
+    %mul_fp_fp2
+    // stack: D2, C1, C0
+    %swap_fp2_hole_2
+    // stack: C0, C1, D2
+    %swap_fp2
+    // stack: C1, C0, D2
+    PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48
+    %mul_fp_fp2
+    // stack: D1, C0, D2
+    %swap_fp2
+    // stack: D0, D1, D2
+%endmacro
+
+%macro frob_fp6_3
+    // stack: C0 , C1 , C2
+    %conj
+    // stack: D0 , C1 , C2
+    %swap_fp2_hole_2
+    // stack: C2 , C1 , D0
+    %conj
+    // stack: C2`, C1 , D0
+    PUSH 0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f
+    PUSH 0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066
+    %mul_fp2
+    // stack: D2 , C1 , D0
+    %swap_fp2_hole_2
+    // stack: D0 , C1 , D2
+    %swap_fp2
+    // stack: C1 , D0 , D2
+    %conj
+    // stack: C1`, D0 , D2
+    PUSH 0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de
+    PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d
+    %mul_fp2
+    // stack: D1 , D0 , D2
+    %swap_fp2
+    // stack: D0 , D1 , D2
+%endmacro
+
+
+/// def Fp12_frob(n, f, f'):
+/// return                frob_fp6(n, f ),
+///           FROB_z[n] * frob_fp6(n, f')
+
+global frob_fp12_1:
+
+
+global frob_fp12_2:
+
+
+global frob_fp12_3:
+
+
+global frob_fp12_6:
+    

From 8a7d6c6529b24f663267d9b0fab9e8f2e2112971 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 16 Nov 2022 16:56:23 -0800
Subject: [PATCH 015/201] frob_fp12

---
 evm/src/cpu/kernel/asm/fields/frobenius.asm | 120 ++++++++++++++++++--
 1 file changed, 112 insertions(+), 8 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/frobenius.asm b/evm/src/cpu/kernel/asm/fields/frobenius.asm
index 1080dac4..9042ca6f 100644
--- a/evm/src/cpu/kernel/asm/fields/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/fields/frobenius.asm
@@ -1,4 +1,4 @@
-/// def frob_fp6(n, C0, C1, C2):
+/// def frob_fp6_n(C0, C1, C2):
 ///     if n%2:
 ///         D0, D1, D2 = C0`, FROB_t1[n] * C1`, FROB_t2[n] * C2`
 ///     else: 
@@ -76,18 +76,122 @@
 %endmacro
 
 
-/// def Fp12_frob(n, f, f'):
-/// return                frob_fp6(n, f ),
-///           FROB_z[n] * frob_fp6(n, f')
+/// def Fp12_frob_n(f, f'):
+///     g  =             frob_fp6(n, f )
+///     g' = FROB_z[n] * frob_fp6(n, f')
+///     return g, g'
 
 global frob_fp12_1:
-
+    // stack:           ptr, retdest
+    DUP1
+    // stack:      ptr, ptr, retdest 
+    %load_fp6
+    // stack:        f, ptr, retdest
+    %frob_fp6_1
+    // stack:        g, ptr, retdest
+    DUP7
+    // stack:   ptr, g, ptr, retdest
+    %load_fp6
+    // stack:           ptr, retdest
+    DUP1  %offset_fp6
+    // stack:     ptr', ptr, retdest
+    %load_fp6
+    // stack:       f', ptr, retdest
+    %frobz_1
+    // stack:       g', ptr, retdest
+    DUP1  %offset_fp6
+    // stack: ptr', g', ptr, retdest
+    %store_fp6
+    // stack:           ptr, retdest
+    SWAP1
+    JUMP
 
 global frob_fp12_2:
-
+    // stack:           ptr, retdest
+    DUP1
+    // stack:      ptr, ptr, retdest 
+    %load_fp6
+    // stack:        f, ptr, retdest
+    %frob_fp6_2
+    // stack:        g, ptr, retdest
+    DUP7
+    // stack:   ptr, g, ptr, retdest
+    %load_fp6
+    // stack:           ptr, retdest
+    DUP1  %offset_fp6
+    // stack:     ptr', ptr, retdest
+    %load_fp6
+    // stack:       f', ptr, retdest
+    %frobz_2
+    // stack:       g', ptr, retdest
+    DUP1  %offset_fp6
+    // stack: ptr', g', ptr, retdest
+    %store_fp6
+    // stack:           ptr, retdest
+    SWAP1
+    JUMP
 
 global frob_fp12_3:
-
+    // stack:           ptr, retdest
+    DUP1
+    // stack:      ptr, ptr, retdest 
+    %load_fp6
+    // stack:        f, ptr, retdest
+    %frob_fp6_3
+    // stack:        g, ptr, retdest
+    DUP7
+    // stack:   ptr, g, ptr, retdest
+    %load_fp6
+    // stack:           ptr, retdest
+    DUP1  %offset_fp6
+    // stack:     ptr', ptr, retdest
+    %load_fp6
+    // stack:       f', ptr, retdest
+    %frobz_3
+    // stack:       g', ptr, retdest
+    DUP1  %offset_fp6
+    // stack: ptr', g', ptr, retdest
+    %store_fp6
+    // stack:           ptr, retdest
+    SWAP1
+    JUMP
 
 global frob_fp12_6:
-    
+    // stack:           ptr, retdest
+    DUP1  %offset_fp6
+    // stack:     ptr', ptr, retdest
+    %load_fp6
+    // stack:       f', ptr, retdest
+    %frobz_6
+    // stack:       g', ptr, retdest
+    DUP7  %offset_fp6
+    // stack: ptr', g', ptr, retdest
+    %store_fp6
+    // stack:           ptr, retdest
+    SWAP1
+    JUMP
+
+%macro frobz_1
+    %frob_fp6_1
+    PUSH 0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac
+    PUSH 0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470
+    %mul_fp2_fp6
+%endmacro
+
+%macro frobz_2
+    %frob_fp6_2
+    PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49
+    %mul_fp_fp6
+%endmacro
+
+%macro frobz_3
+    %frob_fp6_3
+    PUSH 0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101
+    PUSH 0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f
+    %mul_fp2_fp6
+%endmacro
+
+%macro frobz_6
+    PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46
+    %mul_fp_fp6
+%endmacro

From c83dcccada92a1e93faf462ea0d96df6643d9812 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 16 Nov 2022 17:00:56 -0800
Subject: [PATCH 016/201] macros

---
 evm/src/cpu/kernel/asm/fields/frobenius.asm | 58 +++++++++++++++------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/fields/frobenius.asm b/evm/src/cpu/kernel/asm/fields/frobenius.asm
index 9042ca6f..fbca28b3 100644
--- a/evm/src/cpu/kernel/asm/fields/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/fields/frobenius.asm
@@ -13,9 +13,7 @@
     // stack: C2 , C1 , D0
     %conj
     // stack: C2`, C1 , D0
-    PUSH 0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126
-    PUSH 0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762
-    %mul_fp2
+    %froby2_1
     // stack: D2 , C1 , D0
     %swap_fp2_hole_2
     // stack: D0 , C1 , D2
@@ -23,9 +21,7 @@
     // stack: C1 , D0 , D2
     %conj
     // stack: C1`, D0 , D2
-    PUSH 0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2
-    PUSH 0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d
-    %mul_fp2
+    %froby1_1
     // stack: D1 , D0 , D2
     %swap_fp2
     // stack: D0 , D1 , D2
@@ -35,15 +31,13 @@
     // stack: C0, C1, C2
     %swap_fp2_hole_2
     // stack: C2, C1, C0
-    PUSH 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe
-    %mul_fp_fp2
+    %froby2_2
     // stack: D2, C1, C0
     %swap_fp2_hole_2
     // stack: C0, C1, D2
     %swap_fp2
     // stack: C1, C0, D2
-    PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48
-    %mul_fp_fp2
+    %froby1_2
     // stack: D1, C0, D2
     %swap_fp2
     // stack: D0, D1, D2
@@ -57,9 +51,7 @@
     // stack: C2 , C1 , D0
     %conj
     // stack: C2`, C1 , D0
-    PUSH 0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f
-    PUSH 0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066
-    %mul_fp2
+    %froby2_3
     // stack: D2 , C1 , D0
     %swap_fp2_hole_2
     // stack: D0 , C1 , D2
@@ -67,9 +59,7 @@
     // stack: C1 , D0 , D2
     %conj
     // stack: C1`, D0 , D2
-    PUSH 0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de
-    PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d
-    %mul_fp2
+    %froby1_3
     // stack: D1 , D0 , D2
     %swap_fp2
     // stack: D0 , D1 , D2
@@ -195,3 +185,39 @@ global frob_fp12_6:
     PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46
     %mul_fp_fp6
 %endmacro
+
+
+%macro froby1_1
+    PUSH 0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2
+    PUSH 0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d
+    %mul_fp2
+%endmacro
+
+%macro froby2_1
+    PUSH 0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126
+    PUSH 0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762
+    %mul_fp2
+%endmacro
+
+%macro froby1_2
+    PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48
+    %mul_fp_fp2
+%endmacro
+
+%macro froby2_2
+    PUSH 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe
+    %mul_fp_fp2
+%endmacro
+
+
+%macro froby1_3
+    PUSH 0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de
+    PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d
+    %mul_fp2
+%endmacro
+
+%macro froby2_3
+    PUSH 0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f
+    PUSH 0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066
+    %mul_fp2
+%endmacro

From 68cde336a3e614971d3489d9fc763874603e4684 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 17 Nov 2022 13:18:13 -0800
Subject: [PATCH 017/201] reorg

---
 evm/src/cpu/kernel/aggregator.rs              | 12 +++---
 .../{ => curve_arithmetic}/curve_add.asm      |  2 +-
 .../{ => curve_arithmetic}/curve_mul.asm      |  0
 .../curve/bn254/curve_arithmetic/lines.asm    | 16 ++++++++
 .../bn254/curve_arithmetic/tate_pairing.asm   | 41 +++++++++++++++++++
 .../bn254/field_arithmetic}/field_macros.asm  |  0
 .../bn254/field_arithmetic}/fp12_mul.asm      |  0
 .../bn254/field_arithmetic}/fp6_mul.asm       |  0
 .../bn254/field_arithmetic}/frobenius.asm     |  0
 .../bn254/{ => field_arithmetic}/moddiv.asm   |  0
 10 files changed, 64 insertions(+), 7 deletions(-)
 rename evm/src/cpu/kernel/asm/curve/bn254/{ => curve_arithmetic}/curve_add.asm (99%)
 rename evm/src/cpu/kernel/asm/curve/bn254/{ => curve_arithmetic}/curve_mul.asm (100%)
 create mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
 create mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
 rename evm/src/cpu/kernel/asm/{fields => curve/bn254/field_arithmetic}/field_macros.asm (100%)
 rename evm/src/cpu/kernel/asm/{fields => curve/bn254/field_arithmetic}/fp12_mul.asm (100%)
 rename evm/src/cpu/kernel/asm/{fields => curve/bn254/field_arithmetic}/fp6_mul.asm (100%)
 rename evm/src/cpu/kernel/asm/{fields => curve/bn254/field_arithmetic}/frobenius.asm (100%)
 rename evm/src/cpu/kernel/asm/curve/bn254/{ => field_arithmetic}/moddiv.asm (100%)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 412a9346..f973d7ce 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -23,9 +23,9 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/core/terminate.asm"),
         include_str!("asm/core/transfer.asm"),
         include_str!("asm/core/util.asm"),
-        include_str!("asm/curve/bn254/curve_add.asm"),
-        include_str!("asm/curve/bn254/curve_mul.asm"),
-        include_str!("asm/curve/bn254/moddiv.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/moddiv.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_mul.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
@@ -34,9 +34,9 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/secp256k1/lift_x.asm"),
         include_str!("asm/curve/secp256k1/moddiv.asm"),
         include_str!("asm/exp.asm"),
-        include_str!("asm/fields/field_macros.asm"),
-        include_str!("asm/fields/fp6_mul.asm"),
-        include_str!("asm/fields/fp12_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
         include_str!("asm/halt.asm"),
         include_str!("asm/main.asm"),
         include_str!("asm/memory/core.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
similarity index 99%
rename from evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index dda82109..dae060c2 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -310,4 +310,4 @@ global ec_double:
     SWAP2
     // stack: retdest, u256::MAX, u256::MAX
     JUMP
-%endmacro
\ No newline at end of file
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
new file mode 100644
index 00000000..01adc040
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
@@ -0,0 +1,16 @@
+/// p1, p2 : [Fp; 2], q : [Fp2; 2]
+
+/// def cord(p1x, p1y, p2x, p2y, qx, qy):
+///     return sparse_embed(
+///         p1y*p2x - p2y*p1x, 
+///         (p2y - p1y) * qx, 
+///         (p1x - p2x) * qy,
+///     )
+    
+/// def tangent(px, py, qx, qy):
+///     return sparse_embed(
+///         -9 + py**2, 
+///         (-3*px**2) * qx, 
+///         (2*py)     * qy,
+///     )
+
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
new file mode 100644
index 00000000..98167675
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -0,0 +1,41 @@
+/// def tate(P : [Fp; 2], Q: [Fp2; 2]) -> Fp12:
+///     out = miller_loop(P)
+///
+///     inv = inverse_fp12(out)
+///     out = frob_fp12_6(out)
+///     out = mul_fp12(out, inv)
+///
+///     acx = frob_fp12_2(out)
+///     out = mul_fp12(acx, out)
+///
+///     pow = fast_exp(out)
+///     out = frob_fp12_3(out)
+///     return mul_fp12(out, pow)
+
+global tate:
+    // stack:         ptr, out
+    PUSH 1
+    // stack:      1, ptr, out
+    PUSH 100
+    // stack: 100, 1, ptr, out
+    %mstore_kernel_general
+
+
+/// def miller_loop(P):
+///     out = 1
+///     O = P
+///     for i in EXP[1:-1]:
+///         out = square_fp12(out)
+///         line = tangent(O, Q)
+///         out = mul_fp12_sparse(out, line)
+///         O += O
+///         if i:
+///             line = cord(P, O, Q)
+///             out = mul_fp12_sparse(out, line)
+///             O += P
+///     out = square_fp12(out)
+///     line = tangent(O, Q)
+///     return mul_fp12_sparse(out, line)
+
+
+
diff --git a/evm/src/cpu/kernel/asm/fields/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/fields/field_macros.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
diff --git a/evm/src/cpu/kernel/asm/fields/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/fields/fp12_mul.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
diff --git a/evm/src/cpu/kernel/asm/fields/fp6_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp6_mul.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/fields/fp6_mul.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp6_mul.asm
diff --git a/evm/src/cpu/kernel/asm/fields/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/fields/frobenius.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/curve/bn254/moddiv.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm

From 84c1954daf49008e6ccc9f6308c310b1fbddcc72 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 18 Nov 2022 16:48:11 -0800
Subject: [PATCH 018/201] tate

---
 .../bn254/curve_arithmetic/tate_pairing.asm   | 80 ++++++++++++++++---
 .../curve/bn254/field_arithmetic/fp12_mul.asm | 31 ++++---
 evm/src/cpu/kernel/tests/fields.rs            |  2 +-
 3 files changed, 82 insertions(+), 31 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 98167675..a109796c 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -1,30 +1,80 @@
 /// def tate(P : [Fp; 2], Q: [Fp2; 2]) -> Fp12:
-///     out = miller_loop(P)
+///     out = miller_loop(P, Q)
 ///
 ///     inv = inverse_fp12(out)
 ///     out = frob_fp12_6(out)
 ///     out = mul_fp12(out, inv)
 ///
-///     acx = frob_fp12_2(out)
-///     out = mul_fp12(acx, out)
+///     acc = frob_fp12_2(out)
+///     out = mul_fp12(out, acc)
 ///
 ///     pow = fast_exp(out)
-///     out = frob_fp12_3(out)
-///     return mul_fp12(out, pow)
+///     out = frob_fp12_3(out) 
+///     out = mul_fp12(out, pow)
+///
+///     return out
 
 global tate:
-    // stack:         ptr, out
-    PUSH 1
-    // stack:      1, ptr, out
+    // stack:                     ptr, out,                                             retdest
+    PUSH tate_mul3   SWAP2 
+    // stack:                     out, ptr,                                  tate_mul3, retdest
+    PUSH tate_mul2   SWAP2 
+    // stack:                     ptr, out,                       tate_mul2, tate_mul3, retdest
+    PUSH tate_mul1   SWAP2
+    // stack:                     out, ptr,            tate_mul1, tate_mul2, tate_mul3, retdest
+    PUSH post_mllr   SWAP2 
+    // stack:                     ptr, out, post_mllr, tate_mul1, tate_mul2, tate_mul3, retdest
+    %jump(miller_loop)
+post_mllr:
+    // stack:                          out,            tate_mul1, tate_mul2, tate_mul3, retdest
+    PUSH 100 
+    // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest
+    DUP2
+    // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest
+    %inverse_fp12
+    // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
+    PUSH mul_fp12
+    // stack:           mul_fp12, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
+    DUP3
+    // stack:      out, mul_fp12, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
+    %jump(frob_fp12_6)
+tate_mul1:
+    // stack:                          out,                       tate_mul2, tate_mul3, retdest  {100: inv}
+    DUP1
+    // stack:                     out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
+    PUSH mul_fp12
+    // stack:           mul_fp12, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
     PUSH 100
-    // stack: 100, 1, ptr, out
-    %mstore_kernel_general
+    // stack:      100, mul_fp12, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}       
+    DUP3
+    // stack: out, 100, mul_fp12, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
+    %jump(frob_fp12_2)
+tate_mul2: 
+    // stack:                          out,                                  tate_mul3, retdest  {100: acc}
+    PUSH post_pow
+    // stack:                post_pow, out,                                  tate_mul3, retdest  {100: acc}
+    PUSH 100
+    // stack:           100, post_pow, out,                                  tate_mul3, retdest  {100: acc}
+    DUP1
+    // stack:      out, 100, post_pow, out,                                  tate_mul3, retdest  {100: acc}
+    %jump(power)
+post_pow: 
+    // stack:                     100, out,                                  tate_mul3, retdest  {100: pow}
+    PUSH mul_fp12
+    // stack:           mul_fp12, 100, out,                                  tate_mul3, retdest  {100: pow}
+    DUP3 
+    // stack:      out, mul_fp12, 100, out,                                  tate_mul3, retdest  {100: pow}
+    %jump(frob_fp12_3)
+tate_mul3:
+    // stack:                          out,                                             retdest  {100: pow}
+    SWAP1
+    JUMP
 
 
-/// def miller_loop(P):
+/// def miller_loop(P, Q):
 ///     out = 1
 ///     O = P
-///     for i in EXP[1:-1]:
+///     for i in EXP:
 ///         out = square_fp12(out)
 ///         line = tangent(O, Q)
 ///         out = mul_fp12_sparse(out, line)
@@ -36,6 +86,10 @@ global tate:
 ///     out = square_fp12(out)
 ///     line = tangent(O, Q)
 ///     return mul_fp12_sparse(out, line)
+///
+/// EXP is the binary expansion of the BN254 prime
 
+global miller_loop:
+    // stack: ptr, out, retdest
 
-
+    // stack:      out
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index b90f93d9..53e13153 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -2,27 +2,27 @@
 
 /// cost: 220
 global test_mul_fp12:
-    // stack:      f, inA , f', g, inB , g', inB, out, inA,       out
+    // stack:      f, inA , f', g, inB , g', inB, out, inA
     DUP7
-    // stack: inA, f, inA , f', g, inB , g', inB, out, inA,       out
+    // stack: inA, f, inA , f', g, inB , g', inB, out, inA
     %store_fp6
-    // stack:         inA , f', g, inB , g', inB, out, inA,       out
+    // stack:         inA , f', g, inB , g', inB, out, inA
     %offset_fp6
-    // stack:         inA', f', g, inB , g', inB, out, inA,       out
+    // stack:         inA', f', g, inB , g', inB, out, inA
     %store_fp6
-    // stack:                   g, inB , g', inB, out, inA,       out
+    // stack:                   g, inB , g', inB, out, inA
     DUP7
-    // stack:              inB, g, inB , g', inB, out, inA,       out
+    // stack:              inB, g, inB , g', inB, out, inA
     %store_fp6
-    // stack:                      inB , g', inB, out, inA,       out
+    // stack:                      inB , g', inB, out, inA
     %offset_fp6
-    // stack:                      inB', g', inB, out, inA,       out
+    // stack:                      inB', g', inB, out, inA
     %store_fp6
-    // stack:                                inB, out, inA,       out
+    // stack:                                inB, out, inA
     PUSH ret_stack
-    // stack:                     ret_stack, inB, out, inA,       out
+    // stack:                     ret_stack, inB, out, inA
     SWAP3
-    // stack:                           inA, inB, out, ret_stack, out
+    // stack:                           inA, inB, out, ret_stack
     %jump(square_fp12_test)
 ret_stack:
     // stack:          out
@@ -173,8 +173,7 @@ ret_3:
     // stack:                 out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %store_fp6
     // stack:                                     inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %pop2  
-    JUMP
+    POP  SWAP1  JUMP
 
 
 //////////////////////////////////////
@@ -304,8 +303,7 @@ global mul_fp12_sparse:
     // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     %store_fp6
     // stack:                                                                    inA, inB, out
-    %pop3
-    JUMP
+    %pop2  SWAP1  JUMP
 
 /// global mul_fp12_sparse_fast:
 ///    // stack:                                                            inA, inB, out
@@ -449,5 +447,4 @@ post_sq2:
     // stack:                                out, ff + sh(f'f'), inp, out
     %store_fp6
     // stack:                                                    inp, out
-    %pop2
-    JUMP
+    POP  SWAP1  JUMP
diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 1eb61c16..8528f6c7 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -188,7 +188,7 @@ fn make_initial_stack(
     input.extend(g0);
     input.extend(vec![in2]);
     input.extend(g1);
-    input.extend(vec![in2, out, in1, out]);
+    input.extend(vec![in2, out, in1]);
 
     as_stack(input)
 }

From 9a68574e481975ae4579e0e6c995d91d8008ad15 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 18 Nov 2022 16:58:39 -0800
Subject: [PATCH 019/201] frob macro

---
 .../bn254/curve_arithmetic/tate_pairing.asm   |  37 +++---
 .../bn254/field_arithmetic/frobenius.asm      | 108 +++++++++---------
 2 files changed, 70 insertions(+), 75 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index a109796c..01dfb7ea 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -33,42 +33,41 @@ post_mllr:
     // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest
     %inverse_fp12
     // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
-    PUSH mul_fp12
-    // stack:           mul_fp12, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
-    DUP3
-    // stack:      out, mul_fp12, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
-    %jump(frob_fp12_6)
+    DUP2
+    // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
+    %frob_fp12_6
+    // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
+    %jump(mul_fp12)
 tate_mul1:
     // stack:                          out,                       tate_mul2, tate_mul3, retdest  {100: inv}
     DUP1
     // stack:                     out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
-    PUSH mul_fp12
-    // stack:           mul_fp12, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
     PUSH 100
-    // stack:      100, mul_fp12, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}       
-    DUP3
-    // stack: out, 100, mul_fp12, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
-    %jump(frob_fp12_2)
+    // stack:                100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}       
+    DUP2
+    // stack:           out, 100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
+    %frob_fp12_2
+    // stack:                100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
+    %jump(mul_fp12)
 tate_mul2: 
     // stack:                          out,                                  tate_mul3, retdest  {100: acc}
     PUSH post_pow
     // stack:                post_pow, out,                                  tate_mul3, retdest  {100: acc}
     PUSH 100
     // stack:           100, post_pow, out,                                  tate_mul3, retdest  {100: acc}
-    DUP1
+    DUP3
     // stack:      out, 100, post_pow, out,                                  tate_mul3, retdest  {100: acc}
     %jump(power)
 post_pow: 
     // stack:                     100, out,                                  tate_mul3, retdest  {100: pow}
-    PUSH mul_fp12
-    // stack:           mul_fp12, 100, out,                                  tate_mul3, retdest  {100: pow}
-    DUP3 
-    // stack:      out, mul_fp12, 100, out,                                  tate_mul3, retdest  {100: pow}
-    %jump(frob_fp12_3)
+    DUP2
+    // stack:                out, 100, out,                                  tate_mul3, retdest  {100: pow}
+    %frob_fp12_3
+    // stack:                out, 100, out,                                  tate_mul3, retdest  {100: pow}
+    %jump(mul_fp12)
 tate_mul3:
     // stack:                          out,                                             retdest  {100: pow}
-    SWAP1
-    JUMP
+    SWAP1  JUMP
 
 
 /// def miller_loop(P, Q):
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index fbca28b3..6bab7ab0 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -71,95 +71,91 @@
 ///     g' = FROB_z[n] * frob_fp6(n, f')
 ///     return g, g'
 
-global frob_fp12_1:
-    // stack:           ptr, retdest
+%macro frob_fp12_1
+    // stack:           ptr
     DUP1
-    // stack:      ptr, ptr, retdest 
+    // stack:      ptr, ptr 
     %load_fp6
-    // stack:        f, ptr, retdest
+    // stack:        f, ptr
     %frob_fp6_1
-    // stack:        g, ptr, retdest
+    // stack:        g, ptr
     DUP7
-    // stack:   ptr, g, ptr, retdest
-    %load_fp6
-    // stack:           ptr, retdest
+    // stack:   ptr, g, ptr
+    %store_fp6
+    // stack:           ptr
     DUP1  %offset_fp6
-    // stack:     ptr', ptr, retdest
+    // stack:     ptr', ptr
     %load_fp6
-    // stack:       f', ptr, retdest
+    // stack:       f', ptr
     %frobz_1
-    // stack:       g', ptr, retdest
+    // stack:       g', ptr
     DUP1  %offset_fp6
-    // stack: ptr', g', ptr, retdest
+    // stack: ptr', g', ptr
     %store_fp6
-    // stack:           ptr, retdest
-    SWAP1
-    JUMP
+    // stack:           ptr
+%endmacro 
 
-global frob_fp12_2:
-    // stack:           ptr, retdest
+%macro frob_fp12_2
+    // stack:           ptr , out
     DUP1
-    // stack:      ptr, ptr, retdest 
+    // stack:      ptr, ptr , out
     %load_fp6
-    // stack:        f, ptr, retdest
+    // stack:        f, ptr , out
     %frob_fp6_2
-    // stack:        g, ptr, retdest
-    DUP7
-    // stack:   ptr, g, ptr, retdest
+    // stack:        g, ptr , out
+    DUP8
+    // stack:   out, g, ptr , out
+    %store_fp6 
+    // stack:           ptr , out
+    %offset_fp6
+    // stack:           ptr', out
     %load_fp6
-    // stack:           ptr, retdest
-    DUP1  %offset_fp6
-    // stack:     ptr', ptr, retdest
-    %load_fp6
-    // stack:       f', ptr, retdest
+    // stack:             f', out
     %frobz_2
-    // stack:       g', ptr, retdest
+    // stack:             g', out
     DUP1  %offset_fp6
-    // stack: ptr', g', ptr, retdest
+    // stack:       out', g', out
     %store_fp6
-    // stack:           ptr, retdest
-    SWAP1
-    JUMP
+    // stack:                 out
+%endmacro 
 
-global frob_fp12_3:
-    // stack:           ptr, retdest
+%macro frob_fp12_3
+    // stack:           ptr
     DUP1
-    // stack:      ptr, ptr, retdest 
+    // stack:      ptr, ptr 
     %load_fp6
-    // stack:        f, ptr, retdest
+    // stack:        f, ptr
     %frob_fp6_3
-    // stack:        g, ptr, retdest
+    // stack:        g, ptr
     DUP7
-    // stack:   ptr, g, ptr, retdest
-    %load_fp6
-    // stack:           ptr, retdest
+    // stack:   ptr, g, ptr
+    %store_fp6
+    // stack:           ptr
     DUP1  %offset_fp6
-    // stack:     ptr', ptr, retdest
+    // stack:     ptr', ptr
     %load_fp6
-    // stack:       f', ptr, retdest
+    // stack:       f', ptr
     %frobz_3
-    // stack:       g', ptr, retdest
+    // stack:       g', ptr
     DUP1  %offset_fp6
-    // stack: ptr', g', ptr, retdest
+    // stack: ptr', g', ptr
     %store_fp6
-    // stack:           ptr, retdest
-    SWAP1
-    JUMP
+    // stack:           ptr
+%endmacro 
 
-global frob_fp12_6:
-    // stack:           ptr, retdest
+%macro frob_fp12_6:
+    // stack:           ptr
     DUP1  %offset_fp6
-    // stack:     ptr', ptr, retdest
+    // stack:     ptr', ptr
     %load_fp6
-    // stack:       f', ptr, retdest
+    // stack:       f', ptr
     %frobz_6
-    // stack:       g', ptr, retdest
+    // stack:       g', ptr
     DUP7  %offset_fp6
-    // stack: ptr', g', ptr, retdest
+    // stack: ptr', g', ptr
     %store_fp6
-    // stack:           ptr, retdest
-    SWAP1
-    JUMP
+    // stack:           ptr
+%endmacro
 
 %macro frobz_1
     %frob_fp6_1

From 6b34f4ff0afd00d41d292d47b870dee471d2243a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 21 Nov 2022 14:41:21 -0800
Subject: [PATCH 020/201] alphabetical

---
 evm/src/cpu/kernel/aggregator.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index a29afb95..24a01d09 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -27,6 +27,9 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/moddiv.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_mul.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
@@ -35,9 +38,6 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/secp256k1/lift_x.asm"),
         include_str!("asm/curve/secp256k1/moddiv.asm"),
         include_str!("asm/exp.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
         include_str!("asm/halt.asm"),
         include_str!("asm/main.asm"),
         include_str!("asm/memory/core.asm"),

From c3dcdfd540fcddeeec9923eb0035745adee18092 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 28 Nov 2022 14:56:36 -0800
Subject: [PATCH 021/201] tate

---
 evm/src/cpu/kernel/aggregator.rs              |   1 +
 .../curve/bn254/curve_arithmetic/lines.asm    |  16 ---
 .../bn254/curve_arithmetic/tate_pairing.asm   | 108 +++++++++++++++---
 evm/src/cpu/kernel/asm/ripemd/box.asm         |   2 +-
 4 files changed, 94 insertions(+), 33 deletions(-)
 delete mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 24a01d09..64aef216 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -30,6 +30,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_mul.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
deleted file mode 100644
index 01adc040..00000000
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
+++ /dev/null
@@ -1,16 +0,0 @@
-/// p1, p2 : [Fp; 2], q : [Fp2; 2]
-
-/// def cord(p1x, p1y, p2x, p2y, qx, qy):
-///     return sparse_embed(
-///         p1y*p2x - p2y*p1x, 
-///         (p2y - p1y) * qx, 
-///         (p1x - p2x) * qy,
-///     )
-    
-/// def tangent(px, py, qx, qy):
-///     return sparse_embed(
-///         -9 + py**2, 
-///         (-3*px**2) * qx, 
-///         (2*py)     * qy,
-///     )
-
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 01dfb7ea..7fda5f24 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -24,7 +24,7 @@ global tate:
     // stack:                     out, ptr,            tate_mul1, tate_mul2, tate_mul3, retdest
     PUSH post_mllr   SWAP2 
     // stack:                     ptr, out, post_mllr, tate_mul1, tate_mul2, tate_mul3, retdest
-    %jump(miller_loop)
+    %jump(miller_init)
 post_mllr:
     // stack:                          out,            tate_mul1, tate_mul2, tate_mul3, retdest
     PUSH 100 
@@ -70,25 +70,101 @@ tate_mul3:
     SWAP1  JUMP
 
 
-/// def miller_loop(P, Q):
+/// def miller(P, Q):
+///     miller_init()
+///     miller_loop()
+///
+/// def miller_init():
 ///     out = 1
 ///     O = P
-///     for i in EXP:
-///         out = square_fp12(out)
-///         line = tangent(O, Q)
-///         out = mul_fp12_sparse(out, line)
-///         O += O
-///         if i:
-///             line = cord(P, O, Q)
-///             out = mul_fp12_sparse(out, line)
-///             O += P
+///     times = 62
+///
+/// def miller_loop():
+///     while times:
+///         n_m = fetch_times()
+///         while n_m > 10:
+///             mul_tangent()
+///             mul_cord()
+///             n_m - 10
+///         while n_n:
+///             mul_tangent()
+///             n_m - 1
+///         times -= 1
+             
+/// Note: miller_data is formed by
+/// (1) taking the binary expansion of the BN254 prime p
+/// (2) popping the head and appending a 0:
+///     exp = bin(p)[1:-1] + [0]
+/// (3) counting the lengths of 1s and 0s in exp, e.g.
+///     exp = 1100010011110 => EXP = [(2,3), (1,2), (4,1)]
+/// (4) encoding each pair (n,m) as 10*n+m:
+///     miller_data = [10*n + m for (n,m) in EXP]
+
+miller_init:
+    // stack:         ptr, out, retdest
+    PUSH 1
+    // stack:      1, ptr, out, retdest
+    DUP3
+    // stack: out, 1, ptr, out, retdest
+    %mstore_kernel_general
+    // stack:         ptr, out, retdest
+    %load_fp6
+    // stack:        P, Q, out, retdest
+    DUP1  DUP1
+    // stack:     O, P, Q, out, retdest
+    PUSH 62
+    // stack: 62, O, P, Q, out, retdest
+    %jump(miller_loop)
+
+miller_loop:
+    // stack:        times, O, P, Q, out, retdest
+    DUP1
+    // stack: times, times, O, P, Q, out, retdest
+    mload_kernel_code(exp_runs)
+    // stack:    nm, times, O, P, Q, out, retdest
+    %jump(miller_step)
+
+miller_step:
+    
+
+miller_decr:
+    // stack:     times  , O, P, Q, out, retdest
+    %sub_const(1)
+    // stack:     times-1, O, P, Q, out, retdest
+    DUP1  %jumpi(miller_loop)
+    // stack:           0, O, P, Q, out, retdest
+    %pop3  %pop3  %pop3
+    // stack:                       out, retdest
+    %jump(post_mllr)
+
+
+/// def mul_tangent()
 ///     out = square_fp12(out)
 ///     line = tangent(O, Q)
-///     return mul_fp12_sparse(out, line)
+///     out = mul_fp12_sparse(out, line)
+///     O += O
 ///
-/// EXP is the binary expansion of the BN254 prime
+/// def mul_cord()
+///     line = cord(O, P, Q)
+///     out = mul_fp12_sparse(out, line)
+///     O += P
 
-global miller_loop:
-    // stack: ptr, out, retdest
+mul_tangent:
 
-    // stack:      out
+
+
+/// p1, p2 : [Fp; 2], q : [Fp2; 2]
+
+/// def cord(p1x, p1y, p2x, p2y, qx, qy):
+///     return sparse_embed(
+///         p1y*p2x - p2y*p1x, 
+///         (p2y - p1y) * qx, 
+///         (p1x - p2x) * qy,
+///     )
+    
+/// def tangent(px, py, qx, qy):
+///     return sparse_embed(
+///         -9 + py**2, 
+///         (-3*px**2) * qx, 
+///         (2*py)     * qy,
+///     )
diff --git a/evm/src/cpu/kernel/asm/ripemd/box.asm b/evm/src/cpu/kernel/asm/ripemd/box.asm
index d60d9b8c..87e1f56a 100644
--- a/evm/src/cpu/kernel/asm/ripemd/box.asm
+++ b/evm/src/cpu/kernel/asm/ripemd/box.asm
@@ -72,7 +72,7 @@ post_rol:
 
 
 %macro get_round
-    // stack: sides, rounds
+    // stack:        sides , rounds
     %mul_const(5)  
     PUSH 10  
     SUB  

From 709b520ce37c9f88ab4552427b05164be0294b69 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 29 Nov 2022 20:43:49 -0800
Subject: [PATCH 022/201] miller loop

---
 evm/src/cpu/kernel/aggregator.rs              |   3 +
 .../bn254/curve_arithmetic/constants.asm      |   9 +
 .../curve/bn254/curve_arithmetic/lines.asm    |  14 ++
 .../bn254/curve_arithmetic/miller_loop.asm    | 165 ++++++++++++++++++
 .../bn254/curve_arithmetic/tate_pairing.asm   | 102 +----------
 5 files changed, 192 insertions(+), 101 deletions(-)
 create mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
 create mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
 create mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 64aef216..bb2dce92 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -24,8 +24,11 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/core/terminate.asm"),
         include_str!("asm/core/transfer.asm"),
         include_str!("asm/core/util.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/moddiv.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
new file mode 100644
index 00000000..13807c41
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -0,0 +1,9 @@
+global miller_data:
+    BYTES 0x21, 0x13, 0x11, 0x61, 0x52, 0x24, 0x21, 0x21, 
+    BYTES 0x11, 0x25, 0x13, 0x15, 0x44, 0x12, 0x21, 0x13, 
+    BYTES 0x11, 0x11, 0x32, 0x33, 0x14, 0x21, 0x11, 0x13, 
+    BYTES 0x12, 0x11, 0x11, 0x21, 0x11, 0x46, 0x11, 0x22, 
+    BYTES 0x31, 0x11, 0x24, 0x11, 0x11, 0x26, 0x16, 0x21, 
+    BYTES 0x21, 0x21, 0x11, 0x13, 0x15, 0x11, 0x34, 0x21, 
+    BYTES 0x12, 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11, 
+    BYTES 0x32, 0x32, 0x12, 0x13, 0x22, 0x15
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
new file mode 100644
index 00000000..a9297076
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
@@ -0,0 +1,14 @@
+/// p1, p2 : [Fp; 2], q : [Fp2; 2]
+
+/// def tangent(px, py, qx, qy):
+///     return
+///         py**2 - 9, 
+///         (-3*px**2) * qx, 
+///         (2*py)     * qy,
+
+/// def cord(p1x, p1y, p2x, p2y, qx, qy):
+///     return
+///         p1y*p2x - p2y*p1x, 
+///         (p2y - p1y) * qx, 
+///         (p1x - p2x) * qy,
+    
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
new file mode 100644
index 00000000..5ff74c6f
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -0,0 +1,165 @@
+/// def miller(P, Q):
+///     miller_init()
+///     miller_loop()
+///
+/// def miller_init():
+///     out = 1
+///     O = P
+///     times = 62
+///
+/// def miller_loop():
+///     while times:
+///         0xnm = load(miller_data)
+///         while 0xnm > 0x10:
+///             miller_one()
+///         while 0xnm:
+///             miller_zero()
+///         times -= 1
+///
+/// def miller_one():
+///     0xnm -= 0x10
+///     mul_tangent()
+///     mul_cord()
+///
+/// def miller_zero():
+///     0xnm -= 1
+///     mul_tangent()
+
+/// Note: miller_data was defined by
+/// (1) taking the binary expansion of the BN254 prime p
+/// (2) popping the head and appending a 0:
+///     exp = bin(p)[1:-1] + [0]
+/// (3) counting the lengths of runs of 1s then 0s in exp, e.g.
+///     exp = 1100010011110 => EXP = [(2,3), (1,2), (4,1)]
+/// (4) encoding each pair (n,m) as 0xnm:
+///     miller_data = [(0x10)n + m for (n,m) in EXP]
+
+global miller_init:
+    // stack:         ptr, out, retdest
+    PUSH 1
+    // stack:      1, ptr, out, retdest
+    DUP3
+    // stack: out, 1, ptr, out, retdest
+    %mstore_kernel_general
+    // stack:         ptr, out, retdest
+    %load_fp6
+    // stack:        P, Q, out, retdest
+    DUP2  DUP2
+    // stack:     O, P, Q, out, retdest
+    PUSH 62
+    // stack: 62, O, P, Q, out, retdest
+miller_loop:
+    // stack:          times  , O, P, Q, out, retdest
+    DUP1  ISZERO
+    // stack:  break?, times  , O, P, Q, out, retdest
+    %jumpi(miller_end)
+    // stack:          times  , O, P, Q, out, retdest
+    %sub_const(1)
+    // stack:          times-1, O, P, Q, out, retdest
+    DUP1
+    // stack: times-1, times-1, O, P, Q, out, retdest
+    %mload_kernel_code(miller_data)
+    // stack:    0xnm, times-1, O, P, Q, out, retdest
+    %jump(miller_one)
+miller_end:
+    // stack: times, O, P, Q, out, retdest
+    %pop3  %pop3  %pop3
+    // stack:                 out, retdest
+    %jump(post_mllr)
+
+
+miller_one:
+    // stack:               0xnm, times, O, P, Q, out, retdest
+    PUSH 0x10  DUP2  LT       
+    // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
+    %jumpi(miller_zero)
+    // stack:               0xnm, times, O, P, Q, out, retdest
+    %sub_const(0x10)
+    // stack:           0x{n-1}m, times, O, P, Q, out, retdest
+    PUSH mul_cord
+    // stack: mul_cord, 0x{n-1}m, times, O, P, Q, out, retdest
+    %jump(mul_tangent)
+
+miller_zero:
+    // stack:              m  , times, O, P, Q, out, retdest
+    DUP1  ISZERO
+    // stack:       skip?, m  , times, O, P, Q, out, retdest
+    %jumpi(miller_loop)
+    // stack:              m  , times, O, P, Q, out, retdest
+    %sub_const(1)
+    // stack:              m-1, times, O, P, Q, out, retdest
+    PUSH miller_zero
+    // stack: miller_zero, m-1, times, O, P, Q, out, retdest
+    %jump(mul_tangent)
+
+
+/// def mul_tangent()
+///     out = square_fp12(out)
+///     line = tangent(O, Q)
+///     out = mul_fp12_sparse(out, line)
+///     O += O
+
+mul_tangent:
+    // stack:                                         retdest, 0xnm, times, O, P, Q, out
+    PUSH mul_tangent_2  PUSH mul_tangent_1
+    // stack:           mul_tangent_1, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    DUP13  DUP1
+    // stack: out, out, mul_tangent_1, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %jump(square_fp12)
+mul_tangent_1:
+    // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    DUP12  DUP12  DUP12  DUP12
+    // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    DUP10  DUP10
+    // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %tangent
+    // stack:     line, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %sparse_store(100)
+    // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
+    PUSH 100  DUP2
+    // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
+    %jump(mul_fp12_sparse)
+mul_tangent_2:
+    // stack: out, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    POP  DUP5  DUP5
+    // stack:   O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    %ec_double_bn254
+    // stack: 2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    SWAP5  SWAP1  SWAP6  SWAP1
+    // stack: 2*O, retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
+    %pop2
+    // stack:      retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
+    JUMP
+
+
+/// def mul_cord()
+///     line = cord(O, P, Q)
+///     out = mul_fp12_sparse(out, line)
+///     O += P
+
+mul_cord:
+    // stack:                            0xnm, times, O, P, Q, out
+    PUSH mul_cord_1
+    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP11  DUP11  DUP11  DUP11  DUP11  DUP11  DUP11  DUP11
+    // stack:       O, P, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    %cord
+    // stack:          line, mul_cord_1, 0xnm, times, O, P, Q, out
+    %sparse_store(100)
+    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP12
+    // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out
+    PUSH 100
+    // stack:      100, out, mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP2
+    // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out
+    %jump(mul_fp12_sparse)
+mul_cord_1:
+    // stack:        0xnm, times, O  , P, Q, out
+    DUP6  DUP6  DUP6  DUP6
+    // stack: O , P, 0xnm, times, O  , P, Q, out
+    %ec_add_bn254
+    // stack: O + P, 0xnm, times, O  , P, Q, out
+    SWAP4  SWAP1  SWAP5  SWAP1
+    // stack:        0xnm, times, O+P, P, Q, out
+    %jump(miller_one)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 7fda5f24..93ba4d6e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -25,7 +25,7 @@ global tate:
     PUSH post_mllr   SWAP2 
     // stack:                     ptr, out, post_mllr, tate_mul1, tate_mul2, tate_mul3, retdest
     %jump(miller_init)
-post_mllr:
+global post_mllr:
     // stack:                          out,            tate_mul1, tate_mul2, tate_mul3, retdest
     PUSH 100 
     // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest
@@ -68,103 +68,3 @@ post_pow:
 tate_mul3:
     // stack:                          out,                                             retdest  {100: pow}
     SWAP1  JUMP
-
-
-/// def miller(P, Q):
-///     miller_init()
-///     miller_loop()
-///
-/// def miller_init():
-///     out = 1
-///     O = P
-///     times = 62
-///
-/// def miller_loop():
-///     while times:
-///         n_m = fetch_times()
-///         while n_m > 10:
-///             mul_tangent()
-///             mul_cord()
-///             n_m - 10
-///         while n_n:
-///             mul_tangent()
-///             n_m - 1
-///         times -= 1
-             
-/// Note: miller_data is formed by
-/// (1) taking the binary expansion of the BN254 prime p
-/// (2) popping the head and appending a 0:
-///     exp = bin(p)[1:-1] + [0]
-/// (3) counting the lengths of 1s and 0s in exp, e.g.
-///     exp = 1100010011110 => EXP = [(2,3), (1,2), (4,1)]
-/// (4) encoding each pair (n,m) as 10*n+m:
-///     miller_data = [10*n + m for (n,m) in EXP]
-
-miller_init:
-    // stack:         ptr, out, retdest
-    PUSH 1
-    // stack:      1, ptr, out, retdest
-    DUP3
-    // stack: out, 1, ptr, out, retdest
-    %mstore_kernel_general
-    // stack:         ptr, out, retdest
-    %load_fp6
-    // stack:        P, Q, out, retdest
-    DUP1  DUP1
-    // stack:     O, P, Q, out, retdest
-    PUSH 62
-    // stack: 62, O, P, Q, out, retdest
-    %jump(miller_loop)
-
-miller_loop:
-    // stack:        times, O, P, Q, out, retdest
-    DUP1
-    // stack: times, times, O, P, Q, out, retdest
-    mload_kernel_code(exp_runs)
-    // stack:    nm, times, O, P, Q, out, retdest
-    %jump(miller_step)
-
-miller_step:
-    
-
-miller_decr:
-    // stack:     times  , O, P, Q, out, retdest
-    %sub_const(1)
-    // stack:     times-1, O, P, Q, out, retdest
-    DUP1  %jumpi(miller_loop)
-    // stack:           0, O, P, Q, out, retdest
-    %pop3  %pop3  %pop3
-    // stack:                       out, retdest
-    %jump(post_mllr)
-
-
-/// def mul_tangent()
-///     out = square_fp12(out)
-///     line = tangent(O, Q)
-///     out = mul_fp12_sparse(out, line)
-///     O += O
-///
-/// def mul_cord()
-///     line = cord(O, P, Q)
-///     out = mul_fp12_sparse(out, line)
-///     O += P
-
-mul_tangent:
-
-
-
-/// p1, p2 : [Fp; 2], q : [Fp2; 2]
-
-/// def cord(p1x, p1y, p2x, p2y, qx, qy):
-///     return sparse_embed(
-///         p1y*p2x - p2y*p1x, 
-///         (p2y - p1y) * qx, 
-///         (p1x - p2x) * qy,
-///     )
-    
-/// def tangent(px, py, qx, qy):
-///     return sparse_embed(
-///         -9 + py**2, 
-///         (-3*px**2) * qx, 
-///         (2*py)     * qy,
-///     )

From 1b5a3d0ff075690405273a6e728f4826523891ac Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 29 Nov 2022 22:02:24 -0800
Subject: [PATCH 023/201] fix

---
 evm/src/cpu/kernel/aggregator.rs              |  1 +
 .../bn254/curve_arithmetic/constants.asm      | 14 +--
 .../curve/bn254/curve_arithmetic/lines.asm    |  7 +-
 .../bn254/curve_arithmetic/miller_loop.asm    | 20 +++-
 .../bn254/curve_arithmetic/tate_pairing.asm   |  4 +-
 .../bn254/field_arithmetic/field_macros.asm   | 98 ++++++++++++++++++-
 .../bn254/field_arithmetic/frobenius.asm      |  2 +-
 7 files changed, 127 insertions(+), 19 deletions(-)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index bb2dce92..0080b351 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -27,6 +27,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/lines.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/moddiv.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index 13807c41..573d4c04 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -1,9 +1,9 @@
 global miller_data:
-    BYTES 0x21, 0x13, 0x11, 0x61, 0x52, 0x24, 0x21, 0x21, 
-    BYTES 0x11, 0x25, 0x13, 0x15, 0x44, 0x12, 0x21, 0x13, 
-    BYTES 0x11, 0x11, 0x32, 0x33, 0x14, 0x21, 0x11, 0x13, 
-    BYTES 0x12, 0x11, 0x11, 0x21, 0x11, 0x46, 0x11, 0x22, 
-    BYTES 0x31, 0x11, 0x24, 0x11, 0x11, 0x26, 0x16, 0x21, 
-    BYTES 0x21, 0x21, 0x11, 0x13, 0x15, 0x11, 0x34, 0x21, 
-    BYTES 0x12, 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11, 
+    BYTES 0x21, 0x13, 0x11, 0x61, 0x52, 0x24, 0x21, 0x21
+    BYTES 0x11, 0x25, 0x13, 0x15, 0x44, 0x12, 0x21, 0x13 
+    BYTES 0x11, 0x11, 0x32, 0x33, 0x14, 0x21, 0x11, 0x13 
+    BYTES 0x12, 0x11, 0x11, 0x21, 0x11, 0x46, 0x11, 0x22 
+    BYTES 0x31, 0x11, 0x24, 0x11, 0x11, 0x26, 0x16, 0x21 
+    BYTES 0x21, 0x21, 0x11, 0x13, 0x15, 0x11, 0x34, 0x21 
+    BYTES 0x12, 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11 
     BYTES 0x32, 0x32, 0x12, 0x13, 0x22, 0x15
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
index a9297076..f4e8bed9 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
@@ -6,9 +6,14 @@
 ///         (-3*px**2) * qx, 
 ///         (2*py)     * qy,
 
+%macro tangent
+%endmacro
+
 /// def cord(p1x, p1y, p2x, p2y, qx, qy):
 ///     return
 ///         p1y*p2x - p2y*p1x, 
 ///         (p2y - p1y) * qx, 
 ///         (p1x - p2x) * qy,
-    
\ No newline at end of file
+
+%macro cord
+%endmacro
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 5ff74c6f..463e9573 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -114,7 +114,7 @@ mul_tangent_1:
     // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %tangent
     // stack:     line, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    %sparse_store(100)
+    %sparse_store
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
     PUSH 100  DUP2
     // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
@@ -123,7 +123,7 @@ mul_tangent_2:
     // stack: out, retdest, 0xnm, times,   O, P, Q, out  {100: line}
     POP  DUP5  DUP5
     // stack:   O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    %ec_double_bn254
+    // %ec_double_bn254
     // stack: 2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
     SWAP5  SWAP1  SWAP6  SWAP1
     // stack: 2*O, retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
@@ -145,7 +145,7 @@ mul_cord:
     // stack:       O, P, Q, mul_cord_1, 0xnm, times, O, P, Q, out
     %cord
     // stack:          line, mul_cord_1, 0xnm, times, O, P, Q, out
-    %sparse_store(100)
+    %sparse_store
     // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
     DUP12
     // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out
@@ -158,8 +158,20 @@ mul_cord_1:
     // stack:        0xnm, times, O  , P, Q, out
     DUP6  DUP6  DUP6  DUP6
     // stack: O , P, 0xnm, times, O  , P, Q, out
-    %ec_add_bn254
+    // %ec_add_bn254
     // stack: O + P, 0xnm, times, O  , P, Q, out
     SWAP4  SWAP1  SWAP5  SWAP1
     // stack:        0xnm, times, O+P, P, Q, out
     %jump(miller_one)
+
+
+%macro sparse_store
+    // stack: g0, G1, G1'
+    PUSH 100  %mstore_kernel_general
+    // stack:     G1, G1'
+    PUSH 102  %mstore_kernel_general
+    PUSH 103  %mstore_kernel_general
+    // stack:         G1'
+    PUSH 108  %mstore_kernel_general
+    PUSH 109  %mstore_kernel_general
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 93ba4d6e..1065fd3a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -31,7 +31,7 @@ global post_mllr:
     // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest
     DUP2
     // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest
-    %inverse_fp12
+    // %inverse_fp12
     // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
     DUP2
     // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
@@ -57,7 +57,7 @@ tate_mul2:
     // stack:           100, post_pow, out,                                  tate_mul3, retdest  {100: acc}
     DUP3
     // stack:      out, 100, post_pow, out,                                  tate_mul3, retdest  {100: acc}
-    %jump(power)
+    // %jump(power)
 post_pow: 
     // stack:                     100, out,                                  tate_mul3, retdest  {100: pow}
     DUP2
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
index 73736194..a76ed2ae 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
@@ -588,13 +588,103 @@
     // stack: c * f0, c * f1, c * f2, c * f3, c * f4, c * f5
 %endmacro
 
+/// cost: 16 dups + 14 swaps + 12 muls + 6 adds/subs = 48
+///
+/// G0 + G1t + G2t^2 = (a+bi) * (F0 + F1t + F2t^2) 
+///                  = (a+bi)F0 + (a+bi)F1t + (a+bi)F2t^2
+///
+/// G0 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
+/// G1 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i
+/// G2 = (a+bi)(f2+f2_i) = (af2 - bf2_) + (bf2 + af2_)i
+
+%macro mul_fp2_fp6
+    // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
+    DUP2
+    DUP5
+    MULFP254
+    // stack:       bf0_, a, b, f0, f0_, f1, f1_, f2, f2_
+    DUP2
+    DUP5
+    MULFP254
+    // stack:  af0, bf0_, a, b, f0, f0_, f1, f1_, f2, f2_
+    SUBFP254
+    // stack:         g0, a, b, f0, f0_, f1, f1_, f2, f2_
+    SWAP3
+    // stack:         f0, a, b, g0, f0_, f1, f1_, f2, f2_
+    DUP3
+    MULFP254
+    // stack:        bf0, a, b, g0, f0_, f1, f1_, f2, f2_
+    SWAP1
+    SWAP4
+    // stack:        f0_, bf0, b, g0, a, f1, f1_, f2, f2_
+    DUP5
+    MULFP254
+    // stack:       af0_, bf0, b, g0, a, f1, f1_, f2, f2_
+    ADDFP254
+    // stack:             g0_, b, g0, a, f1, f1_, f2, f2_
+    SWAP3
+    // stack:             a, b, g0, g0_, f1, f1_, f2, f2_
+    DUP2
+    DUP7
+    MULFP254
+    // stack:       bf1_, a, b, g0, g0_, f1, f1_, f2, f2_
+    DUP2
+    DUP7
+    MULFP254
+    // stack:  af1, bf1_, a, b, g0, g0_, f1, f1_, f2, f2_
+    SUBFP254
+    // stack:         g1, a, b, g0, g0_, f1, f1_, f2, f2_
+    SWAP5
+    // stack:         f1, a, b, g0, g0_, g1, f1_, f2, f2_
+    DUP3
+    MULFP254
+    // stack:        bf1, a, b, g0, g0_, g1, f1_, f2, f2_
+    SWAP1
+    SWAP6
+    // stack:        f1_, bf1, b, g0, g0_, g1, a, f2, f2_
+    DUP7
+    MULFP254
+    // stack:       af1_, bf1, b, g0, g0_, g1, a, f2, f2_
+    ADDFP254
+    // stack:             g1_, b, g0, g0_, g1, a, f2, f2_
+    SWAP5
+    // stack:             a, b, g0, g0_, g1, g1_, f2, f2_
+    DUP2
+    DUP9
+    MULFP254
+    // stack:       bf2_, a, b, g0, g0_, g1, g1_, f2, f2_
+    DUP2
+    DUP9
+    MULFP254
+    // stack:  af2, bf2_, a, b, g0, g0_, g1, g1_, f2, f2_
+    SUBFP254
+    // stack:         g2, a, b, g0, g0_, g1, g1_, f2, f2_
+    SWAP7
+    // stack:         f2, a, b, g0, g0_, g1, g1_, g2, f2_
+    SWAP8
+    // stack:         f2_, a, b, g0, g0_, g1, g1_, g2, f2
+    MULFP254
+    // stack:           af2_, b, g0, g0_, g1, g1_, g2, f2
+    SWAP7
+    // stack:           f2, b, g0, g0_, g1, g1_, g2, af2_
+    MULFP254
+    // stack:             bf2, g0, g0_, g1, g1_, g2, af2_
+    SWAP1
+    SWAP6
+    // stack:             af2_, bf2, g0_, g1, g1_, g2, g0
+    ADDFP254
+    // stack:                   g2_, g0_, g1, g1_, g2, g0
+    SWAP5
+    // stack:                   g0, g0_, g1, g1_, g2, g2_
+%endmacro 
+
 /// cost: 1 i9 (9) + 16 dups + 15 swaps + 12 muls + 6 adds/subs = 58
 ///
 /// G0 + G1t + G2t^2 = (a+bi)t * (F0 + F1t + F2t^2) 
 ///                  = (c+di)F2 + (a+bi)F0t + (a+bi)F1t^2
 /// where c+di = (a+bi)(9+i) = (9a-b) + (a+9b)i 
 ///
-/// G0 = (c+di)(f0+f0_i) = (cf2 - df2_) + (df2 + cf2_)i
+/// G0 = (c+di)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i
 /// G1 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
 /// G2 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i
 
@@ -688,9 +778,9 @@
 ///                  = (c+di)F1 + (c+di)F2t + (a+bi)F0t^2
 /// where c+di = (a+bi)(9+i) = (9a-b) + (a+9b)i 
 ///
-/// G0 = (c+di)(f0+f0_i) = (cf1 - df1_) + (df1 + cf1_)i
-/// G1 = (a+bi)(f0+f0_i) = (cf2 - df2_) + (df2 + cf2_)i
-/// G2 = (a+bi)(f1+f1_i) = (af0 - bf0_) + (bf0 + af0_)i
+/// G0 = (c+di)(f1+f1_i) = (cf1 - df1_) + (df1 + cf1_)i
+/// G1 = (c+di)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i
+/// G2 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
 
 %macro mul_fp2_fp6_sh2
     // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 6bab7ab0..37845f4b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -143,7 +143,7 @@
     // stack:           ptr
 %endmacro 
 
-%macro frob_fp12_6:
+%macro frob_fp12_6
     // stack:           ptr
     DUP1  %offset_fp6
     // stack:     ptr', ptr

From e9c5c678141acdd454a08ee8548bb203fdb5b3fc Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 12:16:54 -0800
Subject: [PATCH 024/201] minor

---
 .../kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 1065fd3a..5b9dd170 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -47,7 +47,7 @@ tate_mul1:
     DUP2
     // stack:           out, 100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
     %frob_fp12_2
-    // stack:                100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
+    // stack:                100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv} 
     %jump(mul_fp12)
 tate_mul2: 
     // stack:                          out,                                  tate_mul3, retdest  {100: acc}

From 3785e312f149c0ce8ab1c35eaa2497dcf1732fa4 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 13:27:01 -0800
Subject: [PATCH 025/201] cord and tangent

---
 evm/src/cpu/kernel/aggregator.rs              |   1 -
 .../curve/bn254/curve_arithmetic/lines.asm    |  19 ---
 .../bn254/curve_arithmetic/miller_loop.asm    | 126 +++++++++++++++---
 3 files changed, 105 insertions(+), 41 deletions(-)
 delete mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 0080b351..bb2dce92 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -27,7 +27,6 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
-        include_str!("asm/curve/bn254/curve_arithmetic/lines.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/moddiv.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
deleted file mode 100644
index f4e8bed9..00000000
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm
+++ /dev/null
@@ -1,19 +0,0 @@
-/// p1, p2 : [Fp; 2], q : [Fp2; 2]
-
-/// def tangent(px, py, qx, qy):
-///     return
-///         py**2 - 9, 
-///         (-3*px**2) * qx, 
-///         (2*py)     * qy,
-
-%macro tangent
-%endmacro
-
-/// def cord(p1x, p1y, p2x, p2y, qx, qy):
-///     return
-///         p1y*p2x - p2y*p1x, 
-///         (p2y - p1y) * qx, 
-///         (p1x - p2x) * qy,
-
-%macro cord
-%endmacro
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 463e9573..3f625d16 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -112,9 +112,7 @@ mul_tangent_1:
     // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     DUP10  DUP10
     // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    %tangent
-    // stack:     line, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    %sparse_store
+    %store_tangent
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
     PUSH 100  DUP2
     // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
@@ -133,7 +131,7 @@ mul_tangent_2:
 
 
 /// def mul_cord()
-///     line = cord(O, P, Q)
+///     line = cord(P, O, Q)
 ///     out = mul_fp12_sparse(out, line)
 ///     O += P
 
@@ -141,18 +139,20 @@ mul_cord:
     // stack:                            0xnm, times, O, P, Q, out
     PUSH mul_cord_1
     // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP11  DUP11  DUP11  DUP11  DUP11  DUP11  DUP11  DUP11
-    // stack:       O, P, Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    %cord
-    // stack:          line, mul_cord_1, 0xnm, times, O, P, Q, out
-    %sparse_store
-    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP11  DUP11  DUP11  DUP11
+    // stack:             Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP9  DUP9
+    // stack:          O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP13  DUP13
+    // stack:       P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    %store_cord 
+    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     DUP12
-    // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     PUSH 100
-    // stack:      100, out, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:      100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     DUP2
-    // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     %jump(mul_fp12_sparse)
 mul_cord_1:
     // stack:        0xnm, times, O  , P, Q, out
@@ -161,17 +161,101 @@ mul_cord_1:
     // %ec_add_bn254
     // stack: O + P, 0xnm, times, O  , P, Q, out
     SWAP4  SWAP1  SWAP5  SWAP1
-    // stack:        0xnm, times, O+P, P, Q, out
-    %jump(miller_one)
+    // stack:     O, 0xnm, times, O+P, P, Q, out
+    %pop2  %jump(miller_one)
 
 
-%macro sparse_store
-    // stack: g0, G1, G1'
+/// def store_cord(p1x, p1y, p2x, p2y, qx, qy):
+///     return sparse_store(
+///         p1y*p2x - p2y*p1x, 
+///         (p2y - p1y) * qx, 
+///         (p1x - p2x) * qy,
+///     )
+
+%macro store_cord
+    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    DUP1  DUP5  MULFP254
+    // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    DUP3  DUP5  MULFP254
+    // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    SUBFP254
+    // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     PUSH 100  %mstore_kernel_general
-    // stack:     G1, G1'
-    PUSH 102  %mstore_kernel_general
-    PUSH 103  %mstore_kernel_general
-    // stack:         G1'
+    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    SWAP3
+    // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:                    p2y - p1y, p2x , p1x, qx, qx_, qy, qy_
+    SWAP2
+    // stack:                    p1x , p2x, p2y - p1y, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:                    p1x - p2x, p2y - p1y, qx, qx_, qy, qy_
+    SWAP4
+    // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
+    DUP5  MULFP254
+    // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
     PUSH 108  %mstore_kernel_general
+    // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
+    SWAP1
+    // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
+    DUP2  MULFP254
+    // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
+    PUSH 102  %mstore_kernel_general
+    // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
+    MULFP254
+    // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
+    PUSH 103  %mstore_kernel_general
+    // stack:                                            p1x - p2x, qy_
+    MULFP254
+    // stack:                                            (p1x - p2x)qy_
+    PUSH 109  %mstore_kernel_general
+%endmacro
+
+
+/// def store_tangent(px, py, qx, qy):
+///     return sparse_store(
+///         py**2 - 9, 
+///         (-3px**2) * qx, 
+///         (2py)     * qy,
+///     )
+
+%macro store_tangent
+    // stack:                px, py, qx, qx_, qy, qy_
+    PUSH 9
+    // stack:             9, px, py, qx, qx_, qy, qy_
+    DUP3
+    // stack:        py , 9, px, py, qx, qx_, qy, qy_
+    DUP1  MULFP254
+    // stack:     py**2 , 9, px, py, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:     py**2 - 9, px, py, qx, qx_, qy, qy_
+    PUSH 100  %mstore_kernel_general
+    // stack:                px, py, qx, qx_, qy, qy_
+    DUP1  MULFP254
+    // stack:             px**2, py, qx, qx_, qy, qy_
+    PUSH 3  MULFP254
+    // stack:           3*px**2, py, qx, qx_, qy, qy_
+    PUSH 0  SUBFP254
+    // stack:          -3*px**2, py, qx, qx_, qy, qy_
+    SWAP2
+    // stack:           qx, py, -3px**2, qx_, qy, qy_
+    DUP3  MULFP254
+    // stack: (-3*px**2)qx, py, -3px**2, qx_, qy, qy_ 
+    PUSH 102  %mstore_kernel_general
+    // stack:               py, -3px**2, qx_, qy, qy_ 
+    PUSH 2  MULFP254
+    // stack:              2py, -3px**2, qx_, qy, qy_ 
+    SWAP3 
+    // stack:              qy, -3px**2, qx_, 2py, qy_ 
+    DUP4  MULFP254
+    // stack:         (2py)qy, -3px**2, qx_, 2py, qy_ 
+    PUSH 108  %mstore_kernel_general
+    // stack:                  -3px**2, qx_, 2py, qy_ 
+    MULFP254
+    // stack:                  (-3px**2)qx_, 2py, qy_ 
+    PUSH 103  %mstore_kernel_general
+    // stack:                                2py, qy_ 
+    MULFP254
+    // stack:                                (2py)qy_ 
     PUSH 109  %mstore_kernel_general
 %endmacro

From 3d6f24781b1d0a5260c1e36e09edbae75983011e Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 13:30:03 -0800
Subject: [PATCH 026/201] frob fix

---
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 37845f4b..6d789eb1 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -207,8 +207,8 @@
 
 
 %macro froby1_3
-    PUSH 0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f
-    PUSH 0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066
+    PUSH 0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de
+    PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d,
     %mul_fp2
 %endmacro
 

From 8d60b17ecd29474d7500c3680b54e3c6f8196d3e Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 13:31:13 -0800
Subject: [PATCH 027/201] ,

---
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 6d789eb1..27276989 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -208,7 +208,7 @@
 
 %macro froby1_3
     PUSH 0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de
-    PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d,
+    PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d
     %mul_fp2
 %endmacro
 

From 57252c7fd25e285a4a49451299abe8c500382448 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 14:29:59 -0800
Subject: [PATCH 028/201] simplify original

---
 .../curve/bn254/field_arithmetic/moddiv.asm   | 31 +++++--------------
 evm/src/cpu/kernel/interpreter.rs             | 11 +++++--
 2 files changed, 15 insertions(+), 27 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
index 780473b9..fd54fe2a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
@@ -1,39 +1,22 @@
 /// Division modulo 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47, the BN254 base field order
-/// To replace with more efficient method using non-determinism later.
 
 // Returns y * (x^-1) where the inverse is taken modulo N
 %macro moddiv
-    // stack: x, y
+    // stack: x   , y
     %inverse
     // stack: x^-1, y
-    %mulmodn
-%endmacro
-
-%macro mulmodn
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    SWAP2
-    // stack: y, x, N
-    MULMOD
-%endmacro
-
-%macro squaremodn
-    // stack: x
-    DUP1
-    // stack: x, x
-    %mulmodn
+    MULFP254
 %endmacro
 
 // Non-deterministically provide the inverse modulo N.
 %macro inverse
     // stack: x
     PROVER_INPUT(ff::bn254_base::inverse)
-    // stack: x^-1, x
-    %stack (inv, x) -> (inv, x, @BN_BASE, inv)
-    // stack: x^-1, x, N, x^-1
-    MULMOD
+    // stack: x^-1 , x
+    SWAP1  DUP2
+    // stack: x^-1 , x, x^-1
+    MULFP254
     // stack: x^-1 * x, x^-1
     %assert_eq_const(1)
-    // stack: x^-1
+    // stack:           x^-1
 %endmacro
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 3871db84..b63911ed 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -1,6 +1,7 @@
 //! An EVM interpreter for testing and debugging purposes.
 
 use std::collections::HashMap;
+use std::str::FromStr;
 
 use anyhow::{anyhow, bail, ensure};
 use ethereum_types::{U256, U512};
@@ -386,22 +387,26 @@ impl<'a> Interpreter<'a> {
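-    // TODO: 107 is hardcoded as a dummy prime for testing
-    // should be changed to the proper implementation prime
+    // The FP254 operations below reduce modulo the BN254 base field order,
+    // which is returned by `bn_base_order_`.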
 
+    fn bn_base_order_() -> U256 {
+        U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47").unwrap()
+    }
+
     fn run_addfp254(&mut self) {
-        let x = self.pop();
-        let y = self.pop();
-        self.push((x + y) % 107);
+        let x = self.pop() % Self::bn_base_order_(); // reduce first: U256 `+` panics on overflow
+        let y = self.pop() % Self::bn_base_order_();
+        self.push((x + y) % Self::bn_base_order_()); // both operands < N < 2^254, so the sum fits
     }
 
     fn run_mulfp254(&mut self) {
         let x = self.pop();
         let y = self.pop();
-        self.push(U256::try_from(x.full_mul(y) % 107).unwrap());
+        self.push(U256::try_from(x.full_mul(y) % Self::bn_base_order_()).unwrap());
     }
 
     fn run_subfp254(&mut self) {
-        let x = self.pop();
-        let y = self.pop();
-        self.push((U256::from(107) + x - y) % 107);
+        let x = self.pop() % Self::bn_base_order_(); // reduce first: keeps N + x below 2^255
+        let y = self.pop() % Self::bn_base_order_(); // y < N, so N + x - y cannot underflow
+        self.push((Self::bn_base_order_() + x - y) % Self::bn_base_order_());
     }
 
     fn run_div(&mut self) {

From 0ced2b3e06fb0480badc70b26de0275e0e0a7939 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 14:31:05 -0800
Subject: [PATCH 029/201] div name

---
 .../cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm | 4 ++--
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index dae060c2..f62c7ed8 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -83,7 +83,7 @@ global ec_add_valid_points:
     // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest
     %submod
     // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest
-    %moddiv
+    %divfp254
     // stack: lambda, x0, y0, x1, y1, retdest
     %jump(ec_add_valid_points_with_lambda)
 
@@ -201,7 +201,7 @@ ec_add_equal_points:
     // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest
     DUP3
     // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest
-    %moddiv
+    %divfp254
     // stack: lambda, x0, y0, x1, y1, retdest
     %jump(ec_add_valid_points_with_lambda)
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
index fd54fe2a..09d7d336 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
@@ -1,7 +1,7 @@
 /// Division modulo 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47, the BN254 base field order
 
 // Returns y * (x^-1) where the inverse is taken modulo N
-%macro moddiv
+%macro divfp254
     // stack: x   , y
     %inverse
     // stack: x^-1, y

From 97f90b226683d4004da2924b3770c2e207d29b30 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 18:27:50 -0800
Subject: [PATCH 030/201] update curve add

---
 .../bn254/curve_arithmetic/curve_add.asm      | 300 +++++++-----------
 .../curve/bn254/field_arithmetic/moddiv.asm   |   2 +-
 2 files changed, 123 insertions(+), 179 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index f62c7ed8..3da6f931 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -1,33 +1,21 @@
-// #define N 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 // BN254 base field order
+// BN254 elliptic curve addition via the standard affine addition formula.
 
-// BN254 elliptic curve addition.
-// Uses the standard affine addition formula.
 global ec_add:
-    // Uncomment for test inputs.
-    // PUSH 0xdeadbeef
-    // PUSH 2
-    // PUSH 1
-    // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121
-    // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770
-    // stack: x0, y0, x1, y1, retdest
+    // stack:                                    x0, y0, x1, y1, retdest
 
     // Check if points are valid BN254 points.
-    DUP2
-    // stack: y0, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x0, y0, x0, y0, x1, y1, retdest
+    DUP2  DUP2    
+    // stack:                            x0, y0, x0, y0, x1, y1, retdest
     %ec_check
-    // stack: isValid(x0, y0), x0, y0, x1, y1, retdest
-    DUP5
-    // stack: x1, isValid(x0, y0), x0, y0, x1, y1, retdest
-    DUP5
-    // stack: x1, y1, isValid(x0, y0), x0, y0, x1, y1, retdest
+    // stack:                   isValid(x0, y0), x0, y0, x1, y1, retdest
+    DUP5  DUP5    
+    // stack:         x1, y1  , isValid(x0, y0), x0, y0, x1, y1, retdest
     %ec_check
-    // stack: isValid(x1, y1), isValid(x0, y0), x0, y0, x1, y1, retdest
+    // stack: isValid(x1, y1) , isValid(x0, y0), x0, y0, x1, y1, retdest
     AND
     // stack: isValid(x1, y1) & isValid(x0, y0), x0, y0, x1, y1, retdest
     %jumpi(ec_add_valid_points)
-    // stack: x0, y0, x1, y1, retdest
+    // stack:                                    x0, y0, x1, y1, retdest
 
     // Otherwise return
     %pop4
@@ -37,59 +25,50 @@ global ec_add:
 // BN254 elliptic curve addition.
 // Assumption: (x0,y0) and (x1,y1) are valid points.
 global ec_add_valid_points:
-    // stack: x0, y0, x1, y1, retdest
+    // stack:                   x0, y0, x1, y1, retdest
 
     // Check if the first point is the identity.
-    DUP2
-    // stack: y0, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x0, y0, x0, y0, x1, y1, retdest
+    DUP2  DUP2
+    // stack:           x0,y0 , x0, y0, x1, y1, retdest
     %ec_isidentity
-    // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest
-    %jumpi(ec_add_first_zero)
-    // stack: x0, y0, x1, y1, retdest
+    // stack:   (0,0)==(x0,y0), x0, y0, x1, y1, retdest
+    %jumpi(ec_add_fst_zero)
+    // stack:                   x0, y0, x1, y1, retdest
 
-    // Check if the first point is the identity.
-    DUP4
-    // stack: y1, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: x1, y1, x0, y0, x1, y1, retdest
+    // Check if the second point is the identity.
+    DUP4  DUP4    
+    // stack:           x1,y1 , x0, y0, x1, y1, retdest
     %ec_isidentity
-    // stack: (x1,y1)==(0,0), x0, y0, x1, y1, retdest
+    // stack:   (0,0)==(x1,y1), x0, y0, x1, y1, retdest
     %jumpi(ec_add_snd_zero)
-    // stack: x0, y0, x1, y1, retdest
+    // stack:                   x0, y0, x1, y1, retdest
 
     // Check if both points have the same x-coordinate.
-    DUP3
-    // stack: x1, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x0, x1, x0, y0, x1, y1, retdest
+    DUP3  DUP2    
+    // stack:         x0 ,  x1, x0, y0, x1, y1, retdest
     EQ
-    // stack: x0 == x1, x0, y0, x1, y1, retdest
+    // stack:         x0 == x1, x0, y0, x1, y1, retdest
     %jumpi(ec_add_equal_first_coord)
-    // stack: x0, y0, x1, y1, retdest
 
+
+    // stack:                   x0, y0, x1, y1, retdest
     // Otherwise, we can use the standard formula.
     // Compute lambda = (y0 - y1)/(x0 - x1)
-    DUP4
-    // stack: y1, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: y0, y1, x0, y0, x1, y1, retdest
-    %submod
-    // stack: y0 - y1, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: x1, y0 - y1, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest
-    %submod
+    DUP4  DUP3
+    // stack:          y0 , y1, x0, y0, x1, y1, retdest
+    SUBFP254
+    // stack:          y0 - y1, x0, y0, x1, y1, retdest
+    DUP4  DUP3
+    // stack: x0 , x1, y0 - y1, x0, y0, x1, y1, retdest
+    SUBFP254
     // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest
     %divfp254
-    // stack: lambda, x0, y0, x1, y1, retdest
+    // stack:           lambda, x0, y0, x1, y1, retdest
     %jump(ec_add_valid_points_with_lambda)
 
 // BN254 elliptic curve addition.
 // Assumption: (x0,y0) == (0,0)
-ec_add_first_zero:
+ec_add_fst_zero:
     // stack: x0, y0, x1, y1, retdest
     // Just return (x1,y1)
     %stack (x0, y0, x1, y1, retdest) -> (retdest, x1, y1)
@@ -99,7 +78,6 @@ ec_add_first_zero:
 // Assumption: (x1,y1) == (0,0)
 ec_add_snd_zero:
     // stack: x0, y0, x1, y1, retdest
-
     // Just return (x0,y0)
     %stack (x0, y0, x1, y1, retdest) -> (retdest, x0, y0)
     JUMP
@@ -107,45 +85,37 @@ ec_add_snd_zero:
 // BN254 elliptic curve addition.
 // Assumption: lambda = (y0 - y1)/(x0 - x1)
 ec_add_valid_points_with_lambda:
-    // stack: lambda, x0, y0, x1, y1, retdest
+    // stack:                             lambda, x0, y0, x1, y1, retdest
 
     // Compute x2 = lambda^2 - x1 - x0
-    DUP2
-    // stack: x0, lambda, x0, y0, x1, y1, retdest
-    DUP5
-    // stack: x1, x0, lambda, x0, y0, x1, y1, retdest
-    %bn_base
-    // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP1
-    // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: x2, lambda, x0, y0, x1, y1, retdest
+    DUP2  DUP5
+    // stack:                     x1, x0, lambda, x0, y0, x1, y1, retdest
+    DUP3
+    // stack:          lambda   , x1, x0, lambda, x0, y0, x1, y1, retdest
+    DUP1  MULFP254
+    // stack:          lambda^2 , x1, x0, lambda, x0, y0, x1, y1, retdest
+    SUBFP254
+    // stack:          lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
+    SUBFP254
+    // stack:                         x2, lambda, x0, y0, x1, y1, retdest
 
     // Compute y2 = lambda*(x1 - x2) - y1
-    %bn_base
-    // stack: N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP7
-    // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
+    DUP1
+    // stack:                    x2 , x2, lambda, x0, y0, x1, y1, retdest
+    DUP6
+    // stack:               x1 , x2 , x2, lambda, x0, y0, x1, y1, retdest
+    SUBFP254
+    // stack:               x1 - x2 , x2, lambda, x0, y0, x1, y1, retdest
+    DUP3
+    // stack:     lambda ,  x1 - x2 , x2, lambda, x0, y0, x1, y1, retdest
+    MULFP254
+    // stack:     lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
     DUP7
     // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
     SWAP1
     // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: y2, x2, lambda, x0, y0, x1, y1, retdest
+    SUBFP254
+    // stack:                     y2, x2, lambda, x0, y0, x1, y1, retdest
 
     // Return x2,y2
     %stack (y2, x2, lambda, x0, y0, x1, y1, retdest) -> (retdest, x2, y2)
@@ -154,24 +124,20 @@ ec_add_valid_points_with_lambda:
 // BN254 elliptic curve addition.
 // Assumption: (x0,y0) and (x1,y1) are valid points and x0 == x1
 ec_add_equal_first_coord:
-    // stack: x0, y0, x1, y1, retdest with x0 == x1
+    // stack:           x0, y0, x1, y1, retdest with x0 == x1
 
     // Check if the points are equal
-    DUP2
-    // stack: y0, x0, y0, x1, y1, retdest
-    DUP5
-    // stack: y1, y0, x0, y0, x1, y1, retdest
+    DUP2  DUP5
+    // stack: y1  , y0, x0, y0, x1, y1, retdest
     EQ
     // stack: y1 == y0, x0, y0, x1, y1, retdest
     %jumpi(ec_add_equal_points)
-    // stack: x0, y0, x1, y1, retdest
+    // stack:           x0, y0, x1, y1, retdest
 
     // Otherwise, one is the negation of the other so we can return (0,0).
     %pop4
-    // stack: retdest
-    PUSH 0
-    // stack: 0, retdest
-    PUSH 0
+    // stack:       retdest
+    PUSH 0  PUSH 0
     // stack: 0, 0, retdest
     SWAP2
     // stack: retdest, 0, 0
@@ -182,37 +148,29 @@ ec_add_equal_first_coord:
 // Assumption: x0 == x1 and y0 == y1
 // Standard doubling formula.
 ec_add_equal_points:
-    // stack: x0, y0, x1, y1, retdest
-
+    // stack:                 x0, y0, x1, y1, retdest
     // Compute lambda = 3/2 * x0^2 / y0
-    %bn_base
-    // stack: N, x0, y0, x1, y1, retdest
-    %bn_base
-    // stack: N, N, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: x0, N, N, x0, y0, x1, y1, retdest
+
     DUP1
-    // stack: x0, x0, N, N, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: x0^2, N, x0, y0, x1, y1, retdest with
-    PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field
-    // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest
+    // stack:           x0  , x0, y0, x1, y1, retdest
+    DUP1  MULFP254
+    // stack:           x0^2, x0, y0, x1, y1, retdest
+    %bn_3_over_2
+    // stack:     3/2 , x0^2, x0, y0, x1, y1, retdest
+    MULFP254
+    // stack:     3/2 * x0^2, x0, y0, x1, y1, retdest
     DUP3
     // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest
     %divfp254
-    // stack: lambda, x0, y0, x1, y1, retdest
+    // stack:         lambda, x0, y0, x1, y1, retdest
     %jump(ec_add_valid_points_with_lambda)
 
 // BN254 elliptic curve doubling.
 // Assumption: (x0,y0) is a valid point.
 // Standard doubling formula.
 global ec_double:
-    // stack: x0, y0, retdest
-    DUP2
-    // stack: y0, x0, y0, retdest
-    DUP2
+    // stack:         x0, y0, retdest
+    DUP2  DUP2    
     // stack: x0, y0, x0, y0, retdest
     %jump(ec_add_equal_points)
 
@@ -221,79 +179,65 @@ global ec_double:
     PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47
 %endmacro
 
-// Assumption: x, y < N and 2N < 2^256.
-// Note: Doesn't hold for Secp256k1 base field.
-%macro submod
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    ADD
-    // stack: N + x, y // Doesn't overflow since 2N < 2^256
-    SUB
-    // stack: N + x - y // Doesn't underflow since y < N
-    %bn_base
-    // stack: N, N + x - y
-    SWAP1
-    // stack: N + x - y, N
-    MOD
-    // stack: (N + x - y) % N = (x-y) % N
+%macro bn_3_over_2
+    // 3/2 in the base field
+    PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5
 %endmacro
 
 // Check if (x,y) is a valid curve point.
-// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack.
+// Returns range & curve || ident
+// where
+//     range = (x < N) & (y < N) 
+//     curve = y^2 == (x^3 + 3) 
+//     ident = (x,y) == (0,0)
+
 %macro ec_check
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    DUP2
-    // stack: x, N, x, y
-    LT
-    // stack: x < N, x, y
-    %bn_base
-    // stack: N, x < N, x, y
-    DUP4
-    // stack: y, N, x < N, x, y
-    LT
-    // stack: y < N, x < N, x, y
-    AND
-    // stack: (y < N) & (x < N), x, y
-    %stack (b, x, y) -> (x, x, @BN_BASE, x, @BN_BASE, @BN_BASE, x, y, b)
-    // stack: x, x, N, x, N, N, x, y, b
-    MULMOD
-    // stack: x^2 % N, x, N, N, x, y, b
-    MULMOD
-    // stack: x^3 % N, N, x, y, b
-    PUSH 3
-    // stack: 3, x^3 % N, N, x, y, b
-    ADDMOD
-    // stack: (x^3 + 3) % N, x, y, b
-    DUP3
-    // stack: y, (x^3 + 3) % N, x, y, b
-    %bn_base
-    // stack: N, y, (x^3 + 3) % N, x, y, b
-    SWAP1
-    // stack: y, N, (x^3 + 3) % N, x, y, b
+    // stack:                       x, y
     DUP1
-    // stack: y, y, N, (x^3 + 3) % N, x, y, b
-    MULMOD
-    // stack: y^2 % N, (x^3 + 3) % N, x, y, b
-    EQ
-    // stack: y^2 % N == (x^3 + 3) % N, x, y, b
+    // stack:                    x, x, y
+    %bn_base
+    // stack:                N , x, x, y
+    DUP1
+    // stack:             N, N , x, x, y
+    DUP5
+    // stack:         y , N, N , x, x, y
+    LT  
+    // stack:         y < N, N , x, x, y
     SWAP2
-    // stack: y, x, y^2 % N == (x^3 + 3) % N, b
-    %ec_isidentity
-    // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b
-    SWAP2
-    // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0)
+    // stack:         x , N, y < N, x, y
+    LT
+    // stack:         x < N, y < N, x, y
     AND
-    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0)
+    // stack:                range, x, y
+    SWAP2
+    // stack:                y, x, range
+    DUP2 
+    // stack:           x  , y, x, range
+    DUP1  DUP1  MULFP254  MULFP254
+    // stack:           x^3, y, x, range
+    PUSH 3  ADDFP254
+    // stack:       3 + x^3, y, x, range
+    DUP2
+    // stack:  y  , 3 + x^3, y, x, range
+    DUP1  MULFP254
+    // stack:  y^2, 3 + x^3, y, x, range
+    EQ
+    // stack:         curve, y, x, range
+    SWAP2
+    // stack:         x, y, curve, range
+    %ec_isidentity
+    // stack:       ident , curve, range
+    SWAP2
+    // stack:       range , curve, ident
+    AND
+    // stack:       range & curve, ident
     OR
-    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0)
+    // stack:                   is_valid
 %endmacro
 
 // Check if (x,y)==(0,0)
 %macro ec_isidentity
-    // stack: x, y
+    // stack: x , y
     OR
     // stack: x | y
     ISZERO
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
index 09d7d336..578eddae 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
@@ -1,4 +1,4 @@
-/// Division modulo 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47, the BN254 base field order
+/// Division modulo the BN254 prime
 
 // Returns y * (x^-1) where the inverse is taken modulo N
 %macro divfp254

From 48149f93d153fa8840c72c71763e9c8af86d604a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 22:50:45 -0800
Subject: [PATCH 031/201] inverse

---
 evm/src/cpu/kernel/aggregator.rs                    |  2 +-
 .../curve/bn254/curve_arithmetic/miller_loop.asm    |  2 +-
 .../field_arithmetic/{moddiv.asm => inverse.asm}    | 13 +++++++++++++
 evm/src/cpu/kernel/interpreter.rs                   |  3 ++-
 4 files changed, 17 insertions(+), 3 deletions(-)
 rename evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/{moddiv.asm => inverse.asm} (62%)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index bb2dce92..c0f8de77 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -29,7 +29,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/moddiv.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 3f625d16..2b630186 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -70,7 +70,7 @@ miller_end:
 
 miller_one:
     // stack:               0xnm, times, O, P, Q, out, retdest
-    PUSH 0x10  DUP2  LT       
+    DUP1  %gt_const(0x10) 
     // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
     %jumpi(miller_zero)
     // stack:               0xnm, times, O, P, Q, out, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
similarity index 62%
rename from evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 578eddae..6d5169e8 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -20,3 +20,16 @@
     %assert_eq_const(1)
     // stack:           x^-1
 %endmacro
+
+// Non-deterministically provide the inverse modulo N.
+%macro inverse
+    // stack: x
+    PROVER_INPUT(ff::bn254_base::inverse)
+    // stack: x^-1 , x
+    SWAP1  DUP2
+    // stack: x^-1 , x, x^-1
+    MULFP254
+    // stack: x^-1 * x, x^-1
+    %assert_eq_const(1)
+    // stack:           x^-1
+%endmacro
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index b63911ed..f075b11d 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -388,7 +388,8 @@ impl<'a> Interpreter<'a> {
     // should be changed to the proper implementation prime
 
     fn bn_base_order_() -> U256 {
-        U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47").unwrap()
+        U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47")
+            .unwrap()
     }
 
     fn run_addfp254(&mut self) {

From 0c183467aa960cab05a2383c22664ed1fb051698 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 23:28:57 -0800
Subject: [PATCH 032/201] fmt

---
 .../bn254/curve_arithmetic/curve_mul.asm      | 57 +++++++++----------
 .../curve/bn254/field_arithmetic/inverse.asm  | 53 ++++++++++++-----
 2 files changed, 66 insertions(+), 44 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
index b1472812..843053e9 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
@@ -1,54 +1,49 @@
 // BN254 elliptic curve scalar multiplication.
 // Recursive implementation, same algorithm as in `exp.asm`.
 global ec_mul:
-    // Uncomment for test inputs.
-    // PUSH 0xdeadbeef
-    // PUSH 0xd
-    // PUSH 2
-    // PUSH 1
-    // stack: x, y, s, retdest
+    // stack:                x, y, s, retdest
     DUP2
-    // stack: y, x, y, s, retdest
+    // stack:            y , x, y, s, retdest
     DUP2
-    // stack: x, y, x, y, s, retdest
+    // stack:          x,y , x, y, s, retdest
     %ec_isidentity
-    // stack: (x,y)==(0,0), x, y, s, retdest
+    // stack:  (0,0)==(x,y), x, y, s, retdest
     %jumpi(ret_zero_ec_mul)
-    // stack: x, y, s, retdest
+    // stack:                x, y, s, retdest
     DUP2
-    // stack: y, x, y, s, retdest
+    // stack:             y, x, y, s, retdest
     DUP2
-    // stack: x, y, x, y, s, retdest
+    // stack:          x, y, x, y, s, retdest
     %ec_check
     // stack: isValid(x, y), x, y, s, retdest
     %jumpi(ec_mul_valid_point)
-    // stack: x, y, s, retdest
+    // stack:                x, y, s, retdest
     %pop3
     %ec_invalid_input
 
 // Same algorithm as in `exp.asm`
 ec_mul_valid_point:
-    // stack: x, y, s, retdest
+    // stack:    x, y, s, retdest
     DUP3
     // stack: s, x, y, s, retdest
     %jumpi(step_case)
-    // stack: x, y, s, retdest
+    // stack:    x, y, s, retdest
     %jump(ret_zero_ec_mul)
 
 step_case:
-    // stack: x, y, s, retdest
+    // stack:                                                 x, y, s, retdest
     PUSH recursion_return
-    // stack: recursion_return, x, y, s, retdest
+    // stack:                               recursion_return, x, y, s, retdest
     PUSH 2
-    // stack: 2, recursion_return, x, y, s, retdest
+    // stack:                            2, recursion_return, x, y, s, retdest
     DUP5
-    // stack: s, 2, recursion_return, x, y, s, retdest
+    // stack:                        s , 2, recursion_return, x, y, s, retdest
     DIV
-    // stack: s / 2, recursion_return, x, y, s, retdest
+    // stack:                        s / 2, recursion_return, x, y, s, retdest
     PUSH step_case_contd
-    // stack: step_case_contd, s / 2, recursion_return, x, y, s, retdest
+    // stack:       step_case_contd, s / 2, recursion_return, x, y, s, retdest
     DUP5
-    // stack: y, step_case_contd, s / 2, recursion_return, x, y, s, retdest
+    // stack:    y, step_case_contd, s / 2, recursion_return, x, y, s, retdest
     DUP5
     // stack: x, y, step_case_contd, s / 2, recursion_return, x, y, s, retdest
     %jump(ec_double)
@@ -59,11 +54,11 @@ step_case_contd:
     %jump(ec_mul_valid_point)
 
 recursion_return:
-    // stack: x', y', x, y, s, retdest
+    // stack:     x', y', x, y, s, retdest
     SWAP4
-    // stack: s, y', x, y, x', retdest
+    // stack:     s, y', x, y, x', retdest
     PUSH 1
-    // stack: 1, s, y', x, y, x', retdest
+    // stack:  1, s, y', x, y, x', retdest
     AND
     // stack: s & 1, y', x, y, x', retdest
     SWAP1
@@ -77,17 +72,17 @@ recursion_return:
     SWAP1
     // stack: s & 1, x', y', x, y, retdest
     %jumpi(odd_scalar)
-    // stack: x', y', x, y, retdest
+    // stack:        x', y', x, y, retdest
     SWAP3
-    // stack: y, y', x, x', retdest
+    // stack:        y, y', x, x', retdest
     POP
-    // stack: y', x, x', retdest
+    // stack:           y', x, x', retdest
     SWAP1
-    // stack: x, y', x', retdest
+    // stack:           x, y', x', retdest
     POP
-    // stack: y', x', retdest
+    // stack:              y', x', retdest
     SWAP2
-    // stack: retdest, x', y'
+    // stack:              retdest, x', y'
     JUMP
 
 odd_scalar:
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 6d5169e8..d164aed1 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -10,7 +10,7 @@
 
 // Non-deterministically provide the inverse modulo N.
 %macro inverse
-    // stack: x
+    // stack:        x
     PROVER_INPUT(ff::bn254_base::inverse)
     // stack: x^-1 , x
     SWAP1  DUP2
@@ -21,15 +21,42 @@
     // stack:           x^-1
 %endmacro
 
-// Non-deterministically provide the inverse modulo N.
-%macro inverse
-    // stack: x
-    PROVER_INPUT(ff::bn254_base::inverse)
-    // stack: x^-1 , x
-    SWAP1  DUP2
-    // stack: x^-1 , x, x^-1
-    MULFP254
-    // stack: x^-1 * x, x^-1
-    %assert_eq_const(1)
-    // stack:           x^-1
-%endmacro
+global inverse_fp12:
+    // stack:                           ptr, inv, retdest
+    // DUP1  %load_fp12
+    // stack:                        f, ptr, inv, retdest
+    DUP14
+    // stack:                   inv, f, ptr, inv, retdest 
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    // stack:             f^-1, inv, f, ptr, inv, retdest
+    DUP13
+    // stack:        inv, f^-1, inv, f, ptr, inv, retdest
+    // %store_fp12  POP
+    // stack:                        f, ptr, inv, retdest
+    %pop4  %pop4  %pop4
+    // stack:                           ptr, inv, retdest 
+    PUSH check_inv  PUSH 200
+    // stack:           200, check_inv, ptr, inv, retdest 
+    DUP4  DUP4
+    // stack: ptr, inv, 200, check_inv, ptr, inv, retdest 
+    %jump(mul_fp12)
+global check_inv:
+    // stack:                      200, ptr, inv, retdest
+    // %eq_unit_fp12
+    // stack:                  is_unit, ptr, inv, retdest
+    %assert_nonzero
+    // stack:                           ptr, inv, retdest
+    POP  SWAP1  
+    // stack:                                retdest, inv
+    JUMP

From 51dc601a94e4d6959816a02694c138338cb7bc7c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 5 Dec 2022 23:37:22 -0800
Subject: [PATCH 033/201] call curve add

---
 .../bn254/curve_arithmetic/miller_loop.asm    | 38 ++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 2b630186..501eab61 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -118,15 +118,16 @@ mul_tangent_1:
     // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
     %jump(mul_fp12_sparse)
 mul_tangent_2:
-    // stack: out, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    POP  DUP5  DUP5
-    // stack:   O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    // %ec_double_bn254
-    // stack: 2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    SWAP5  SWAP1  SWAP6  SWAP1
-    // stack: 2*O, retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
-    %pop2
-    // stack:      retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
+    // stack:             out, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    POP  PUSH after_double
+    // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    DUP6  DUP6
+    // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    %jump(ec_double)
+after_double:
+    // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    SWAP5  POP  SWAP5  POP
+    // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
     JUMP
 
 
@@ -155,14 +156,17 @@ mul_cord:
     // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     %jump(mul_fp12_sparse)
 mul_cord_1:
-    // stack:        0xnm, times, O  , P, Q, out
-    DUP6  DUP6  DUP6  DUP6
-    // stack: O , P, 0xnm, times, O  , P, Q, out
-    // %ec_add_bn254
-    // stack: O + P, 0xnm, times, O  , P, Q, out
-    SWAP4  SWAP1  SWAP5  SWAP1
-    // stack:     O, 0xnm, times, O+P, P, Q, out
-    %pop2  %jump(miller_one)
+    // stack:                   0xnm, times, O  , P, Q, out
+    PUSH after_add
+    // stack:        after_add, 0xnm, times, O  , P, Q, out
+    DUP7  DUP7  DUP7  DUP7
+    // stack: O , P, after_add, 0xnm, times, O  , P, Q, out
+    %jump(ec_add_valid_points)
+after_add:
+    // stack:            O + P, 0xnm, times, O  , P, Q, out
+    SWAP4  POP  SWAP4  POP
+    // stack:                   0xnm, times, O+P, P, Q, out
+    %jump(miller_one)
 
 
 /// def store_cord(p1x, p1y, p2x, p2y, qx, qy):

From 779a1a3f98fe476eff8f27a9f4285bef0f4df513 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 6 Dec 2022 16:01:02 -0800
Subject: [PATCH 034/201] power function

---
 .../bn254/curve_arithmetic/miller_loop.asm    |  20 ++--
 .../curve/bn254/curve_arithmetic/power.asm    | 103 ++++++++++++++++++
 2 files changed, 113 insertions(+), 10 deletions(-)
 create mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 501eab61..cd58b677 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -184,7 +184,7 @@ after_add:
     // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SUBFP254
     // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    PUSH 100  %mstore_kernel_general
+    %mstore_kernel_general(100)
     // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SWAP3
     // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
@@ -198,21 +198,21 @@ after_add:
     // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
     DUP5  MULFP254
     // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    PUSH 108  %mstore_kernel_general
+    %mstore_kernel_general(108)
     // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
     SWAP1
     // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
     DUP2  MULFP254
     // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
-    PUSH 102  %mstore_kernel_general
+    %mstore_kernel_general(102)
     // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
     MULFP254
     // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
-    PUSH 103  %mstore_kernel_general
+    %mstore_kernel_general(103)
     // stack:                                            p1x - p2x, qy_
     MULFP254
     // stack:                                            (p1x - p2x)qy_
-    PUSH 109  %mstore_kernel_general
+    %mstore_kernel_general(109)
 %endmacro
 
 
@@ -233,7 +233,7 @@ after_add:
     // stack:     py**2 , 9, px, py, qx, qx_, qy, qy_
     SUBFP254
     // stack:     py**2 - 9, px, py, qx, qx_, qy, qy_
-    PUSH 100  %mstore_kernel_general
+    %mstore_kernel_general(100)
     // stack:                px, py, qx, qx_, qy, qy_
     DUP1  MULFP254
     // stack:             px**2, py, qx, qx_, qy, qy_
@@ -245,7 +245,7 @@ after_add:
     // stack:           qx, py, -3px**2, qx_, qy, qy_
     DUP3  MULFP254
     // stack: (-3*px**2)qx, py, -3px**2, qx_, qy, qy_ 
-    PUSH 102  %mstore_kernel_general
+    %mstore_kernel_general(102)
     // stack:               py, -3px**2, qx_, qy, qy_ 
     PUSH 2  MULFP254
     // stack:              2py, -3px**2, qx_, qy, qy_ 
@@ -253,13 +253,13 @@ after_add:
     // stack:              qy, -3px**2, qx_, 2py, qy_ 
     DUP4  MULFP254
     // stack:         (2py)qy, -3px**2, qx_, 2py, qy_ 
-    PUSH 108  %mstore_kernel_general
+    %mstore_kernel_general(108)
     // stack:                  -3px**2, qx_, 2py, qy_ 
     MULFP254
     // stack:                  (-3px**2)qx_, 2py, qy_ 
-    PUSH 103  %mstore_kernel_general
+    %mstore_kernel_general(103)
     // stack:                                2py, qy_ 
     MULFP254
     // stack:                                (2py)qy_ 
-    PUSH 109  %mstore_kernel_general
+    %mstore_kernel_general(109)
 %endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
new file mode 100644
index 00000000..198c3dfa
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
@@ -0,0 +1,103 @@
+/// def power(square):
+///     power_init()
+///     power_loop_0()
+///     power_loop_1()
+///     power_loop_2()
+///     power_return()
+///
+/// def power_init()
+///     y0, y1, y2 = 1, 1, 1
+///
+/// def power_return()
+///     y0  = y0^{-1}
+///     y1 *= y0 * (y2**2)
+///     y1  = frob_fp12_1(y1)
+///     y2  = frob_fp12_2(y2)
+///     return y2 * y1 * y0 
+
+global power:
+    // stack:                                           sqr, out, retdest
+    PUSH 1  DUP1  DUP1
+    // stack:                                  1, 1, 1, sqr, out, retdest
+    %mstore_kernel_general(200)  %mstore_kernel_general(212)  %mstore_kernel_general(224)
+    // stack:                                           sqr, out, retdest  {200: y0, 212: y1, 224: y2}
+    PUSH power_loop_2  PUSH power_loop_1  PUSH power_return    
+    // stack: power_return, power_loop_1, power_loop_2, sqr, out, retdest  {200: y0, 212: y1, 224: y2}
+    SWAP3
+    // stack: sqr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
+    %jump(power_loop_0)
+
+power_return:
+    // stack:                                out, retdest  {200: y0, 212: y1, 224: y2}
+    PUSH power_return_1  PUSH 236  PUSH 200
+    // stack:      200, 236, power_return_1, out, retdest  {200: y0, 212: y1, 224: y2}
+    %jump(inverse_fp12)
+power_return_1:
+    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
+    PUSH power_return_2  PUSH 248  PUSH 224
+    // stack:      224, 248, power_return_2, out, retdest  {236: y0, 212: y1, 224: y2}
+    %jump(square_fp12)
+power_return_2:
+    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2, 248: y2^2}
+    PUSH power_return_3  PUSH 248  PUSH 236  PUSH 248
+    // stack: 248, 236, 248, power_return_3, out, retdest  {236: y0, 212: y1, 224: y2, 248: y2^2}
+    %jump(mul_fp12)
+power_return_3:
+    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2, 248: y0*y2^2}
+    PUSH power_return_4  PUSH 212  PUSH 248  PUSH 212
+    // stack: 212, 248, 212, power_return_4, out, retdest  {236: y0, 212: y1, 224: y2, 248: y0*y2^2}
+    %jump(mul_fp12)
+power_return_4:
+    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
+    PUSH 212
+    // stack:                           212, out, retdest  {236: y0, 212: y1, 224: y2}
+    %frob_fp12_1
+    // stack:                           212, out, retdest  {236: y0, 212: y1, 224: y2}
+    POP
+    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
+    PUSH 224  DUP1
+    // stack:                      224, 224, out, retdest  {236: y0, 212: y1, 224: y2}
+    %frob_fp12_2
+    // stack:                           224, out, retdest  {236: y0, 212: y1, 224: y2}
+    POP
+    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
+    PUSH power_return_5  SWAP1
+    // stack:                out, power_return_5, retdest  {236: y0, 212: y1, 224: y2}
+    PUSH 236  PUSH 212
+    // stack:      212, 236, out, power_return_5, retdest  {236: y0, 212: y1, 224: y2}
+    %jump(mul_fp12)
+power_return_5:
+    // stack:                                 out, retdest  {236: y0, 212: y1, 224: y2}
+    PUSH 224  DUP2
+    // stack:                       out, 224, out, retdest  {236: y0, 212: y1, 224: y2}
+    %jump(mul_fp12)
+
+/// def power_loop_0():
+///     for i in range(1, len4):
+///         if EXP4[-i]:
+///             y1 *= square
+///         if EXP2[-i]:
+///             y2 *= square
+///         if EXP0[-i]:
+///             y0 *= square
+///         square = square_fp12(square)
+///     y1 *= square
+///
+/// def power_loop_1():
+///     for i in range(len4, len2):
+///        if EXP2[-i]:
+///            y2 *= square
+///        if EXP0[-i]:
+///            y0 *= square
+///        square = square_fp12(square)
+///     y2 *= square
+///
+/// def power_loop_2():
+///     for i in range(len2, len0):
+///         if EXP0[-i]:
+///             y0 *= square
+///         square = square_fp12(square)
+///     y0 *= square
+
+
+

From 77ec96f6f4900f98e4d876cfd671a69b899f483d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 6 Dec 2022 18:30:12 -0800
Subject: [PATCH 035/201] power function complete

---
 .../bn254/curve_arithmetic/miller_loop.asm    |   2 +-
 .../curve/bn254/curve_arithmetic/power.asm    | 158 ++++++++++++++++--
 2 files changed, 146 insertions(+), 14 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index cd58b677..c6eaad3a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -70,7 +70,7 @@ miller_end:
 
 miller_one:
     // stack:               0xnm, times, O, P, Q, out, retdest
-    DUP1  %gt_const(0x10) 
+    DUP1  %lt_const(0x10) 
     // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
     %jumpi(miller_zero)
     // stack:               0xnm, times, O, P, Q, out, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
index 198c3dfa..949a42c8 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
@@ -16,15 +16,17 @@
 ///     return y2 * y1 * y0 
 
 global power:
-    // stack:                                           sqr, out, retdest
+    // stack:                                                            sqr, out, retdest
     PUSH 1  DUP1  DUP1
-    // stack:                                  1, 1, 1, sqr, out, retdest
+    // stack:                                                   1, 1, 1, sqr, out, retdest
     %mstore_kernel_general(200)  %mstore_kernel_general(212)  %mstore_kernel_general(224)
-    // stack:                                           sqr, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:                                                            sqr, out, retdest  {200: y0, 212: y1, 224: y2}
     PUSH power_loop_2  PUSH power_loop_1  PUSH power_return    
-    // stack: power_return, power_loop_1, power_loop_2, sqr, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:                  power_return, power_loop_1, power_loop_2, sqr, out, retdest  {200: y0, 212: y1, 224: y2}
     SWAP3
-    // stack: sqr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:             sqr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
+    PUSH 65  PUSH 62  PUSH 65
+    // stack: 65, 62, 65, sqr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
     %jump(power_loop_0)
 
 power_return:
@@ -67,37 +69,167 @@ power_return_4:
     // stack:      212, 236, out, power_return_5, retdest  {236: y0, 212: y1, 224: y2}
     %jump(mul_fp12)
 power_return_5:
-    // stack:                                 out, retdest  {236: y0, 212: y1, 224: y2}
+    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
     PUSH 224  DUP2
-    // stack:                       out, 224, out, retdest  {236: y0, 212: y1, 224: y2}
+    // stack:                      out, 224, out, retdest  {236: y0, 212: y1, 224: y2}
     %jump(mul_fp12)
 
 /// def power_loop_0():
 ///     for i in range(1, len4):
-///         if EXP4[-i]:
+///         abc = load(power_data_0)
+///         if a:
 ///             y1 *= square
-///         if EXP2[-i]:
+///         if b:
 ///             y2 *= square
-///         if EXP0[-i]:
+///         if c:
 ///             y0 *= square
 ///         square = square_fp12(square)
 ///     y1 *= square
 ///
 /// def power_loop_1():
 ///     for i in range(len4, len2):
-///        if EXP2[-i]:
+///        ab = load(power_data_1)
+///        if a:
 ///            y2 *= square
-///        if EXP0[-i]:
+///        if b:
 ///            y0 *= square
 ///        square = square_fp12(square)
 ///     y2 *= square
 ///
 /// def power_loop_2():
 ///     for i in range(len2, len0):
-///         if EXP0[-i]:
+///         a = load(power_data_1)
+///         if a:
 ///             y0 *= square
 ///         square = square_fp12(square)
 ///     y0 *= square
 
+power_loop_0:
+    // stack:                                     i  , j, k, sqr, retdest
+    DUP1  ISZERO
+    // stack:                             break?, i  , j, k, sqr, retdest
+    %jumpi(power_loop_0_end)
+    // stack:                                     i  , j, k, sqr, retdest
+    %sub_const(1)
+    // stack:                                     i-1, j, k, sqr, retdest
+    DUP1  %mload_kernel_code(power_data_0)
+    // stack:                                abc, i-1, j, k, sqr, retdest
+    DUP1  %lt_const(100)
+    // stack:                         skip?, abc, i-1, j, k, sqr, retdest
+    %jumpi(power_loop_0_b)
+    // stack:                                abc, i-1, j, k, sqr, retdest
+    %sub_const(100)
+    // stack:                                 bc, i-1, j, k, sqr, retdest
+    PUSH power_loop_0_b  PUSH 212  DUP1  DUP8
+    // stack: sqr, 212, 212, power_loop_0_b,  bc, i-1, j, k, sqr, retdest
+    %jump(mul_fp12)
+power_loop_0_b:
+    // stack:                               bc, i, j, k, sqr, retdest
+    DUP1  %lt_const(10)
+    // stack:                        skip?, bc, i, j, k, sqr, retdest
+    %jumpi(power_loop_0_c)
+    // stack:                               bc, i, j, k, sqr, retdest
+    %sub_const(10)
+    // stack:                                c, i, j, k, sqr, retdest
+    PUSH power_loop_0_c  PUSH 224  DUP1  DUP8
+    // stack: sqr, 224, 224, power_loop_0_c, c, i, j, k, sqr, retdest
+    %jump(mul_fp12)
+power_loop_0_c:
+    // stack:                              c, i, j, k, sqr, retdest
+    ISZERO
+    // stack:                          skip?, i, j, k, sqr, retdest
+    %jumpi(power_loop_0_sq)
+    // c was consumed by ISZERO, so the taken branch reaches power_loop_0_sq
+    // with the same stack layout as the fall-through path below.
+    // stack:                                 i, j, k, sqr, retdest
+    PUSH power_loop_0_sq  PUSH 200  DUP1  DUP7
+    // stack: sqr, 200, 200, power_loop_0_sq, i, j, k, sqr, retdest
+    %jump(mul_fp12)
+power_loop_0_sq:
+    // stack:                         i, j, k, sqr, retdest
+    PUSH power_loop_0  DUP5  DUP1
+    // stack: sqr, sqr, power_loop_0, i, j, k, sqr, retdest
+    %jump(square_fp12)
+power_loop_0_end:
+    // stack:                           0, j, k, sqr, retdest
+    POP  
+    // stack:                              j, k, sqr, retdest
+    PUSH power_loop_1  PUSH 212  DUP1  DUP6
+    // stack: sqr, 212, 212, power_loop_1, j, k, sqr, retdest
+    %jump(mul_fp12)
+
+power_loop_1:
+    // stack:                                   j  , k, sqr, retdest
+    DUP1  ISZERO
+    // stack:                           break?, j  , k, sqr, retdest
+    %jumpi(power_loop_1_end)
+    // stack:                                   j  , k, sqr, retdest
+    %sub_const(1)
+    // stack:                                   j-1, k, sqr, retdest
+    DUP1  %mload_kernel_code(power_data_1)
+    // stack:                               ab, j-1, k, sqr, retdest
+    DUP1  %lt_const(10)
+    // stack:                        skip?, ab, j-1, k, sqr, retdest
+    %jumpi(power_loop_1_b)
+    // stack:                               ab, j-1, k, sqr, retdest
+    %sub_const(10)
+    // stack:                                b, j-1, k, sqr, retdest
+    PUSH power_loop_1_b  PUSH 224  DUP1  DUP7
+    // stack: sqr, 224, 224, power_loop_1_b, b, j-1, k, sqr, retdest
+    %jump(mul_fp12)
+power_loop_1_b:
+    // stack:                              b, j, k, sqr, retdest
+    DUP1  ISZERO
+    // stack:                       skip?, b, j, k, sqr, retdest
+    %jumpi(power_loop_1_sq)
+    // stack:                              b, j, k, sqr, retdest
+    POP
+    // stack:                                 j, k, sqr, retdest
+    PUSH power_loop_1_sq  PUSH 200  DUP1  DUP6
+    // stack: sqr, 200, 200, power_loop_1_sq, j, k, sqr, retdest
+    %jump(mul_fp12)
+power_loop_1_sq:
+    // stack:                         j, k, sqr, retdest
+    PUSH power_loop_1  DUP4  DUP1
+    // stack: sqr, sqr, power_loop_1, j, k, sqr, retdest
+    %jump(square_fp12)
+power_loop_1_end:
+    // stack:                           0, k, sqr, retdest
+    POP  
+    // stack:                              k, sqr, retdest
+    PUSH power_loop_2  PUSH 224  DUP1  DUP5
+    // stack: sqr, 224, 224, power_loop_2, k, sqr, retdest  (sqr sits at depth 5 after the three pushes)
+    %jump(mul_fp12)
 
 
+power_loop_2:
+    // stack:                                 k  , sqr, retdest
+    DUP1  ISZERO
+    // stack:                         break?, k  , sqr, retdest
+    %jumpi(power_loop_2_end)
+    // stack:                                 k  , sqr, retdest
+    %sub_const(1)
+    // stack:                                 k-1, sqr, retdest
+    DUP1  %mload_kernel_code(power_data_2)
+    // stack:                              a, k-1, sqr, retdest
+    DUP1  ISZERO
+    // stack:                       skip?, a, k-1, sqr, retdest
+    %jumpi(power_loop_2_sq)
+    // stack:                              a, k-1, sqr, retdest
+    POP
+    // stack:                                 k-1, sqr, retdest
+    PUSH power_loop_2_sq  PUSH 200  DUP1  DUP5
+    // stack: sqr, 200, 200, power_loop_2_sq, k-1, sqr, retdest
+    %jump(mul_fp12)
+power_loop_2_sq:
+    // stack:                         k, sqr, retdest
+    PUSH power_loop_2  DUP3  DUP1
+    // stack: sqr, sqr, power_loop_2, k, sqr, retdest
+    %jump(square_fp12)
+power_loop_2_end:
+    // stack:                           0, sqr, retdest
+    POP  
+    // stack:                              sqr, retdest
+    PUSH power_return  PUSH 200  DUP1  DUP4
+    // stack: sqr, 200, 200, power_return, sqr, retdest
+    %jump(mul_fp12)

From 03c14d0392bf29b01f666a51b062741139b5741b Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 6 Dec 2022 19:21:21 -0800
Subject: [PATCH 036/201] fixed miller + conts

---
 .../bn254/curve_arithmetic/constants.asm      | 42 +++++++++++++++----
 .../bn254/curve_arithmetic/miller_loop.asm    | 31 ++++++++++----
 2 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index 573d4c04..b0cea9e3 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -1,9 +1,35 @@
 global miller_data:
-    BYTES 0x21, 0x13, 0x11, 0x61, 0x52, 0x24, 0x21, 0x21
-    BYTES 0x11, 0x25, 0x13, 0x15, 0x44, 0x12, 0x21, 0x13 
-    BYTES 0x11, 0x11, 0x32, 0x33, 0x14, 0x21, 0x11, 0x13 
-    BYTES 0x12, 0x11, 0x11, 0x21, 0x11, 0x46, 0x11, 0x22 
-    BYTES 0x31, 0x11, 0x24, 0x11, 0x11, 0x26, 0x16, 0x21 
-    BYTES 0x21, 0x21, 0x11, 0x13, 0x15, 0x11, 0x34, 0x21 
-    BYTES 0x12, 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11 
-    BYTES 0x32, 0x32, 0x12, 0x13, 0x22, 0x15
\ No newline at end of file
+    BYTES 0x60, 0x12, 0x22, 0x11, 0x51, 0x54, 0x14, 0x11
+    BYTES 0x13, 0x12, 0x34, 0x11, 0x32, 0x21, 0x42, 0x14
+    BYTES 0x12, 0x14, 0x51, 0x22, 0x15, 0x11, 0x12, 0x31
+    BYTES 0x11, 0x24, 0x11, 0x11, 0x26, 0x16, 0x21, 0x21
+    BYTES 0x21, 0x11, 0x13, 0x15, 0x11, 0x34, 0x21, 0x12
+    BYTES 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11, 0x32
+    BYTES 0x32, 0x12, 0x13, 0x22, 0x15
+
+global power_data_0:
+    BYTES 111, 010, 011, 111, 110, 101, 001, 100, 001, 100
+    BYTES 110, 110, 110, 011, 011, 101, 011, 101, 101, 111
+    BYTES 000, 011, 011, 001, 011, 001, 101, 100, 100, 000
+    BYTES 010, 100, 110, 010, 110, 100, 110, 101, 101, 001
+    BYTES 001, 110, 110, 110, 010, 110, 101, 001, 010, 010
+    BYTES 110, 110, 110, 010, 101, 110, 101, 010, 101, 001
+    BYTES 000, 111, 111, 110, 111
+
+global power_data_1:
+    BYTES 11, 01, 11, 10, 11, 10, 01, 10, 00, 01
+    BYTES 10, 11, 01, 11, 10, 01, 00, 00, 00, 01
+    BYTES 10, 01, 01, 10, 00, 01, 11, 00, 01, 00
+    BYTES 10, 11, 11, 00, 11, 10, 11, 00, 11, 01
+    BYTES 11, 11, 11, 01, 01, 00, 00, 11, 00, 11
+    BYTES 11, 01, 01, 10, 11, 10, 11, 10, 10, 00
+    BYTES 11, 10
+
+global power_data_2:
+    BYTES 0, 1, 1, 0, 0, 1, 1, 1, 1, 0
+    BYTES 0, 0, 1, 0, 0, 1, 1, 0, 1, 0
+    BYTES 1, 1, 1, 1, 0, 0, 1, 1, 1, 0
+    BYTES 1, 0, 1, 0, 0, 0, 0, 0, 1, 1
+    BYTES 0, 1, 0, 1, 0, 0, 1, 0, 0, 0
+    BYTES 1, 0, 1, 1, 1, 0, 1, 0, 1, 1
+    BYTES 0, 0, 1, 0, 0
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index c6eaad3a..675b04a1 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -5,7 +5,7 @@
 /// def miller_init():
 ///     out = 1
 ///     O = P
-///     times = 62
+///     times = 61
 ///
 /// def miller_loop():
 ///     while times:
@@ -26,9 +26,9 @@
 ///     mul_tangent()
 
 /// Note: miller_data was defined by
-/// (1) taking the binary expansion of the BN254 prime p
-/// (2) popping the head and appending a 0:
-///     exp = bin(p)[1:-1] + [0]
+/// (1) taking the binary expansion of N254, the size of the elliptic curve
+/// (2) popping the first and last elements, then appending a 0:
+///     exp = bin(N254)[1:-1] + [0]
 /// (3) counting the lengths of runs of 1s then 0s in exp, e.g.
 ///     exp = 1100010011110 => EXP = [(2,3), (1,2), (4,1)]
 /// (4) encoding each pair (n,m) as 0xnm:
@@ -46,13 +46,13 @@ global miller_init:
     // stack:        P, Q, out, retdest
     DUP2  DUP2
     // stack:     O, P, Q, out, retdest
-    PUSH 62
-    // stack: 62, O, P, Q, out, retdest
+    PUSH 61
+    // stack: 61, O, P, Q, out, retdest
 miller_loop:
     // stack:          times  , O, P, Q, out, retdest
     DUP1  ISZERO
     // stack:  break?, times  , O, P, Q, out, retdest
-    %jumpi(miller_end)
+    %jumpi(miller_final)
     // stack:          times  , O, P, Q, out, retdest
     %sub_const(1)
     // stack:          times-1, O, P, Q, out, retdest
@@ -61,6 +61,11 @@ miller_loop:
     %mload_kernel_code(miller_data)
     // stack:    0xnm, times-1, O, P, Q, out, retdest
     %jump(miller_one)
+miller_final:
+    // stack:     0, O, P, Q, out, retdest
+    PUSH 28
+    // stack: 28, 0, O, P, Q, out, retdest
+    %jump(miller_zero_final)
 miller_end:
     // stack: times, O, P, Q, out, retdest
     %pop3  %pop3  %pop3
@@ -92,6 +97,18 @@ miller_zero:
     // stack: miller_zero, m-1, times, O, P, Q, out, retdest
     %jump(mul_tangent)
 
+miller_zero_final:
+    // stack:              m  , times, O, P, Q, out, retdest
+    DUP1  ISZERO
+    // stack:       skip?, m  , times, O, P, Q, out, retdest
+    %jumpi(miller_end)
+    // stack:              m  , times, O, P, Q, out, retdest
+    %sub_const(1)
+    // stack:              m-1, times, O, P, Q, out, retdest
+    PUSH miller_zero
+    // stack: miller_zero, m-1, times, O, P, Q, out, retdest
+    %jump(mul_tangent)
+
 
 /// def mul_tangent()
 ///     out = square_fp12(out)

From bf9c32463f92ecce53e2419a5413663fbbac4c68 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 8 Dec 2022 15:18:43 -0800
Subject: [PATCH 037/201] macros

---
 .../bn254/curve_arithmetic/tate_pairing.asm   |  6 +-
 .../bn254/field_arithmetic/field_macros.asm   | 93 +++++++++++++++++++
 .../curve/bn254/field_arithmetic/inverse.asm  | 38 ++++----
 3 files changed, 115 insertions(+), 22 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 5b9dd170..971528af 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -27,10 +27,12 @@ global tate:
     %jump(miller_init)
 global post_mllr:
     // stack:                          out,            tate_mul1, tate_mul2, tate_mul3, retdest
+    DUP1 
+    // stack:                     out, out,            tate_mul1, tate_mul2, tate_mul3, retdest
     PUSH 100 
-    // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest
+    // stack:                100, out, out,            tate_mul1, tate_mul2, tate_mul3, retdest
     DUP2
-    // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest
+    // stack:           out, 100, out, out,            tate_mul1, tate_mul2, tate_mul3, retdest
     // %inverse_fp12
     // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
     DUP2
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
index a76ed2ae..f7525144 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
@@ -865,3 +865,96 @@
     SWAP1
     // stack:                   g0, g0_, g1, g1_, g2, g2_
 %endmacro
+
+%macro load_fp12
+    // stack:                                                          ptr
+    DUP1  %add_const(10)
+    // stack:                                                   ind10, ptr
+    %mload_kernel_general
+    // stack:                                                     x10, ptr
+    DUP2  %add_const(9)
+    // stack:                                              ind09, x10, ptr
+    %mload_kernel_general
+    // stack:                                                x09, x10, ptr
+    DUP3  %add_const(8)
+    // stack:                                         ind08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:                                           x08, x09, x10, ptr
+    DUP4  %add_const(7)
+    // stack:                                    ind07, x08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:                                      x07, x08, x09, x10, ptr
+    DUP5  %add_const(6)
+    // stack:                               ind06, x07, x08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:                                 x06, x07, x08, x09, x10, ptr
+    DUP6  %add_const(5)
+    // stack:                          ind05, x06, x07, x08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:                            x05, x06, x07, x08, x09, x10, ptr
+    DUP7  %add_const(4)
+    // stack:                     ind04, x05, x06, x07, x08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:                       x04, x05, x06, x07, x08, x09, x10, ptr
+    DUP8  %add_const(3)
+    // stack:                ind03, x04, x05, x06, x07, x08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:                  x03, x04, x05, x06, x07, x08, x09, x10, ptr
+    DUP9  %add_const(2)
+    // stack:           ind02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:             x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
+    DUP10  %add_const(1)
+    // stack:      ind01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:        x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
+    DUP11  %add_const(11)
+    // stack: ind11, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
+    %mload_kernel_general
+    // stack:   x11, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
+    SWAP11
+    // stack: ind00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
+    %mload_kernel_general
+    // stack:   x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
+%endmacro
+
+%macro assert_eq_unit_fp12
+    // stack:      ptr
+    DUP1                 %mload_kernel_code
+    // stack: x00, ptr
+    %assert_eq_const(1)
+    // stack:      ptr 
+    DUP1  %add_const(01)  %mload_kernel_code
+    // stack: x01, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(02)  %mload_kernel_code
+    // stack: x02, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(03)  %mload_kernel_code
+    // stack: x03, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(04)  %mload_kernel_code
+    // stack: x04, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(05)  %mload_kernel_code
+    // stack: x05, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(06)  %mload_kernel_code
+    // stack: x06, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(07)  %mload_kernel_code
+    // stack: x07, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(08)  %mload_kernel_code
+    // stack: x08, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(09)  %mload_kernel_code
+    // stack: x09, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(10)  %mload_kernel_code
+    // stack: x10, ptr
+    %assert_eq_const(0)
+    DUP1  %add_const(11)  %mload_kernel_code
+    // stack: x11, ptr
+    %assert_eq_const(0)
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index d164aed1..4e72d782 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -22,11 +22,11 @@
 %endmacro
 
 global inverse_fp12:
-    // stack:                           ptr, inv, retdest
-    // DUP1  %load_fp12
-    // stack:                        f, ptr, inv, retdest
+    // stack:                                ptr, inv, retdest
+    DUP1  %load_fp12
+    // stack:                             f, ptr, inv, retdest
     DUP14
-    // stack:                   inv, f, ptr, inv, retdest 
+    // stack:                        inv, f, ptr, inv, retdest
     PROVER_INPUT(ff::bn254_base::inverse_fp12)
     PROVER_INPUT(ff::bn254_base::inverse_fp12)
     PROVER_INPUT(ff::bn254_base::inverse_fp12)
@@ -39,24 +39,22 @@ global inverse_fp12:
     PROVER_INPUT(ff::bn254_base::inverse_fp12)
     PROVER_INPUT(ff::bn254_base::inverse_fp12)
     PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    // stack:             f^-1, inv, f, ptr, inv, retdest
+    // stack:                  f^-1, inv, f, ptr, inv, retdest
     DUP13
-    // stack:        inv, f^-1, inv, f, ptr, inv, retdest
-    // %store_fp12  POP
-    // stack:                        f, ptr, inv, retdest
-    %pop4  %pop4  %pop4
-    // stack:                           ptr, inv, retdest 
-    PUSH check_inv  PUSH 200
-    // stack:           200, check_inv, ptr, inv, retdest 
-    DUP4  DUP4
-    // stack: ptr, inv, 200, check_inv, ptr, inv, retdest 
+    // stack:             inv, f^-1, inv, f, ptr, inv, retdest
+    %store_fp12
+    // stack:                        inv, f, ptr, inv, retdest
+    %stack (inv, f: 12) -> ()
+    // stack:                                ptr, inv, retdest 
+    PUSH 200  PUSH check_inv 
+    // stack:                check_inv, 200, ptr, inv, retdest 
+    DUP2  DUP5  DUP5
+    // stack: ptr, inv, 200, check_inv, 200, ptr, inv, retdest 
     %jump(mul_fp12)
 global check_inv:
-    // stack:                      200, ptr, inv, retdest
-    // %eq_unit_fp12
-    // stack:                  is_unit, ptr, inv, retdest
-    %assert_nonzero
-    // stack:                           ptr, inv, retdest
+    // stack:                           200, ptr, inv, retdest
+    %assert_eq_unit_fp12
+    // stack:                                ptr, inv, retdest
     POP  SWAP1  
-    // stack:                                retdest, inv
+    // stack:                                     retdest, inv
     JUMP

From dbeabb8075ff352b2d7841521de983b4af8ba41c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 8 Dec 2022 17:38:27 -0800
Subject: [PATCH 038/201] storefp12 macro

---
 .../bn254/field_arithmetic/field_macros.asm   | 56 ++++++++++++++++++-
 .../curve/bn254/field_arithmetic/inverse.asm  |  4 +-
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
index f7525144..27711417 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
@@ -918,7 +918,61 @@
     // stack:   x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
 %endmacro
 
-%macro assert_eq_unit_fp12
+%macro store_fp12
+    // stack:        ptr, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
+    SWAP11
+    // stack:        x10, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    DUP12  %add_const(10)
+    // stack: ind10, x10, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:             x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    DUP11
+    // stack:      ind00, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                  x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    DUP10  %add_const(01)
+    // stack:           ind01, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                       x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    DUP10  %add_const(02)
+    // stack:                ind02, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                            x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    DUP10  %add_const(03)
+    // stack:                     ind03, x03, x04, x05, x06, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                                 x04, x05, x06, x07, x08, x09, ptr, x11
+    DUP10  %add_const(04)
+    // stack:                          ind04, x04, x05, x06, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                                      x05, x06, x07, x08, x09, ptr, x11
+    DUP10  %add_const(05)
+    // stack:                               ind05, x05, x06, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                                           x06, x07, x08, x09, ptr, x11
+    DUP10  %add_const(06)
+    // stack:                                    ind06, x06, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                                                x07, x08, x09, ptr, x11
+    DUP10  %add_const(07)
+    // stack:                                         ind07, x07, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                                                     x08, x09, ptr, x11
+    DUP10  %add_const(08)
+    // stack:                                              ind08, x08, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                                                          x09, ptr, x11
+    DUP10  %add_const(09)
+    // stack:                                                   ind09, x09, ptr, x11
+    %mstore_kernel_general
+    // stack:                                                               ptr, x11
+           %add_const(11)
+    // stack:                                                             ind11, x11
+    %mstore_kernel_general
+    // stack:                                                            
+%endmacro
+
+%macro assert_eq_fp12_unit
     // stack:      ptr
     DUP1                 %mload_kernel_code
     // stack: x00, ptr
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 4e72d782..ad9aeff3 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -44,7 +44,7 @@ global inverse_fp12:
     // stack:             inv, f^-1, inv, f, ptr, inv, retdest
     %store_fp12
     // stack:                        inv, f, ptr, inv, retdest
-    %stack (inv, f: 12) -> ()
+    POP %pop4 %pop4 %pop4
     // stack:                                ptr, inv, retdest 
     PUSH 200  PUSH check_inv 
     // stack:                check_inv, 200, ptr, inv, retdest 
@@ -53,7 +53,7 @@ global inverse_fp12:
     %jump(mul_fp12)
 global check_inv:
     // stack:                           200, ptr, inv, retdest
-    %assert_eq_unit_fp12
+    %assert_eq_fp12_unit
     // stack:                                ptr, inv, retdest
     POP  SWAP1  
     // stack:                                     retdest, inv

From 83328f918c40c93bc9089f4ff842a9dd73c48fad Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 8 Dec 2022 17:46:13 -0800
Subject: [PATCH 039/201] fix dups

---
 .../bn254/field_arithmetic/field_macros.asm    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
index 27711417..653ae2de 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
@@ -934,39 +934,39 @@
     // stack:           ind01, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                       x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    DUP10  %add_const(02)
+    DUP9   %add_const(02)
     // stack:                ind02, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                            x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    DUP10  %add_const(03)
+    DUP8   %add_const(03)
     // stack:                     ind03, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                 x04, x05, x06, x07, x08, x09, ptr, x11
-    DUP10  %add_const(04)
+    DUP7   %add_const(04)
     // stack:                          ind04, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                      x05, x06, x07, x08, x09, ptr, x11
-    DUP10  %add_const(05)
+    DUP6   %add_const(05)
     // stack:                               ind05, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                           x06, x07, x08, x09, ptr, x11
-    DUP10  %add_const(06)
+    DUP5   %add_const(06)
     // stack:                                    ind06, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                                x07, x08, x09, ptr, x11
-    DUP10  %add_const(07)
+    DUP4   %add_const(07)
     // stack:                                         ind07, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                                     x08, x09, ptr, x11
-    DUP10  %add_const(08)
+    DUP3   %add_const(08)
     // stack:                                              ind08, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                                          x09, ptr, x11
-    DUP10  %add_const(09)
+    DUP2   %add_const(09)
     // stack:                                                   ind09, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                                               ptr, x11
-           %add_const(11)
+    %add_const(11)
     // stack:                                                             ind11, x11
     %mstore_kernel_general
     // stack:                                                            

From 8737ba9b938b74cd3a886505af07648d9d707b8d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 8 Dec 2022 18:20:24 -0800
Subject: [PATCH 040/201] fix tate

---
 .../bn254/curve_arithmetic/miller_loop.asm    |  2 +-
 .../bn254/curve_arithmetic/tate_pairing.asm   | 74 ++++++++++---------
 .../curve/bn254/field_arithmetic/fp12_mul.asm |  6 +-
 3 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 675b04a1..5c71038e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -70,7 +70,7 @@ miller_end:
     // stack: times, O, P, Q, out, retdest
     %pop3  %pop3  %pop3
     // stack:                 out, retdest
-    %jump(post_mllr)
+    SWAP1  %jump(post_mllr)
 
 
 miller_one:
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 971528af..47b1c313 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -15,58 +15,60 @@
 ///     return out
 
 global tate:
-    // stack:                     ptr, out,                                             retdest
-    PUSH tate_mul3   SWAP2 
-    // stack:                     out, ptr,                                  tate_mul3, retdest
-    PUSH tate_mul2   SWAP2 
-    // stack:                     ptr, out,                       tate_mul2, tate_mul3, retdest
-    PUSH tate_mul1   SWAP2
-    // stack:                     out, ptr,            tate_mul1, tate_mul2, tate_mul3, retdest
-    PUSH post_mllr   SWAP2 
-    // stack:                     ptr, out, post_mllr, tate_mul1, tate_mul2, tate_mul3, retdest
+    // stack:           ptr, out,            retdest
+    PUSH post_mllr   SWAP2   SWAP1
+    // stack:           ptr, out, post_mllr, retdest
     %jump(miller_init)
 global post_mllr:
-    // stack:                          out,            tate_mul1, tate_mul2, tate_mul3, retdest
-    DUP1 
-    // stack:                     out, out,            tate_mul1, tate_mul2, tate_mul3, retdest
+    // stack:                           out, retdest
+    PUSH tate_inv
+    // stack:                 tate_inv, out, retdest
     PUSH 100 
-    // stack:                100, out, out,            tate_mul1, tate_mul2, tate_mul3, retdest
+    // stack:            100, tate_inv, out, retdest
+    DUP3 
+    // stack:       out, 100, tate_inv, out, retdest
+    %jump(inverse_fp12)
+tate_inv:
+    // stack:                           out, retdest  {100: inv}
+    PUSH tate_mul1
+    // stack:                tate_mul1, out, retdest  {100: inv}
     DUP2
-    // stack:           out, 100, out, out,            tate_mul1, tate_mul2, tate_mul3, retdest
-    // %inverse_fp12
-    // stack:                     100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
+    // stack:           out, tate_mul1, out, retdest  {100: inv}
+    PUSH 100 
+    // stack:      100, out, tate_mul1, out, retdest  {100: inv}
     DUP2
-    // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
+    // stack: out, 100, out, tate_mul1, out, retdest  {100: inv}
     %frob_fp12_6
-    // stack:                out, 100, out,            tate_mul1, tate_mul2, tate_mul3, retdest  {100: inv}
+    // stack: out, 100, out, tate_mul1, out, retdest  {100: inv}
     %jump(mul_fp12)
 tate_mul1:
-    // stack:                          out,                       tate_mul2, tate_mul3, retdest  {100: inv}
-    DUP1
-    // stack:                     out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
-    PUSH 100
-    // stack:                100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}       
+    // stack:                           out, retdest  {100: inv}
+    PUSH tate_mul2
+    // stack:                tate_mul2, out, retdest  {100: inv}
     DUP2
-    // stack:           out, 100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv}
+    // stack:           out, tate_mul2, out, retdest  {100: inv}
+    PUSH 100
+    // stack:      100, out, tate_mul2, out, retdest  {100: inv}       
+    DUP2
+    // stack: out, 100, out, tate_mul2, out, retdest  {100: inv}
     %frob_fp12_2
-    // stack:                100, out, out,                       tate_mul2, tate_mul3, retdest  {100: inv} 
+    // stack: out, 100, out, tate_mul2, out, retdest  {100: inv} 
     %jump(mul_fp12)
 tate_mul2: 
-    // stack:                          out,                                  tate_mul3, retdest  {100: acc}
+    // stack:                           out, retdest  {100: acc}
     PUSH post_pow
-    // stack:                post_pow, out,                                  tate_mul3, retdest  {100: acc}
+    // stack:                 post_pow, out, retdest  {100: acc}
     PUSH 100
-    // stack:           100, post_pow, out,                                  tate_mul3, retdest  {100: acc}
+    // stack:            100, post_pow, out, retdest  {100: acc}
     DUP3
-    // stack:      out, 100, post_pow, out,                                  tate_mul3, retdest  {100: acc}
-    // %jump(power)
+    // stack:       out, 100, post_pow, out, retdest  {100: acc}
+    %jump(power)
 post_pow: 
-    // stack:                     100, out,                                  tate_mul3, retdest  {100: pow}
+    // stack:                           out, retdest  {100: pow}
+    PUSH 100
+    // stack:                      100, out, retdest  {100: pow}
     DUP2
-    // stack:                out, 100, out,                                  tate_mul3, retdest  {100: pow}
+    // stack:                 out, 100, out, retdest  {100: pow}
     %frob_fp12_3
-    // stack:                out, 100, out,                                  tate_mul3, retdest  {100: pow}
+    // stack:                 out, 100, out, retdest  {100: pow}
     %jump(mul_fp12)
-tate_mul3:
-    // stack:                          out,                                             retdest  {100: pow}
-    SWAP1  JUMP
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index 53e13153..1b1a4153 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -173,7 +173,7 @@ ret_3:
     // stack:                 out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %store_fp6
     // stack:                                     inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    POP  SWAP1  JUMP
+    %pop2  JUMP
 
 
 //////////////////////////////////////
@@ -303,7 +303,7 @@ global mul_fp12_sparse:
     // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     %store_fp6
     // stack:                                                                    inA, inB, out
-    %pop2  SWAP1  JUMP
+    %pop3  JUMP
 
 /// global mul_fp12_sparse_fast:
 ///    // stack:                                                            inA, inB, out
@@ -447,4 +447,4 @@ post_sq2:
     // stack:                                out, ff + sh(f'f'), inp, out
     %store_fp6
     // stack:                                                    inp, out
-    POP  SWAP1  JUMP
+    %pop2  JUMP

From 17890dd58ddbb6351a4031c6be5cb7886a214f6a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 8 Dec 2022 18:29:27 -0800
Subject: [PATCH 041/201] fix miller

---
 .../bn254/curve_arithmetic/miller_loop.asm    | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 5c71038e..f86f3f88 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -117,17 +117,17 @@ miller_zero_final:
 ///     O += O
 
 mul_tangent:
-    // stack:                                         retdest, 0xnm, times, O, P, Q, out
-    PUSH mul_tangent_2  PUSH mul_tangent_1
-    // stack:           mul_tangent_1, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP13  DUP1
-    // stack: out, out, mul_tangent_1, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // stack:                                                   retdest, 0xnm, times, O, P, Q, out
+    PUSH mul_tangent_2  DUP12  PUSH mul_tangent_1
+    // stack:           mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    DUP2  DUP1
+    // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %jump(square_fp12)
 mul_tangent_1:
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP12  DUP12  DUP12  DUP12
+    DUP13  DUP13  DUP13  DUP13
     // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP10  DUP10
+    DUP11  DUP11
     // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %store_tangent
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
@@ -135,10 +135,10 @@ mul_tangent_1:
     // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
     %jump(mul_fp12_sparse)
 mul_tangent_2:
-    // stack:             out, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    POP  PUSH after_double
+    // stack:                  retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    PUSH after_double
     // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    DUP5  DUP5
+    DUP6  DUP6
     // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
     %jump(ec_double)
 after_double:

From a9f80d383d14ca162ebe2f9830199b12e2bbaf90 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 13 Dec 2022 16:31:16 -0800
Subject: [PATCH 042/201] spacing

---
 evm/src/cpu/kernel/asm/util/basic_macros.asm | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
index 6ec13835..5b9358e3 100644
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@@ -289,17 +289,17 @@
     DUP1
     PUSH 28
     BYTE
-    // stack:                a, abcd
+    // stack:           a, abcd
     DUP2
     PUSH 29
     BYTE
     %shl_const(8)
-    // stack:            b0, a, abcd 
+    // stack:       b0, a, abcd 
     DUP3
     PUSH 30
     BYTE
     %shl_const(16)
-    // stack:       c00, b0, a, abcd
+    // stack:  c00, b0, a, abcd
     SWAP3
     PUSH 31
     BYTE
@@ -310,3 +310,7 @@
     OR
     // stack: dcba
 %endmacro
+
+01 00 00 00 01 00 00 00 01 00
+ff 00 00 ff 00 ff 00 00 00 00 ff 00 00
+ff ff ff ff ff ff
\ No newline at end of file

From bd8988957edd53cdd35b53771d45c8d9b7dea4f7 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Wed, 14 Dec 2022 19:14:14 -0800
Subject: [PATCH 043/201] U256ify

---
 .../curve/bn254/field_arithmetic/fp12_mul.asm |  13 +-
 evm/src/cpu/kernel/interpreter.rs             |   3 +-
 evm/src/cpu/kernel/tests/fields.rs            | 150 ++++++++----------
 3 files changed, 67 insertions(+), 99 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index 1b1a4153..e879297e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -23,17 +23,10 @@ global test_mul_fp12:
     // stack:                     ret_stack, inB, out, inA
     SWAP3
     // stack:                           inA, inB, out, ret_stack
-    %jump(square_fp12_test)
+    %jump(mul_fp12)
 ret_stack:
-    // stack:          out
-    DUP1  %offset_fp6
-    // stack:    out', out
-    %load_fp6
-    // stack:      h', out
-    DUP7
-    // stack: out, h', out
-    %load_fp6
-    // stack:   h, h', out
+    // stack: out
+    %load_fp12
     %jump(0xdeadbeef)
 
 square_fp12_test:
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index c3edbb5b..70b65ee1 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -1,7 +1,6 @@
 //! An EVM interpreter for testing and debugging purposes.
 
 use std::collections::HashMap;
-use std::str::FromStr;
 
 use anyhow::{anyhow, bail, ensure};
 use ethereum_types::{U256, U512};
@@ -25,7 +24,7 @@ type F = GoldilocksField;
 const DEFAULT_HALT_OFFSET: usize = 0xdeadbeef;
 
 /// Order of the BN254 base field.
-const BN_BASE: U256 = U256([
+pub const BN_BASE: U256 = U256([
     4332616871279656263,
     10917124144477883021,
     13281191951274694749,
diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 1fdd0d75..9342c96c 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -3,49 +3,44 @@ use ethereum_types::U256;
 use rand::{thread_rng, Rng};
 
 use crate::cpu::kernel::aggregator::KERNEL;
-use crate::cpu::kernel::interpreter::run_interpreter;
+use crate::cpu::kernel::interpreter::{run_interpreter, BN_BASE};
 
-// TODO: 107 is hardcoded as a dummy prime for testing
-// should be changed to the proper implementation prime
-// once the run_{add, mul, sub}fp254 fns are implemented
-const P254: u32 = 107;
-
-fn add_fp(x: u32, y: u32) -> u32 {
-    (x + y) % P254
+fn add_fp(x: U256, y: U256) -> U256 {
+    (x + y) % BN_BASE
 }
 
-fn add3_fp(x: u32, y: u32, z: u32) -> u32 {
-    (x + y + z) % P254
+fn add3_fp(x: U256, y: U256, z: U256) -> U256 {
+    (x + y + z) % BN_BASE
 }
 
-fn mul_fp(x: u32, y: u32) -> u32 {
-    (x * y) % P254
+fn mul_fp(x: U256, y: U256) -> U256 {
+    U256::try_from(x.full_mul(y) % BN_BASE).unwrap()
 }
 
-fn sub_fp(x: u32, y: u32) -> u32 {
-    (P254 + x - y) % P254
+fn sub_fp(x: U256, y: U256) -> U256 {
+    (BN_BASE + x - y) % BN_BASE
 }
 
-fn add_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
+fn add_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
     let [a, a_] = a;
     let [b, b_] = b;
     [add_fp(a, b), add_fp(a_, b_)]
 }
 
-fn add3_fp2(a: [u32; 2], b: [u32; 2], c: [u32; 2]) -> [u32; 2] {
+fn add3_fp2(a: [U256; 2], b: [U256; 2], c: [U256; 2]) -> [U256; 2] {
     let [a, a_] = a;
     let [b, b_] = b;
     let [c, c_] = c;
     [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
 }
 
-fn sub_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
+fn sub_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
     let [a, a_] = a;
     let [b, b_] = b;
     [sub_fp(a, b), sub_fp(a_, b_)]
 }
 
-fn mul_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
+fn mul_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
     let [a, a_] = a;
     let [b, b_] = b;
     [
@@ -54,12 +49,15 @@ fn mul_fp2(a: [u32; 2], b: [u32; 2]) -> [u32; 2] {
     ]
 }
 
-fn i9(a: [u32; 2]) -> [u32; 2] {
+fn i9(a: [U256; 2]) -> [U256; 2] {
     let [a, a_] = a;
-    [sub_fp(mul_fp(9, a), a_), add_fp(a, mul_fp(9, a_))]
+    [
+        sub_fp(mul_fp(U256::from(9), a), a_),
+        add_fp(a, mul_fp(U256::from(9), a_)),
+    ]
 }
 
-fn add_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+fn add_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
     let [c0, c1, c2] = c;
     let [d0, d1, d2] = d;
 
@@ -69,7 +67,7 @@ fn add_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
     [e0, e1, e2]
 }
 
-fn sub_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+fn sub_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
     let [c0, c1, c2] = c;
     let [d0, d1, d2] = d;
 
@@ -79,7 +77,7 @@ fn sub_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
     [e0, e1, e2]
 }
 
-fn mul_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+fn mul_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
     let [c0, c1, c2] = c;
     let [d0, d1, d2] = d;
 
@@ -101,17 +99,18 @@ fn mul_fp6(c: [[u32; 2]; 3], d: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
     ]
 }
 
-fn sh(c: [[u32; 2]; 3]) -> [[u32; 2]; 3] {
+fn sh(c: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
     let [c0, c1, c2] = c;
     [i9(c2), c0, c1]
 }
 
-fn sparse_embed(x: [u32; 5]) -> [[[u32; 2]; 3]; 2] {
+fn sparse_embed(x: [U256; 5]) -> [[[U256; 2]; 3]; 2] {
     let [g0, g1, g1_, g2, g2_] = x;
-    [[[g0, 0], [g1, g1_], [0, 0]], [[0, 0], [g2, g2_], [0, 0]]]
+    let z = U256::from(0);
+    [[[g0, z], [g1, g1_], [z, z]], [[z, z], [g2, g2_], [z, z]]]
 }
 
-fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2] {
+fn mul_fp12(f: [[[U256; 2]; 3]; 2], g: [[[U256; 2]; 3]; 2]) -> [[[U256; 2]; 3]; 2] {
     let [f0, f1] = f;
     let [g0, g1] = g;
 
@@ -121,80 +120,60 @@ fn mul_fp12(f: [[[u32; 2]; 3]; 2], g: [[[u32; 2]; 3]; 2]) -> [[[u32; 2]; 3]; 2]
     [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
 }
 
-fn gen_fp6() -> [[u32; 2]; 3] {
+fn gen_fp() -> U256 {
     let mut rng = thread_rng();
+    let x64 = rng.gen::<u64>();
+    U256([x64, x64, x64, x64]) % BN_BASE
+}
+
+fn gen_fp6() -> [[U256; 2]; 3] {
     [
-        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
-        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
-        [rng.gen_range(0..P254), rng.gen_range(0..P254)],
+        [gen_fp(), gen_fp()],
+        [gen_fp(), gen_fp()],
+        [gen_fp(), gen_fp()],
     ]
 }
 
-fn gen_fp12_sparse() -> [[[u32; 2]; 3]; 2] {
-    let mut rng = thread_rng();
-    sparse_embed([
-        rng.gen_range(0..P254),
-        rng.gen_range(0..P254),
-        rng.gen_range(0..P254),
-        rng.gen_range(0..P254),
-        rng.gen_range(0..P254),
-    ])
+fn gen_fp12_sparse() -> [[[U256; 2]; 3]; 2] {
+    sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
 }
 
-fn as_stack(xs: Vec<u32>) -> Vec<U256> {
+fn as_stack(xs: Vec<U256>) -> Vec<U256> {
     xs.iter().map(|&x| U256::from(x)).rev().collect()
 }
 
-#[test]
-#[ignore]
-fn test_fp6() -> Result<()> {
-    let c = gen_fp6();
-    let d = gen_fp6();
-
-    let mut input: Vec<u32> = [c, d].into_iter().flatten().flatten().collect();
-    input.push(0xdeadbeef);
-
-    let initial_offset = KERNEL.global_labels["mul_fp6"];
-    let initial_stack: Vec<U256> = as_stack(input);
-    let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
-        .stack()
-        .to_vec();
-
-    let output: Vec<u32> = mul_fp6(c, d).into_iter().flatten().collect();
-    let expected = as_stack(output);
-
-    assert_eq!(final_stack, expected);
-
-    Ok(())
-}
-
 fn make_initial_stack(
-    in1: u32,
-    in2: u32,
-    out: u32,
-    f0: [[u32; 2]; 3],
-    f1: [[u32; 2]; 3],
-    g0: [[u32; 2]; 3],
-    g1: [[u32; 2]; 3],
+    in1: usize,
+    in2: usize,
+    out: usize,
+    f0: [[U256; 2]; 3],
+    f1: [[U256; 2]; 3],
+    g0: [[U256; 2]; 3],
+    g1: [[U256; 2]; 3],
 ) -> Vec<U256> {
     // stack: in0, f, in0', f', in1, g, in1', g', in1, out, in0, out
-    let f0: Vec<u32> = f0.into_iter().flatten().collect();
-    let f1: Vec<u32> = f1.into_iter().flatten().collect();
-    let g0: Vec<u32> = g0.into_iter().flatten().collect();
-    let g1: Vec<u32> = g1.into_iter().flatten().collect();
+
+    let in1 = U256::from(in1);
+    let in2 = U256::from(in2);
+    let out = U256::from(out);
+
+    let f0: Vec<U256> = f0.into_iter().flatten().collect();
+    let f1: Vec<U256> = f1.into_iter().flatten().collect();
+    let g0: Vec<U256> = g0.into_iter().flatten().collect();
+    let g1: Vec<U256> = g1.into_iter().flatten().collect();
 
     let mut input = f0;
     input.extend(vec![in1]);
     input.extend(f1);
     input.extend(g0);
-    input.extend(vec![in2]);
+    input.extend(vec![U256::from(in2)]);
     input.extend(g1);
     input.extend(vec![in2, out, in1]);
 
     as_stack(input)
 }
 
-// #[test]
+#[test]
 fn test_fp12() -> Result<()> {
     let in1 = 64;
     let in2 = 76;
@@ -211,13 +190,12 @@ fn test_fp12() -> Result<()> {
         .stack()
         .to_vec();
 
-    let mut output: Vec<u32> = mul_fp12([f0, f1], [g0, g1])
+    let expected: Vec<U256> = mul_fp12([f0, f1], [g0, g1])
         .into_iter()
         .flatten()
         .flatten()
+        .rev()
         .collect();
-    output.extend(vec![out]);
-    let expected = as_stack(output);
 
     assert_eq!(final_stack, expected);
 
@@ -240,20 +218,19 @@ fn test_fp12_sparse() -> Result<()> {
         .stack()
         .to_vec();
 
-    let mut output: Vec<u32> = mul_fp12([f0, f1], [g0, g1])
+    let expected: Vec<U256> = mul_fp12([f0, f1], [g0, g1])
         .into_iter()
         .flatten()
         .flatten()
+        .rev()
         .collect();
-    output.extend(vec![out]);
-    let expected = as_stack(output);
 
     assert_eq!(final_stack, expected);
 
     Ok(())
 }
 
-#[test]
+// #[test]
 fn test_fp12_square() -> Result<()> {
     let in1 = 64;
     let in2 = 76;
@@ -268,13 +245,12 @@ fn test_fp12_square() -> Result<()> {
         .stack()
         .to_vec();
 
-    let mut output: Vec<u32> = mul_fp12([f0, f1], [f0, f1])
+    let expected: Vec<U256> = mul_fp12([f0, f1], [f0, f1])
         .into_iter()
         .flatten()
         .flatten()
+        .rev()
         .collect();
-    output.extend(vec![out]);
-    let expected = as_stack(output);
 
     assert_eq!(final_stack, expected);
 

From 7378f3808a5b2d3328aabdaac4ad8e9220696fef Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Wed, 14 Dec 2022 19:31:21 -0800
Subject: [PATCH 044/201] fix

---
 evm/src/cpu/kernel/tests/fields.rs | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 9342c96c..6afe3064 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -138,10 +138,6 @@ fn gen_fp12_sparse() -> [[[U256; 2]; 3]; 2] {
     sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
 }
 
-fn as_stack(xs: Vec<U256>) -> Vec<U256> {
-    xs.iter().map(|&x| U256::from(x)).rev().collect()
-}
-
 fn make_initial_stack(
     in1: usize,
     in2: usize,
@@ -169,8 +165,9 @@ fn make_initial_stack(
     input.extend(vec![U256::from(in2)]);
     input.extend(g1);
     input.extend(vec![in2, out, in1]);
+    input.reverse();
 
-    as_stack(input)
+    input
 }
 
 #[test]

From 5ca2d88bfb3eec97285da53fbb5c5688753b8f06 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Wed, 14 Dec 2022 19:51:07 -0800
Subject: [PATCH 045/201] aggregator

---
 evm/src/cpu/kernel/aggregator.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 7504a26c..ce2ce0cc 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -28,6 +28,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/power.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),

From a72d4faaec13c7b5b076042ccb26905e48ced7e9 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Wed, 14 Dec 2022 19:58:16 -0800
Subject: [PATCH 046/201] minor

---
 evm/src/cpu/kernel/interpreter.rs  | 2 +-
 evm/src/cpu/kernel/tests/fields.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 70b65ee1..e53b4cae 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -118,7 +118,7 @@ impl<'a> Interpreter<'a> {
         let mut tot = 0;
         for i in 0..0x100 {
             if self.opcode_count[i] > 0 {
-                tot = tot + self.opcode_count[i];
+                tot += self.opcode_count[i];
                 println!("{}: {}", get_mnemonic(i as u8), self.opcode_count[i])
             }
         }
diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 6afe3064..70d339e9 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -162,7 +162,7 @@ fn make_initial_stack(
     input.extend(vec![in1]);
     input.extend(f1);
     input.extend(g0);
-    input.extend(vec![U256::from(in2)]);
+    input.extend(vec![in2]);
     input.extend(g1);
     input.extend(vec![in2, out, in1]);
     input.reverse();

From d888ce8c94a02f02f248cfb57a07725357112391 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Wed, 14 Dec 2022 20:16:50 -0800
Subject: [PATCH 047/201] minor

---
 evm/src/cpu/kernel/tests/fields.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/fields.rs
index 70d339e9..97276358 100644
--- a/evm/src/cpu/kernel/tests/fields.rs
+++ b/evm/src/cpu/kernel/tests/fields.rs
@@ -199,7 +199,8 @@ fn test_fp12() -> Result<()> {
     Ok(())
 }
 
-// #[test]
+#[test]
+#[ignore]
 fn test_fp12_sparse() -> Result<()> {
     let in1 = 64;
     let in2 = 76;
@@ -227,7 +228,8 @@ fn test_fp12_sparse() -> Result<()> {
     Ok(())
 }
 
-// #[test]
+#[test]
+#[ignore]
 fn test_fp12_square() -> Result<()> {
     let in1 = 64;
     let in2 = 76;

From e8fe799ecd550ab43aa0558313e2b2cf0c3389fd Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Wed, 14 Dec 2022 21:14:22 -0800
Subject: [PATCH 048/201] name

---
 evm/src/cpu/kernel/tests/{fields.rs => bn254_field.rs} | 0
 evm/src/cpu/kernel/tests/mod.rs                        | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename evm/src/cpu/kernel/tests/{fields.rs => bn254_field.rs} (100%)

diff --git a/evm/src/cpu/kernel/tests/fields.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
similarity index 100%
rename from evm/src/cpu/kernel/tests/fields.rs
rename to evm/src/cpu/kernel/tests/bn254_field.rs
diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs
index 2ae9d2b0..246b33d2 100644
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@@ -4,7 +4,7 @@ mod core;
 mod curve_ops;
 mod ecrecover;
 mod exp;
-mod fields;
+mod bn254_field;
 mod hash;
 mod mpt;
 mod packing;

From 94b999c2c1c7d6ccacddb5aec5719dac3ba023b0 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Wed, 14 Dec 2022 21:28:01 -0800
Subject: [PATCH 049/201] fix

---
 evm/src/cpu/kernel/tests/bn254_field.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 97276358..21becba9 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -164,7 +164,7 @@ fn make_initial_stack(
     input.extend(g0);
     input.extend(vec![in2]);
     input.extend(g1);
-    input.extend(vec![in2, out, in1]);
+    input.extend(vec![in2, out, in1, out]);
     input.reverse();
 
     input

From 7328b653414e1d08f9ac68c1b7a69f66ef7cd9bd Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Thu, 15 Dec 2022 11:53:17 -0800
Subject: [PATCH 050/201] name

---
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 27276989..1b5fec79 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -66,7 +66,7 @@
 %endmacro
 
 
-/// def Fp12_frob_n(f, f'):
+/// def frob_fp12_n(f, f'):
 ///     g  =             frob_fp6(n, f )
 ///     g' = FROB_z[n] * frob_fp6(n, f')
 ///     return g, g'

From 9063e900fbd854a1b6adfb768345fe70829d9ce9 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Thu, 15 Dec 2022 13:18:00 -0800
Subject: [PATCH 051/201] streamline tests

---
 .../curve/bn254/field_arithmetic/fp12_mul.asm |  34 ++--
 .../bn254/field_arithmetic/frobenius.asm      |  26 +--
 evm/src/cpu/kernel/tests/bn254_field.rs       | 169 ++++++++++--------
 evm/src/cpu/kernel/tests/mod.rs               |   2 +-
 4 files changed, 128 insertions(+), 103 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index e879297e..0ba5de5a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -2,37 +2,29 @@
 
 /// cost: 220
 global test_mul_fp12:
-    // stack:      f, inA , f', g, inB , g', inB, out, inA
+    // stack:      f, inA , f', g, inB , g', mul_dest, inA, inB, out, ret_stack, out
     DUP7
-    // stack: inA, f, inA , f', g, inB , g', inB, out, inA
+    // stack: inA, f, inA , f', g, inB , g', mul_dest, inA, inB, out, ret_stack, out
     %store_fp6
-    // stack:         inA , f', g, inB , g', inB, out, inA
+    // stack:         inA , f', g, inB , g', mul_dest, inA, inB, out, ret_stack, out
     %offset_fp6
-    // stack:         inA', f', g, inB , g', inB, out, inA
+    // stack:         inA', f', g, inB , g', mul_dest, inA, inB, out, ret_stack, out
     %store_fp6
-    // stack:                   g, inB , g', inB, out, inA
+    // stack:                   g, inB , g', mul_dest, inA, inB, out, ret_stack, out
     DUP7
-    // stack:              inB, g, inB , g', inB, out, inA
+    // stack:              inB, g, inB , g', mul_dest, inA, inB, out, ret_stack, out
     %store_fp6
-    // stack:                      inB , g', inB, out, inA
+    // stack:                      inB , g', mul_dest, inA, inB, out, ret_stack, out
     %offset_fp6
-    // stack:                      inB', g', inB, out, inA
+    // stack:                      inB', g', mul_dest, inA, inB, out, ret_stack, out
     %store_fp6
-    // stack:                                inB, out, inA
-    PUSH ret_stack
-    // stack:                     ret_stack, inB, out, inA
-    SWAP3
-    // stack:                           inA, inB, out, ret_stack
-    %jump(mul_fp12)
-ret_stack:
+    // stack:                                mul_dest, inA, inB, out, ret_stack, out
+    JUMP
+global ret_stack:
     // stack: out
     %load_fp12
     %jump(0xdeadbeef)
 
-square_fp12_test:
-    POP
-    %jump(square_fp12)
-
 
 ///////////////////////////////////////
 ///// GENERAL FP12 MULTIPLICATION /////
@@ -387,6 +379,10 @@ global mul_fp12_sparse:
 ///
 /// f, f' consist of six elements on the stack
 
+global square_fp12_test:
+    POP
+    %jump(square_fp12)
+
 global square_fp12:
     // stack:                                                    inp, out
     DUP1
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 1b5fec79..7e828784 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -1,3 +1,5 @@
+/// let Z` denote the complex conjugate of Z
+
 /// def frob_fp6_n(C0, C1, C2):
 ///     if n%2:
 ///         D0, D1, D2 = C0`, FROB_t1[n] * C1`, FROB_t2[n] * C2`
@@ -13,7 +15,7 @@
     // stack: C2 , C1 , D0
     %conj
     // stack: C2`, C1 , D0
-    %froby2_1
+    %frobt2_1
     // stack: D2 , C1 , D0
     %swap_fp2_hole_2
     // stack: D0 , C1 , D2
@@ -21,7 +23,7 @@
     // stack: C1 , D0 , D2
     %conj
     // stack: C1`, D0 , D2
-    %froby1_1
+    %frobt1_1
     // stack: D1 , D0 , D2
     %swap_fp2
     // stack: D0 , D1 , D2
@@ -31,13 +33,13 @@
     // stack: C0, C1, C2
     %swap_fp2_hole_2
     // stack: C2, C1, C0
-    %froby2_2
+    %frobt2_2
     // stack: D2, C1, C0
     %swap_fp2_hole_2
     // stack: C0, C1, D2
     %swap_fp2
     // stack: C1, C0, D2
-    %froby1_2
+    %frobt1_2
     // stack: D1, C0, D2
     %swap_fp2
     // stack: D0, D1, D2
@@ -51,7 +53,7 @@
     // stack: C2 , C1 , D0
     %conj
     // stack: C2`, C1 , D0
-    %froby2_3
+    %frobt2_3
     // stack: D2 , C1 , D0
     %swap_fp2_hole_2
     // stack: D0 , C1 , D2
@@ -59,7 +61,7 @@
     // stack: C1 , D0 , D2
     %conj
     // stack: C1`, D0 , D2
-    %froby1_3
+    %frobt1_3
     // stack: D1 , D0 , D2
     %swap_fp2
     // stack: D0 , D1 , D2
@@ -183,36 +185,36 @@
 %endmacro
 
 
-%macro froby1_1
+%macro frobt1_1
     PUSH 0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2
     PUSH 0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d
     %mul_fp2
 %endmacro
 
-%macro froby2_1
+%macro frobt2_1
     PUSH 0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126
     PUSH 0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762
     %mul_fp2
 %endmacro
 
-%macro froby1_2
+%macro frobt1_2
     PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48
     %mul_fp_fp2
 %endmacro
 
-%macro froby2_2
+%macro frobt2_2
     PUSH 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe
     %mul_fp_fp2
 %endmacro
 
 
-%macro froby1_3
+%macro frobt1_3
     PUSH 0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de
     PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d
     %mul_fp2
 %endmacro
 
-%macro froby2_3
+%macro frobt2_3
     PUSH 0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f
     PUSH 0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066
     %mul_fp2
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 21becba9..b6f2ed65 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -21,6 +21,15 @@ fn sub_fp(x: U256, y: U256) -> U256 {
     (BN_BASE + x - y) % BN_BASE
 }
 
+fn neg_fp(x: U256) -> U256 {
+    (BN_BASE - x) % BN_BASE
+}
+
+fn conj_fp2(a: [U256; 2]) -> [U256; 2] {
+    let [a, a_] = a;
+    [a, neg_fp(a_)]
+}
+
 fn add_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
     let [a, a_] = a;
     let [b, b_] = b;
@@ -51,10 +60,8 @@ fn mul_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
 
 fn i9(a: [U256; 2]) -> [U256; 2] {
     let [a, a_] = a;
-    [
-        sub_fp(mul_fp(U256::from(9), a), a_),
-        add_fp(a, mul_fp(U256::from(9), a_)),
-    ]
+    let nine = U256::from(9);
+    [sub_fp(mul_fp(nine, a), a_), add_fp(a, mul_fp(nine, a_))]
 }
 
 fn add_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
@@ -106,8 +113,11 @@ fn sh(c: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
 
 fn sparse_embed(x: [U256; 5]) -> [[[U256; 2]; 3]; 2] {
     let [g0, g1, g1_, g2, g2_] = x;
-    let z = U256::from(0);
-    [[[g0, z], [g1, g1_], [z, z]], [[z, z], [g2, g2_], [z, z]]]
+    let zero = U256::from(0);
+    [
+        [[g0, zero], [g1, g1_], [zero, zero]],
+        [[zero, zero], [g2, g2_], [zero, zero]],
+    ]
 }
 
 fn mul_fp12(f: [[[U256; 2]; 3]; 2], g: [[[U256; 2]; 3]; 2]) -> [[[U256; 2]; 3]; 2] {
@@ -139,18 +149,19 @@ fn gen_fp12_sparse() -> [[[U256; 2]; 3]; 2] {
 }
 
 fn make_initial_stack(
+    in0: usize,
     in1: usize,
-    in2: usize,
     out: usize,
     f0: [[U256; 2]; 3],
     f1: [[U256; 2]; 3],
     g0: [[U256; 2]; 3],
     g1: [[U256; 2]; 3],
+    mul_label: &str,
 ) -> Vec<U256> {
     // stack: in0, f, in0', f', in1, g, in1', g', in1, out, in0, out
 
+    let in0 = U256::from(in0);
     let in1 = U256::from(in1);
-    let in2 = U256::from(in2);
     let out = U256::from(out);
 
     let f0: Vec<U256> = f0.into_iter().flatten().collect();
@@ -158,100 +169,116 @@ fn make_initial_stack(
     let g0: Vec<U256> = g0.into_iter().flatten().collect();
     let g1: Vec<U256> = g1.into_iter().flatten().collect();
 
+    let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
+    let mul_dest = U256::from(KERNEL.global_labels[mul_label]);
+
     let mut input = f0;
-    input.extend(vec![in1]);
+    input.extend(vec![in0]);
     input.extend(f1);
     input.extend(g0);
-    input.extend(vec![in2]);
+    input.extend(vec![in1]);
     input.extend(g1);
-    input.extend(vec![in2, out, in1, out]);
+    input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
     input.reverse();
 
     input
 }
 
+fn make_expected_output(f: [[[U256; 2]; 3]; 2], g: [[[U256; 2]; 3]; 2]) -> Vec<U256> {
+    mul_fp12(f, g)
+        .into_iter()
+        .flatten()
+        .flatten()
+        .rev()
+        .collect()
+}
+
 #[test]
-fn test_fp12() -> Result<()> {
-    let in1 = 64;
-    let in2 = 76;
+fn test_mul_fp12() -> Result<()> {
+    let in0 = 64;
+    let in1 = 76;
     let out = 88;
 
     let f0 = gen_fp6();
     let f1 = gen_fp6();
     let g0 = gen_fp6();
     let g1 = gen_fp6();
+    let [h0, h1] = gen_fp12_sparse();
 
-    let initial_offset = KERNEL.global_labels["test_mul_fp12"];
-    let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, g0, g1);
-    let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
-        .stack()
-        .to_vec();
+    let test_mul = KERNEL.global_labels["test_mul_fp12"];
 
-    let expected: Vec<U256> = mul_fp12([f0, f1], [g0, g1])
-        .into_iter()
-        .flatten()
-        .flatten()
-        .rev()
-        .collect();
+    let normal: Vec<U256> = make_initial_stack(in0, in1, out, f0, f1, g0, g1, "mul_fp12");
+    let sparse: Vec<U256> = make_initial_stack(in0, in1, out, f0, f1, h0, h1, "mul_fp12_sparse");
+    let square: Vec<U256> = make_initial_stack(in0, in1, out, f0, f1, f0, f1, "square_fp12_test");
 
-    assert_eq!(final_stack, expected);
+    let out_normal: Vec<U256> = run_interpreter(test_mul, normal)?.stack().to_vec();
+    let out_sparse: Vec<U256> = run_interpreter(test_mul, sparse)?.stack().to_vec();
+    let out_square: Vec<U256> = run_interpreter(test_mul, square)?.stack().to_vec();
+
+    let exp_normal: Vec<U256> = make_expected_output([f0, f1], [g0, g1]);
+    let exp_sparse: Vec<U256> = make_expected_output([f0, f1], [h0, h1]);
+    let exp_square: Vec<U256> = make_expected_output([f0, f1], [f0, f1]);
+
+    assert_eq!(out_normal, exp_normal);
+    assert_eq!(out_sparse, exp_sparse);
+    assert_eq!(out_square, exp_square);
 
     Ok(())
 }
 
-#[test]
-#[ignore]
-fn test_fp12_sparse() -> Result<()> {
-    let in1 = 64;
-    let in2 = 76;
-    let out = 88;
+// #[test]
+// #[ignore]
+// fn test_fp12_sparse() -> Result<()> {
+//     let in1 = 64;
+//     let in2 = 76;
+//     let out = 88;
 
-    let f0 = gen_fp6();
-    let f1 = gen_fp6();
-    let [g0, g1] = gen_fp12_sparse();
+//     let f0 = gen_fp6();
+//     let f1 = gen_fp6();
+//     let [g0, g1] = gen_fp12_sparse();
 
-    let initial_offset = KERNEL.global_labels["test_mul_fp12"];
-    let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, g0, g1);
-    let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
-        .stack()
-        .to_vec();
+//     let initial_offset = KERNEL.global_labels["test_mul_fp12"];
+//     let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, g0, g1);
+//     let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
+//         .stack()
+//         .to_vec();
 
-    let expected: Vec<U256> = mul_fp12([f0, f1], [g0, g1])
-        .into_iter()
-        .flatten()
-        .flatten()
-        .rev()
-        .collect();
+//     let expected: Vec<U256> = mul_fp12([f0, f1], [g0, g1])
+//         .into_iter()
+//         .flatten()
+//         .flatten()
+//         .rev()
+//         .collect();
 
-    assert_eq!(final_stack, expected);
+//     assert_eq!(final_stack, expected);
 
-    Ok(())
-}
+//     Ok(())
+// }
 
-#[test]
-#[ignore]
-fn test_fp12_square() -> Result<()> {
-    let in1 = 64;
-    let in2 = 76;
-    let out = 88;
+// #[test]
+// #[ignore]
+// fn test_fp12_square() -> Result<()> {
+//     let in1 = 64;
+//     let in2 = 76;
+//     let out = 88;
 
-    let f0 = gen_fp6();
-    let f1 = gen_fp6();
+//     let f0 = gen_fp6();
+//     let f1 = gen_fp6();
 
-    let initial_offset = KERNEL.global_labels["test_mul_fp12"];
-    let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, f0, f1);
-    let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
-        .stack()
-        .to_vec();
+//     let initial_offset = KERNEL.global_labels["test_mul_fp12"];
+//     let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, f0, f1);
+//     let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
+//         .stack()
+//         .to_vec();
 
-    let expected: Vec<U256> = mul_fp12([f0, f1], [f0, f1])
-        .into_iter()
-        .flatten()
-        .flatten()
-        .rev()
-        .collect();
+//     let expected: Vec<U256> = mul_fp12([f0, f1], [f0, f1])
+//         .into_iter()
+//         .flatten()
+//         .flatten()
+//         .rev()
+//         .collect();
 
-    assert_eq!(final_stack, expected);
+//     assert_eq!(final_stack, expected);
 
-    Ok(())
-}
+//     Ok(())
+// }
diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs
index 246b33d2..0f799df1 100644
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@@ -1,10 +1,10 @@
 mod account_code;
 mod balance;
+mod bn254_field;
 mod core;
 mod curve_ops;
 mod ecrecover;
 mod exp;
-mod bn254_field;
 mod hash;
 mod mpt;
 mod packing;

From d0247017e2b9f040dea364599c15826a84016c69 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Thu, 15 Dec 2022 13:18:20 -0800
Subject: [PATCH 052/201] delete dead code

---
 evm/src/cpu/kernel/tests/bn254_field.rs | 57 -------------------------
 1 file changed, 57 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index b6f2ed65..c847e8f1 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -225,60 +225,3 @@ fn test_mul_fp12() -> Result<()> {
 
     Ok(())
 }
-
-// #[test]
-// #[ignore]
-// fn test_fp12_sparse() -> Result<()> {
-//     let in1 = 64;
-//     let in2 = 76;
-//     let out = 88;
-
-//     let f0 = gen_fp6();
-//     let f1 = gen_fp6();
-//     let [g0, g1] = gen_fp12_sparse();
-
-//     let initial_offset = KERNEL.global_labels["test_mul_fp12"];
-//     let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, g0, g1);
-//     let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
-//         .stack()
-//         .to_vec();
-
-//     let expected: Vec<U256> = mul_fp12([f0, f1], [g0, g1])
-//         .into_iter()
-//         .flatten()
-//         .flatten()
-//         .rev()
-//         .collect();
-
-//     assert_eq!(final_stack, expected);
-
-//     Ok(())
-// }
-
-// #[test]
-// #[ignore]
-// fn test_fp12_square() -> Result<()> {
-//     let in1 = 64;
-//     let in2 = 76;
-//     let out = 88;
-
-//     let f0 = gen_fp6();
-//     let f1 = gen_fp6();
-
-//     let initial_offset = KERNEL.global_labels["test_mul_fp12"];
-//     let initial_stack: Vec<U256> = make_initial_stack(in1, in2, out, f0, f1, f0, f1);
-//     let final_stack: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
-//         .stack()
-//         .to_vec();
-
-//     let expected: Vec<U256> = mul_fp12([f0, f1], [f0, f1])
-//         .into_iter()
-//         .flatten()
-//         .flatten()
-//         .rev()
-//         .collect();
-
-//     assert_eq!(final_stack, expected);
-
-//     Ok(())
-// }

From 14982d480f393b2e6fd02996e69227add8a07975 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Thu, 15 Dec 2022 13:20:16 -0800
Subject: [PATCH 053/201] naming

---
 evm/src/cpu/kernel/tests/bn254_field.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index c847e8f1..af4b773f 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -148,7 +148,7 @@ fn gen_fp12_sparse() -> [[[U256; 2]; 3]; 2] {
     sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
 }
 
-fn make_initial_stack(
+fn make_mul_stack(
     in0: usize,
     in1: usize,
     out: usize,
@@ -184,7 +184,7 @@ fn make_initial_stack(
     input
 }
 
-fn make_expected_output(f: [[[U256; 2]; 3]; 2], g: [[[U256; 2]; 3]; 2]) -> Vec<U256> {
+fn make_mul_expected(f: [[[U256; 2]; 3]; 2], g: [[[U256; 2]; 3]; 2]) -> Vec<U256> {
     mul_fp12(f, g)
         .into_iter()
         .flatten()
@@ -207,17 +207,17 @@ fn test_mul_fp12() -> Result<()> {
 
     let test_mul = KERNEL.global_labels["test_mul_fp12"];
 
-    let normal: Vec<U256> = make_initial_stack(in0, in1, out, f0, f1, g0, g1, "mul_fp12");
-    let sparse: Vec<U256> = make_initial_stack(in0, in1, out, f0, f1, h0, h1, "mul_fp12_sparse");
-    let square: Vec<U256> = make_initial_stack(in0, in1, out, f0, f1, f0, f1, "square_fp12_test");
+    let normal: Vec<U256> = make_mul_stack(in0, in1, out, f0, f1, g0, g1, "mul_fp12");
+    let sparse: Vec<U256> = make_mul_stack(in0, in1, out, f0, f1, h0, h1, "mul_fp12_sparse");
+    let square: Vec<U256> = make_mul_stack(in0, in1, out, f0, f1, f0, f1, "square_fp12_test");
 
     let out_normal: Vec<U256> = run_interpreter(test_mul, normal)?.stack().to_vec();
     let out_sparse: Vec<U256> = run_interpreter(test_mul, sparse)?.stack().to_vec();
     let out_square: Vec<U256> = run_interpreter(test_mul, square)?.stack().to_vec();
 
-    let exp_normal: Vec<U256> = make_expected_output([f0, f1], [g0, g1]);
-    let exp_sparse: Vec<U256> = make_expected_output([f0, f1], [h0, h1]);
-    let exp_square: Vec<U256> = make_expected_output([f0, f1], [f0, f1]);
+    let exp_normal: Vec<U256> = make_mul_expected([f0, f1], [g0, g1]);
+    let exp_sparse: Vec<U256> = make_mul_expected([f0, f1], [h0, h1]);
+    let exp_square: Vec<U256> = make_mul_expected([f0, f1], [f0, f1]);
 
     assert_eq!(out_normal, exp_normal);
     assert_eq!(out_sparse, exp_sparse);

From c598b94204c0288b50cd1431000274136351c838 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Thu, 15 Dec 2022 14:08:23 -0800
Subject: [PATCH 054/201] refactor

---
 .../curve/bn254/field_arithmetic/fp12_mul.asm |  23 +-
 evm/src/cpu/kernel/tests/bn254_field.rs       | 214 ++++++++++++++++--
 2 files changed, 196 insertions(+), 41 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index 0ba5de5a..e93336ee 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -1,24 +1,11 @@
 /// Note: uncomment this to test
 
-/// cost: 220
 global test_mul_fp12:
-    // stack:      f, inA , f', g, inB , g', mul_dest, inA, inB, out, ret_stack, out
-    DUP7
-    // stack: inA, f, inA , f', g, inB , g', mul_dest, inA, inB, out, ret_stack, out
-    %store_fp6
-    // stack:         inA , f', g, inB , g', mul_dest, inA, inB, out, ret_stack, out
-    %offset_fp6
-    // stack:         inA', f', g, inB , g', mul_dest, inA, inB, out, ret_stack, out
-    %store_fp6
-    // stack:                   g, inB , g', mul_dest, inA, inB, out, ret_stack, out
-    DUP7
-    // stack:              inB, g, inB , g', mul_dest, inA, inB, out, ret_stack, out
-    %store_fp6
-    // stack:                      inB , g', mul_dest, inA, inB, out, ret_stack, out
-    %offset_fp6
-    // stack:                      inB', g', mul_dest, inA, inB, out, ret_stack, out
-    %store_fp6
-    // stack:                                mul_dest, inA, inB, out, ret_stack, out
+    // stack: inA, f, f', inB, g, g', mul_dest, inA, inB, out, ret_stack, out
+    %store_fp12
+    // stack:             inB, g, g', mul_dest, inA, inB, out, ret_stack, out
+    %store_fp12
+    // stack:                         mul_dest, inA, inB, out, ret_stack, out
     JUMP
 global ret_stack:
     // stack: out
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index af4b773f..5699b1f4 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -1,3 +1,5 @@
+use std::str::FromStr;
+
 use anyhow::Result;
 use ethereum_types::U256;
 use rand::{thread_rng, Rng};
@@ -5,6 +7,10 @@ use rand::{thread_rng, Rng};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::{run_interpreter, BN_BASE};
 
+type Fp2 = [U256; 2];
+type Fp6 = [Fp2; 3];
+type Fp12 = [Fp6; 2];
+
 fn add_fp(x: U256, y: U256) -> U256 {
     (x + y) % BN_BASE
 }
@@ -25,31 +31,31 @@ fn neg_fp(x: U256) -> U256 {
     (BN_BASE - x) % BN_BASE
 }
 
-fn conj_fp2(a: [U256; 2]) -> [U256; 2] {
+fn conj_fp2(a: Fp2) -> Fp2 {
     let [a, a_] = a;
     [a, neg_fp(a_)]
 }
 
-fn add_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
+fn add_fp2(a: Fp2, b: Fp2) -> Fp2 {
     let [a, a_] = a;
     let [b, b_] = b;
     [add_fp(a, b), add_fp(a_, b_)]
 }
 
-fn add3_fp2(a: [U256; 2], b: [U256; 2], c: [U256; 2]) -> [U256; 2] {
+fn add3_fp2(a: Fp2, b: Fp2, c: Fp2) -> Fp2 {
     let [a, a_] = a;
     let [b, b_] = b;
     let [c, c_] = c;
     [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
 }
 
-fn sub_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
+fn sub_fp2(a: Fp2, b: Fp2) -> Fp2 {
     let [a, a_] = a;
     let [b, b_] = b;
     [sub_fp(a, b), sub_fp(a_, b_)]
 }
 
-fn mul_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
+fn mul_fp2(a: Fp2, b: Fp2) -> Fp2 {
     let [a, a_] = a;
     let [b, b_] = b;
     [
@@ -58,13 +64,13 @@ fn mul_fp2(a: [U256; 2], b: [U256; 2]) -> [U256; 2] {
     ]
 }
 
-fn i9(a: [U256; 2]) -> [U256; 2] {
+fn i9(a: Fp2) -> Fp2 {
     let [a, a_] = a;
     let nine = U256::from(9);
     [sub_fp(mul_fp(nine, a), a_), add_fp(a, mul_fp(nine, a_))]
 }
 
-fn add_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
+fn add_fp6(c: Fp6, d: Fp6) -> Fp6 {
     let [c0, c1, c2] = c;
     let [d0, d1, d2] = d;
 
@@ -74,7 +80,7 @@ fn add_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
     [e0, e1, e2]
 }
 
-fn sub_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
+fn sub_fp6(c: Fp6, d: Fp6) -> Fp6 {
     let [c0, c1, c2] = c;
     let [d0, d1, d2] = d;
 
@@ -84,7 +90,7 @@ fn sub_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
     [e0, e1, e2]
 }
 
-fn mul_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
+fn mul_fp6(c: Fp6, d: Fp6) -> Fp6 {
     let [c0, c1, c2] = c;
     let [d0, d1, d2] = d;
 
@@ -106,12 +112,12 @@ fn mul_fp6(c: [[U256; 2]; 3], d: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
     ]
 }
 
-fn sh(c: [[U256; 2]; 3]) -> [[U256; 2]; 3] {
+fn sh(c: Fp6) -> Fp6 {
     let [c0, c1, c2] = c;
     [i9(c2), c0, c1]
 }
 
-fn sparse_embed(x: [U256; 5]) -> [[[U256; 2]; 3]; 2] {
+fn sparse_embed(x: [U256; 5]) -> Fp12 {
     let [g0, g1, g1_, g2, g2_] = x;
     let zero = U256::from(0);
     [
@@ -120,7 +126,7 @@ fn sparse_embed(x: [U256; 5]) -> [[[U256; 2]; 3]; 2] {
     ]
 }
 
-fn mul_fp12(f: [[[U256; 2]; 3]; 2], g: [[[U256; 2]; 3]; 2]) -> [[[U256; 2]; 3]; 2] {
+fn mul_fp12(f: Fp12, g: Fp12) -> Fp12 {
     let [f0, f1] = f;
     let [g0, g1] = g;
 
@@ -136,7 +142,7 @@ fn gen_fp() -> U256 {
     U256([x64, x64, x64, x64]) % BN_BASE
 }
 
-fn gen_fp6() -> [[U256; 2]; 3] {
+fn gen_fp6() -> Fp6 {
     [
         [gen_fp(), gen_fp()],
         [gen_fp(), gen_fp()],
@@ -144,21 +150,183 @@ fn gen_fp6() -> [[U256; 2]; 3] {
     ]
 }
 
-fn gen_fp12_sparse() -> [[[U256; 2]; 3]; 2] {
+fn gen_fp12_sparse() -> Fp12 {
     sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
 }
 
+fn frob_t1(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
+                .unwrap(),
+            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
+                .unwrap(),
+            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
+                .unwrap(),
+            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
+fn frob_t2(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
+                .unwrap(),
+            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
+                .unwrap(),
+            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
+                .unwrap(),
+            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
+fn frob_z(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
+                .unwrap(),
+            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
+                .unwrap(),
+            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
+                .unwrap(),
+            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
+                .unwrap(),
+        ],
+        6 => [
+            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        7 => [
+            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
+                .unwrap(),
+            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
+                .unwrap(),
+        ],
+        8 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        9 => [
+            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
+                .unwrap(),
+            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
+                .unwrap(),
+        ],
+        10 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        11 => [
+            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
+                .unwrap(),
+            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
+fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
+    let [c0, c1, c2] = c;
+    let _c0 = conj_fp2(c0);
+    let _c1 = conj_fp2(c1);
+    let _c2 = conj_fp2(c2);
+
+    if n % 2 != 0 {
+        [_c0, mul_fp2(frob_t1(n), _c1), mul_fp2(frob_t2(n), _c2)]
+    } else {
+        [c0, mul_fp2(frob_t1(n), c1), mul_fp2(frob_t2(n), c2)]
+    }
+}
+fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
+    let [f0, f1] = f;
+    let zero = U256::from(0);
+    let scale = [frob_z(n), [zero, zero], [zero, zero]];
+    [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
+}
+
 fn make_mul_stack(
     in0: usize,
     in1: usize,
     out: usize,
-    f0: [[U256; 2]; 3],
-    f1: [[U256; 2]; 3],
-    g0: [[U256; 2]; 3],
-    g1: [[U256; 2]; 3],
+    f0: Fp6,
+    f1: Fp6,
+    g0: Fp6,
+    g1: Fp6,
     mul_label: &str,
 ) -> Vec<U256> {
-    // stack: in0, f, in0', f', in1, g, in1', g', in1, out, in0, out
+    // stack: in0, f, f', in1, g, g', mul_dest, in0, in1, out, ret_stack, out
 
     let in0 = U256::from(in0);
     let in1 = U256::from(in1);
@@ -172,11 +340,11 @@ fn make_mul_stack(
     let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
     let mul_dest = U256::from(KERNEL.global_labels[mul_label]);
 
-    let mut input = f0;
-    input.extend(vec![in0]);
+    let mut input = vec![in0];
+    input.extend(f0);
     input.extend(f1);
-    input.extend(g0);
     input.extend(vec![in1]);
+    input.extend(g0);
     input.extend(g1);
     input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
     input.reverse();
@@ -184,7 +352,7 @@ fn make_mul_stack(
     input
 }
 
-fn make_mul_expected(f: [[[U256; 2]; 3]; 2], g: [[[U256; 2]; 3]; 2]) -> Vec<U256> {
+fn make_mul_expected(f: Fp12, g: Fp12) -> Vec<U256> {
     mul_fp12(f, g)
         .into_iter()
         .flatten()

From cefbe248fa434bafc4e0a1c89f07552dd5d06c98 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Thu, 15 Dec 2022 17:00:38 -0800
Subject: [PATCH 055/201] frob tests work

---
 .../curve/bn254/curve_arithmetic/power.asm    |   4 +-
 .../bn254/curve_arithmetic/tate_pairing.asm   |   4 +-
 .../bn254/field_arithmetic/frobenius.asm      |  49 ++++++++-
 evm/src/cpu/kernel/tests/bn254_field.rs       | 101 +++++++++++++-----
 4 files changed, 124 insertions(+), 34 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
index 949a42c8..d0cf3bed 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
@@ -12,7 +12,7 @@
 ///     y0  = y0^{-1}
 ///     y1 *= y0 * (y2**2)
 ///     y1  = frob_fp12_1(y1)
-///     y2  = frob_fp12_2(y2)
+///     y2  = frob_fp12_2_(y2)
 ///     return y2 * y1 * y0 
 
 global power:
@@ -59,7 +59,7 @@ power_return_4:
     // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
     PUSH 224  DUP1
     // stack:                      224, 224, out, retdest  {236: y0, 212: y1, 224: y2}
-    %frob_fp12_2
+    %frob_fp12_2_
     // stack:                           224, out, retdest  {236: y0, 212: y1, 224: y2}
     POP
     // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 47b1c313..031dc93b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -5,7 +5,7 @@
 ///     out = frob_fp12_6(out)
 ///     out = mul_fp12(out, inv)
 ///
-///     acc = frob_fp12_2(out)
+///     acc = frob_fp12_2_(out)
 ///     out = mul_fp12(out, acc)
 ///
 ///     pow = fast_exp(out)
@@ -51,7 +51,7 @@ tate_mul1:
     // stack:      100, out, tate_mul2, out, retdest  {100: inv}       
     DUP2
     // stack: out, 100, out, tate_mul2, out, retdest  {100: inv}
-    %frob_fp12_2
+    %frob_fp12_2_
     // stack: out, 100, out, tate_mul2, out, retdest  {100: inv} 
     %jump(mul_fp12)
 tate_mul2: 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 7e828784..c343378b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -1,3 +1,41 @@
+global test_frob_fp12_1:
+    // stack: ptr, f, ptr
+    %store_fp12
+    // stack:         ptr
+    %frob_fp12_1
+    // stack:         ptr
+    %load_fp12
+    %jump(0xdeadbeef)
+
+global test_frob_fp12_2:
+    // stack: ptr, f, ptr
+    %store_fp12
+    // stack:         ptr 
+    DUP1
+    // stack:    ptr, ptr
+    %frob_fp12_2_
+    // stack:         ptr
+    %load_fp12
+    %jump(0xdeadbeef)
+
+global test_frob_fp12_3:
+    // stack: ptr, f, ptr
+    %store_fp12
+    // stack:         ptr
+    %frob_fp12_3
+    // stack:         ptr
+    %load_fp12
+    %jump(0xdeadbeef)
+
+global test_frob_fp12_6:
+    // stack: ptr, f, ptr
+    %store_fp12
+    // stack:         ptr
+    %frob_fp12_6
+    // stack:         ptr
+    %load_fp12
+    %jump(0xdeadbeef)
+
 /// let Z` denote the complex conjugate of Z
 
 /// def frob_fp6_n(C0, C1, C2):
@@ -91,13 +129,14 @@
     // stack:       f', ptr
     %frobz_1
     // stack:       g', ptr
-    DUP1  %offset_fp6
+    DUP7  %offset_fp6
     // stack: ptr', g', ptr
     %store_fp6
     // stack:           ptr
 %endmacro 
 
-%macro frob_fp12_2
+// Note: this is the only one with distinct input and output pointers
+%macro frob_fp12_2_
     // stack:           ptr , out
     DUP1
     // stack:      ptr, ptr , out
@@ -115,7 +154,7 @@
     // stack:             f', out
     %frobz_2
     // stack:             g', out
-    DUP1  %offset_fp6
+    DUP7  %offset_fp6
     // stack:       out', g', out
     %store_fp6
     // stack:                 out
@@ -139,11 +178,11 @@
     // stack:       f', ptr
     %frobz_3
     // stack:       g', ptr
-    DUP1  %offset_fp6
+    DUP7  %offset_fp6
     // stack: ptr', g', ptr
     %store_fp6
     // stack:           ptr
-%endmacro 
+%endmacro
 
 %macro frob_fp12_6
     // stack:           ptr
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 5699b1f4..d2468205 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -150,6 +150,10 @@ fn gen_fp6() -> Fp6 {
     ]
 }
 
+fn gen_fp12() -> Fp12 {
+    [gen_fp6(), gen_fp6()]
+}
+
 fn gen_fp12_sparse() -> Fp12 {
     sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
 }
@@ -303,16 +307,22 @@ fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
     let _c1 = conj_fp2(c1);
     let _c2 = conj_fp2(c2);
 
+    let n = n % 6;
+    let frob_t1 = frob_t1(n);
+    let frob_t2 = frob_t2(n);
+
     if n % 2 != 0 {
-        [_c0, mul_fp2(frob_t1(n), _c1), mul_fp2(frob_t2(n), _c2)]
+        [_c0, mul_fp2(frob_t1, _c1), mul_fp2(frob_t2, _c2)]
     } else {
-        [c0, mul_fp2(frob_t1(n), c1), mul_fp2(frob_t2(n), c2)]
+        [c0, mul_fp2(frob_t1, c1), mul_fp2(frob_t2, c2)]
     }
 }
+
 fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     let [f0, f1] = f;
     let zero = U256::from(0);
     let scale = [frob_z(n), [zero, zero], [zero, zero]];
+
     [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
 }
 
@@ -320,10 +330,8 @@ fn make_mul_stack(
     in0: usize,
     in1: usize,
     out: usize,
-    f0: Fp6,
-    f1: Fp6,
-    g0: Fp6,
-    g1: Fp6,
+    f: Fp12,
+    g: Fp12,
     mul_label: &str,
 ) -> Vec<U256> {
     // stack: in0, f, f', in1, g, g', mul_dest, in0, in1, out, ret_stack, out
@@ -332,20 +340,16 @@ fn make_mul_stack(
     let in1 = U256::from(in1);
     let out = U256::from(out);
 
-    let f0: Vec<U256> = f0.into_iter().flatten().collect();
-    let f1: Vec<U256> = f1.into_iter().flatten().collect();
-    let g0: Vec<U256> = g0.into_iter().flatten().collect();
-    let g1: Vec<U256> = g1.into_iter().flatten().collect();
+    let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
+    let g: Vec<U256> = g.into_iter().flatten().flatten().collect();
 
     let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
     let mul_dest = U256::from(KERNEL.global_labels[mul_label]);
 
     let mut input = vec![in0];
-    input.extend(f0);
-    input.extend(f1);
+    input.extend(f);
     input.extend(vec![in1]);
-    input.extend(g0);
-    input.extend(g1);
+    input.extend(g);
     input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
     input.reverse();
 
@@ -367,25 +371,23 @@ fn test_mul_fp12() -> Result<()> {
     let in1 = 76;
     let out = 88;
 
-    let f0 = gen_fp6();
-    let f1 = gen_fp6();
-    let g0 = gen_fp6();
-    let g1 = gen_fp6();
-    let [h0, h1] = gen_fp12_sparse();
+    let f: Fp12 = gen_fp12();
+    let g: Fp12 = gen_fp12();
+    let h: Fp12 = gen_fp12_sparse();
 
     let test_mul = KERNEL.global_labels["test_mul_fp12"];
 
-    let normal: Vec<U256> = make_mul_stack(in0, in1, out, f0, f1, g0, g1, "mul_fp12");
-    let sparse: Vec<U256> = make_mul_stack(in0, in1, out, f0, f1, h0, h1, "mul_fp12_sparse");
-    let square: Vec<U256> = make_mul_stack(in0, in1, out, f0, f1, f0, f1, "square_fp12_test");
+    let normal: Vec<U256> = make_mul_stack(in0, in1, out, f, g, "mul_fp12");
+    let sparse: Vec<U256> = make_mul_stack(in0, in1, out, f, h, "mul_fp12_sparse");
+    let square: Vec<U256> = make_mul_stack(in0, in1, out, f, f, "square_fp12_test");
 
     let out_normal: Vec<U256> = run_interpreter(test_mul, normal)?.stack().to_vec();
     let out_sparse: Vec<U256> = run_interpreter(test_mul, sparse)?.stack().to_vec();
     let out_square: Vec<U256> = run_interpreter(test_mul, square)?.stack().to_vec();
 
-    let exp_normal: Vec<U256> = make_mul_expected([f0, f1], [g0, g1]);
-    let exp_sparse: Vec<U256> = make_mul_expected([f0, f1], [h0, h1]);
-    let exp_square: Vec<U256> = make_mul_expected([f0, f1], [f0, f1]);
+    let exp_normal: Vec<U256> = make_mul_expected(f, g);
+    let exp_sparse: Vec<U256> = make_mul_expected(f, h);
+    let exp_square: Vec<U256> = make_mul_expected(f, f);
 
     assert_eq!(out_normal, exp_normal);
     assert_eq!(out_sparse, exp_sparse);
@@ -393,3 +395,52 @@ fn test_mul_fp12() -> Result<()> {
 
     Ok(())
 }
+
+fn make_frob_stack(f: Fp12) -> Vec<U256> {
+    let ptr = U256::from(100);
+    let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
+    let mut input = vec![ptr];
+    input.extend(f);
+    input.extend(vec![ptr]);
+    input.reverse();
+
+    input
+}
+
+fn make_frob_expected(n: usize, f: Fp12) -> Vec<U256> {
+    frob_fp12(n, f)
+        .into_iter()
+        .flatten()
+        .flatten()
+        .rev()
+        .collect()
+}
+
+#[test]
+fn test_frob_fp12() -> Result<()> {
+    let f: Fp12 = gen_fp12();
+
+    let test_frob1 = KERNEL.global_labels["test_frob_fp12_1"];
+    let test_frob2 = KERNEL.global_labels["test_frob_fp12_2"];
+    let test_frob3 = KERNEL.global_labels["test_frob_fp12_3"];
+    let test_frob6 = KERNEL.global_labels["test_frob_fp12_6"];
+
+    let stack = make_frob_stack(f);
+
+    let out_frob1: Vec<U256> = run_interpreter(test_frob1, stack.clone())?.stack().to_vec();
+    let out_frob2: Vec<U256> = run_interpreter(test_frob2, stack.clone())?.stack().to_vec();
+    let out_frob3: Vec<U256> = run_interpreter(test_frob3, stack.clone())?.stack().to_vec();
+    let out_frob6: Vec<U256> = run_interpreter(test_frob6, stack)?.stack().to_vec();
+
+    let exp_frob1: Vec<U256> = make_frob_expected(1, f);
+    let exp_frob2: Vec<U256> = make_frob_expected(2, f);
+    let exp_frob3: Vec<U256> = make_frob_expected(3, f);
+    let exp_frob6: Vec<U256> = make_frob_expected(6, f);
+
+    assert_eq!(out_frob1, exp_frob1);
+    assert_eq!(out_frob2, exp_frob2);
+    assert_eq!(out_frob3, exp_frob3);
+    assert_eq!(out_frob6, exp_frob6);
+
+    Ok(())
+}

From d6c7e319bd94dfcee5c6729482a51d6a1274d463 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Fri, 16 Dec 2022 17:35:52 -0800
Subject: [PATCH 056/201] all but inv

---
 evm/src/cpu/kernel/aggregator.rs              |   2 +-
 .../power.asm                                 |   8 +-
 evm/src/cpu/kernel/tests/bn254_field.rs       | 275 +++++++++++++++---
 3 files changed, 246 insertions(+), 39 deletions(-)
 rename evm/src/cpu/kernel/asm/curve/bn254/{curve_arithmetic => field_arithmetic}/power.asm (96%)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index ce2ce0cc..29144404 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -28,13 +28,13 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
-        include_str!("asm/curve/bn254/curve_arithmetic/power.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/power.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_mul.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
similarity index 96%
rename from evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index d0cf3bed..c49faab0 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -16,13 +16,13 @@
 ///     return y2 * y1 * y0 
 
 global power:
-    // stack:                                                            sqr, out, retdest
+    // stack:                                                       sqr, out, retdest
     PUSH 1  DUP1  DUP1
-    // stack:                                                   1, 1, 1, sqr, out, retdest
+    // stack:                                              1, 1, 1, sqr, out, retdest
     %mstore_kernel_general(200)  %mstore_kernel_general(212)  %mstore_kernel_general(224)
-    // stack:                                                            sqr, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:                                                       sqr, out, retdest  {200: y0, 212: y1, 224: y2}
     PUSH power_loop_2  PUSH power_loop_1  PUSH power_return    
-    // stack:                  power_return, power_loop_1, power_loop_2, sqr, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:             power_return, power_loop_1, power_loop_2, sqr, out, retdest  {200: y0, 212: y1, 224: y2}
     SWAP3
     // stack:             sqr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
     PUSH 65  PUSH 62  PUSH 65
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index d2468205..d6a7f54a 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -7,27 +7,68 @@ use rand::{thread_rng, Rng};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::{run_interpreter, BN_BASE};
 
+type Fp = U256;
 type Fp2 = [U256; 2];
 type Fp6 = [Fp2; 3];
 type Fp12 = [Fp6; 2];
 
-fn add_fp(x: U256, y: U256) -> U256 {
+const zero: Fp = U256::from(0);
+
+fn embed_fp2(x: Fp) -> Fp2 {
+    [x, zero]
+}
+
+fn embed_fp2_fp6(a: Fp2) -> Fp6 {
+    [a, embed_fp2(zero), embed_fp2(zero)]
+}
+
+fn embed_fp6(x: Fp) -> Fp6 {
+    embed_fp2_fp6(embed_fp2(x))
+}
+
+fn embed_fp12(x: Fp) -> Fp12 {
+    [embed_fp6(x), embed_fp6(zero)]
+}
+
+fn gen_fp() -> Fp {
+    let rng = thread_rng();
+    let x64 = rng.gen::<u64>();
+    U256([x64, x64, x64, x64]) % BN_BASE
+}
+
+fn gen_fp6() -> Fp6 {
+    [
+        [gen_fp(), gen_fp()],
+        [gen_fp(), gen_fp()],
+        [gen_fp(), gen_fp()],
+    ]
+}
+
+fn gen_fp12() -> Fp12 {
+    [gen_fp6(), gen_fp6()]
+}
+
+fn gen_fp12_sparse() -> Fp12 {
+    sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
+}
+
+fn add_fp(x: Fp, y: Fp) -> Fp {
     (x + y) % BN_BASE
 }
 
-fn add3_fp(x: U256, y: U256, z: U256) -> U256 {
+fn add3_fp(x: Fp, y: Fp, z: Fp) -> Fp {
     (x + y + z) % BN_BASE
 }
 
-fn mul_fp(x: U256, y: U256) -> U256 {
+fn mul_fp(x: Fp, y: Fp) -> Fp {
     U256::try_from(x.full_mul(y) % BN_BASE).unwrap()
 }
 
-fn sub_fp(x: U256, y: U256) -> U256 {
+fn sub_fp(x: Fp, y: Fp) -> Fp {
     (BN_BASE + x - y) % BN_BASE
 }
 
-fn neg_fp(x: U256) -> U256 {
+fn neg_fp(x: Fp) -> Fp {
     (BN_BASE - x) % BN_BASE
 }
 
@@ -119,10 +160,9 @@ fn sh(c: Fp6) -> Fp6 {
 
 fn sparse_embed(x: [U256; 5]) -> Fp12 {
     let [g0, g1, g1_, g2, g2_] = x;
-    let zero = U256::from(0);
     [
-        [[g0, zero], [g1, g1_], [zero, zero]],
-        [[zero, zero], [g2, g2_], [zero, zero]],
+        [embed_fp2(g0), [g1, g1_], embed_fp2(zero)],
+        [embed_fp2(zero), [g2, g2_], embed_fp2(zero)],
     ]
 }
 
@@ -136,28 +176,6 @@ fn mul_fp12(f: Fp12, g: Fp12) -> Fp12 {
     [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
 }
 
-fn gen_fp() -> U256 {
-    let mut rng = thread_rng();
-    let x64 = rng.gen::<u64>();
-    U256([x64, x64, x64, x64]) % BN_BASE
-}
-
-fn gen_fp6() -> Fp6 {
-    [
-        [gen_fp(), gen_fp()],
-        [gen_fp(), gen_fp()],
-        [gen_fp(), gen_fp()],
-    ]
-}
-
-fn gen_fp12() -> Fp12 {
-    [gen_fp6(), gen_fp6()]
-}
-
-fn gen_fp12_sparse() -> Fp12 {
-    sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
-}
-
 fn frob_t1(n: usize) -> Fp2 {
     match n {
         0 => [
@@ -320,12 +338,203 @@ fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
 
 fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     let [f0, f1] = f;
-    let zero = U256::from(0);
-    let scale = [frob_z(n), [zero, zero], [zero, zero]];
+    let scale = embed_fp2_fp6(frob_z(n));
 
     [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
 }
 
+const EXPS4: [(bool, bool, bool); 65] = [
+    (True, True, True),
+    (True, True, False),
+    (True, True, True),
+    (True, True, True),
+    (False, False, False),
+    (False, False, True),
+    (True, False, True),
+    (False, True, False),
+    (True, False, True),
+    (True, True, False),
+    (True, False, True),
+    (False, True, False),
+    (True, True, False),
+    (True, True, False),
+    (True, True, False),
+    (False, True, False),
+    (False, True, False),
+    (False, False, True),
+    (True, False, True),
+    (True, True, False),
+    (False, True, False),
+    (True, True, False),
+    (True, True, False),
+    (True, True, False),
+    (False, False, True),
+    (False, False, True),
+    (True, False, True),
+    (True, False, True),
+    (True, True, False),
+    (True, False, False),
+    (True, True, False),
+    (False, True, False),
+    (True, True, False),
+    (True, False, False),
+    (False, True, False),
+    (False, False, False),
+    (True, False, False),
+    (True, False, False),
+    (True, False, True),
+    (False, False, True),
+    (False, True, True),
+    (False, False, True),
+    (False, True, True),
+    (False, True, True),
+    (False, False, False),
+    (True, True, True),
+    (True, False, True),
+    (True, False, True),
+    (False, True, True),
+    (True, False, True),
+    (False, True, True),
+    (False, True, True),
+    (True, True, False),
+    (True, True, False),
+    (True, True, False),
+    (True, False, False),
+    (False, False, True),
+    (True, False, False),
+    (False, False, True),
+    (True, False, True),
+    (True, True, False),
+    (True, True, True),
+    (False, True, True),
+    (False, True, False),
+    (True, True, True),
+];
+
+const EXPS2: [(bool, bool); 62] = [
+    (True, False),
+    (True, True),
+    (False, False),
+    (True, False),
+    (True, False),
+    (True, True),
+    (True, False),
+    (True, True),
+    (True, False),
+    (False, True),
+    (False, True),
+    (True, True),
+    (True, True),
+    (False, False),
+    (True, True),
+    (False, False),
+    (False, False),
+    (False, True),
+    (False, True),
+    (True, True),
+    (True, True),
+    (True, True),
+    (False, True),
+    (True, True),
+    (False, False),
+    (True, True),
+    (True, False),
+    (True, True),
+    (False, False),
+    (True, True),
+    (True, True),
+    (True, False),
+    (False, False),
+    (False, True),
+    (False, False),
+    (True, True),
+    (False, True),
+    (False, False),
+    (True, False),
+    (False, True),
+    (False, True),
+    (True, False),
+    (False, True),
+    (False, False),
+    (False, False),
+    (False, False),
+    (False, True),
+    (True, False),
+    (True, True),
+    (False, True),
+    (True, True),
+    (True, False),
+    (False, True),
+    (False, False),
+    (True, False),
+    (False, True),
+    (True, False),
+    (True, True),
+    (True, False),
+    (True, True),
+    (False, True),
+    (True, True),
+];
+
+const EXPS0: [(bool, bool); 65] = [
+    False, False, True, False, False, True, True, False, True, False, True, True, True, False,
+    True, False, False, False, True, False, False, True, False, True, False, True, True, False,
+    False, False, False, False, True, False, True, False, True, True, True, False, False, True,
+    True, True, True, False, True, False, True, True, False, False, True, False, False, False,
+    True, True, True, True, False, False, True, True, False,
+];
+
+fn fast_exp(f: Fp12) -> Fp12 {
+    let mut sq: Fp12 = f;
+    let mut y0: Fp12 = embed_fp12(U256::from(1));
+    let mut y2: Fp12 = embed_fp12(U256::from(1));
+    let mut y4: Fp12 = embed_fp12(U256::from(1));
+
+    for (a, b, c) in EXPS4 {
+        if a {
+            y4 = mul_fp12(y4, sq);
+        }
+        if b {
+            y2 = mul_fp12(y2, sq);
+        }
+        if c {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y4 = mul_fp12(y4, y4);
+
+    for (a, b) in EXPS2 {
+        if a {
+            y2 = mul_fp12(y2, sq);
+        }
+        if b {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y2 = mul_fp12(y2, y2);
+
+    for a in EXPS0 {
+        if a {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y0 = mul_fp12(y0, y0);
+
+    // TODO: y0 = inv_fp12(y0);
+
+    y4 = mul_fp12(y4, y2);
+    y4 = mul_fp12(y4, y2);
+    y4 = mul_fp12(y4, y0);
+
+    y4 = frob_fp12(1, y4);
+    y2 = frob_fp12(2, y2);
+
+    mul_fp12(mul_fp12(y4, y2), y0)
+}
+
 fn make_mul_stack(
     in0: usize,
     in1: usize,
@@ -352,7 +561,6 @@ fn make_mul_stack(
     input.extend(g);
     input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
     input.reverse();
-
     input
 }
 
@@ -403,7 +611,6 @@ fn make_frob_stack(f: Fp12) -> Vec<U256> {
     input.extend(f);
     input.extend(vec![ptr]);
     input.reverse();
-
     input
 }
 

From 15ee75f2ea981884fabc226ad1115d0cf38210f3 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Fri, 16 Dec 2022 17:46:50 -0800
Subject: [PATCH 057/201] all but inv

---
 evm/src/cpu/kernel/tests/bn254_field.rs | 266 ++++++++++++------------
 1 file changed, 133 insertions(+), 133 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index d6a7f54a..4de7c889 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -344,144 +344,144 @@ fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
 }
 
 const EXPS4: [(bool, bool, bool); 65] = [
-    (True, True, True),
-    (True, True, False),
-    (True, True, True),
-    (True, True, True),
-    (False, False, False),
-    (False, False, True),
-    (True, False, True),
-    (False, True, False),
-    (True, False, True),
-    (True, True, False),
-    (True, False, True),
-    (False, True, False),
-    (True, True, False),
-    (True, True, False),
-    (True, True, False),
-    (False, True, False),
-    (False, True, False),
-    (False, False, True),
-    (True, False, True),
-    (True, True, False),
-    (False, True, False),
-    (True, True, False),
-    (True, True, False),
-    (True, True, False),
-    (False, False, True),
-    (False, False, True),
-    (True, False, True),
-    (True, False, True),
-    (True, True, False),
-    (True, False, False),
-    (True, True, False),
-    (False, True, False),
-    (True, True, False),
-    (True, False, False),
-    (False, True, False),
-    (False, False, False),
-    (True, False, False),
-    (True, False, False),
-    (True, False, True),
-    (False, False, True),
-    (False, True, True),
-    (False, False, True),
-    (False, True, True),
-    (False, True, True),
-    (False, False, False),
-    (True, True, True),
-    (True, False, True),
-    (True, False, True),
-    (False, True, True),
-    (True, False, True),
-    (False, True, True),
-    (False, True, True),
-    (True, True, False),
-    (True, True, False),
-    (True, True, False),
-    (True, False, False),
-    (False, False, True),
-    (True, False, False),
-    (False, False, True),
-    (True, False, True),
-    (True, True, False),
-    (True, True, True),
-    (False, True, True),
-    (False, True, False),
-    (True, True, True),
+    (true, true, true),
+    (true, true, false),
+    (true, true, true),
+    (true, true, true),
+    (false, false, false),
+    (false, false, true),
+    (true, false, true),
+    (false, true, false),
+    (true, false, true),
+    (true, true, false),
+    (true, false, true),
+    (false, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (false, true, false),
+    (false, true, false),
+    (false, false, true),
+    (true, false, true),
+    (true, true, false),
+    (false, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (false, false, true),
+    (false, false, true),
+    (true, false, true),
+    (true, false, true),
+    (true, true, false),
+    (true, false, false),
+    (true, true, false),
+    (false, true, false),
+    (true, true, false),
+    (true, false, false),
+    (false, true, false),
+    (false, false, false),
+    (true, false, false),
+    (true, false, false),
+    (true, false, true),
+    (false, false, true),
+    (false, true, true),
+    (false, false, true),
+    (false, true, true),
+    (false, true, true),
+    (false, false, false),
+    (true, true, true),
+    (true, false, true),
+    (true, false, true),
+    (false, true, true),
+    (true, false, true),
+    (false, true, true),
+    (false, true, true),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, false, false),
+    (false, false, true),
+    (true, false, false),
+    (false, false, true),
+    (true, false, true),
+    (true, true, false),
+    (true, true, true),
+    (false, true, true),
+    (false, true, false),
+    (true, true, true),
 ];
 
 const EXPS2: [(bool, bool); 62] = [
-    (True, False),
-    (True, True),
-    (False, False),
-    (True, False),
-    (True, False),
-    (True, True),
-    (True, False),
-    (True, True),
-    (True, False),
-    (False, True),
-    (False, True),
-    (True, True),
-    (True, True),
-    (False, False),
-    (True, True),
-    (False, False),
-    (False, False),
-    (False, True),
-    (False, True),
-    (True, True),
-    (True, True),
-    (True, True),
-    (False, True),
-    (True, True),
-    (False, False),
-    (True, True),
-    (True, False),
-    (True, True),
-    (False, False),
-    (True, True),
-    (True, True),
-    (True, False),
-    (False, False),
-    (False, True),
-    (False, False),
-    (True, True),
-    (False, True),
-    (False, False),
-    (True, False),
-    (False, True),
-    (False, True),
-    (True, False),
-    (False, True),
-    (False, False),
-    (False, False),
-    (False, False),
-    (False, True),
-    (True, False),
-    (True, True),
-    (False, True),
-    (True, True),
-    (True, False),
-    (False, True),
-    (False, False),
-    (True, False),
-    (False, True),
-    (True, False),
-    (True, True),
-    (True, False),
-    (True, True),
-    (False, True),
-    (True, True),
+    (true, false),
+    (true, true),
+    (false, false),
+    (true, false),
+    (true, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (true, false),
+    (false, true),
+    (false, true),
+    (true, true),
+    (true, true),
+    (false, false),
+    (true, true),
+    (false, false),
+    (false, false),
+    (false, true),
+    (false, true),
+    (true, true),
+    (true, true),
+    (true, true),
+    (false, true),
+    (true, true),
+    (false, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (false, false),
+    (true, true),
+    (true, true),
+    (true, false),
+    (false, false),
+    (false, true),
+    (false, false),
+    (true, true),
+    (false, true),
+    (false, false),
+    (true, false),
+    (false, true),
+    (false, true),
+    (true, false),
+    (false, true),
+    (false, false),
+    (false, false),
+    (false, false),
+    (false, true),
+    (true, false),
+    (true, true),
+    (false, true),
+    (true, true),
+    (true, false),
+    (false, true),
+    (false, false),
+    (true, false),
+    (false, true),
+    (true, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (false, true),
+    (true, true),
 ];
 
-const EXPS0: [(bool, bool); 65] = [
-    False, False, True, False, False, True, True, False, True, False, True, True, True, False,
-    True, False, False, False, True, False, False, True, False, True, False, True, True, False,
-    False, False, False, False, True, False, True, False, True, True, True, False, False, True,
-    True, True, True, False, True, False, True, True, False, False, True, False, False, False,
-    True, True, True, True, False, False, True, True, False,
+const EXPS0: [bool; 65] = [
+    false, false, true, false, false, true, true, false, true, false, true, true, true, false,
+    true, false, false, false, true, false, false, true, false, true, false, true, true, false,
+    false, false, false, false, true, false, true, false, true, true, true, false, false, true,
+    true, true, true, false, true, false, true, true, false, false, true, false, false, false,
+    true, true, true, true, false, false, true, true, false,
 ];
 
 fn fast_exp(f: Fp12) -> Fp12 {

From a503b05813672b9c3e96fdb95eac306733466f55 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Mon, 19 Dec 2022 14:39:23 -0800
Subject: [PATCH 058/201] fix

---
 .../curve/bn254/field_arithmetic/power.asm    |  6 +++++
 evm/src/cpu/kernel/tests/bn254_field.rs       | 27 ++++++++++++++-----
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index c49faab0..fc85cb44 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -1,3 +1,9 @@
+global test_power:
+    // stack: sqr, f, sqr, out, ret_stack, out
+    %store_fp12
+    // stack:         sqr, out, ret_stack, out
+    %jump(power)
+
 /// def power(square):
 ///     power_init()
 ///     power_loop_0()
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 4de7c889..be3c8a0e 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -12,14 +12,14 @@ type Fp2 = [U256; 2];
 type Fp6 = [Fp2; 3];
 type Fp12 = [Fp6; 2];
 
-const zero: Fp = U256::from(0);
+const ZERO: Fp = U256([0, 0, 0, 0]);
 
 fn embed_fp2(x: Fp) -> Fp2 {
-    [x, zero]
+    [x, ZERO]
 }
 
 fn embed_fp2_fp6(a: Fp2) -> Fp6 {
-    [a, embed_fp2(zero), embed_fp2(zero)]
+    [a, embed_fp2(ZERO), embed_fp2(ZERO)]
 }
 
 fn embed_fp6(x: Fp) -> Fp6 {
@@ -27,11 +27,11 @@ fn embed_fp6(x: Fp) -> Fp6 {
 }
 
 fn embed_fp12(x: Fp) -> Fp12 {
-    [embed_fp6(x), embed_fp6(zero)]
+    [embed_fp6(x), embed_fp6(ZERO)]
 }
 
 fn gen_fp() -> Fp {
-    let rng = thread_rng();
+    let mut rng = thread_rng();
     let x64 = rng.gen::<u64>();
     U256([x64, x64, x64, x64]) % BN_BASE
 }
@@ -161,8 +161,8 @@ fn sh(c: Fp6) -> Fp6 {
 fn sparse_embed(x: [U256; 5]) -> Fp12 {
     let [g0, g1, g1_, g2, g2_] = x;
     [
-        [embed_fp2(g0), [g1, g1_], embed_fp2(zero)],
-        [embed_fp2(zero), [g2, g2_], embed_fp2(zero)],
+        [embed_fp2(g0), [g1, g1_], embed_fp2(ZERO)],
+        [embed_fp2(ZERO), [g2, g2_], embed_fp2(ZERO)],
     ]
 }
 
@@ -651,3 +651,16 @@ fn test_frob_fp12() -> Result<()> {
 
     Ok(())
 }
+
+fn make_power_stack(f: Fp12) -> Vec<U256> {
+    let sqr = U256::from(100);
+    let out = U256::from(300);
+    let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
+    let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
+
+    let mut input = vec![sqr];
+    input.extend(f);
+    input.extend(vec![sqr, out, ret_stack, out]);
+    input.reverse();
+    input
+}

From 7788a29f4a5480af970d1f7223fc182ca8dea228 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Tue, 20 Dec 2022 00:22:59 -0800
Subject: [PATCH 059/201] skeleton inv

---
 .../bn254/field_arithmetic/field_macros.asm   |   2 +-
 .../curve/bn254/field_arithmetic/inverse.asm  |  36 +-
 evm/src/cpu/kernel/tests/bn254_field.rs       |  82 +++
 evm/src/generation/prover_input.rs            | 521 +++++++++++++++++-
 evm/src/witness/util.rs                       |  14 +
 5 files changed, 638 insertions(+), 17 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
index 653ae2de..8500e053 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
@@ -974,7 +974,7 @@
 
 %macro assert_eq_fp12_unit
     // stack:      ptr
-    DUP1                 %mload_kernel_code
+    DUP1                  %mload_kernel_code
     // stack: x00, ptr
     %assert_eq_const(1)
     // stack:      ptr 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index ad9aeff3..35353c3a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -23,23 +23,26 @@
 
 global inverse_fp12:
     // stack:                                ptr, inv, retdest
-    DUP1  %load_fp12
+    // DUP1  %load_fp12
     // stack:                             f, ptr, inv, retdest
     DUP14
     // stack:                        inv, f, ptr, inv, retdest
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
-    PROVER_INPUT(ff::bn254_base::inverse_fp12)
+    PROVER_INPUT(ffe::bn254_base::ext_inv11)
+    PROVER_INPUT(ffe::bn254_base::ext_inv10)
+    PROVER_INPUT(ffe::bn254_base::ext_inv9)
+    PROVER_INPUT(ffe::bn254_base::ext_inv8)
+    PROVER_INPUT(ffe::bn254_base::ext_inv7)
+    PROVER_INPUT(ffe::bn254_base::ext_inv6)
+    PROVER_INPUT(ffe::bn254_base::ext_inv5)
+    PROVER_INPUT(ffe::bn254_base::ext_inv4)
+    PROVER_INPUT(ffe::bn254_base::ext_inv3)
+    PROVER_INPUT(ffe::bn254_base::ext_inv2)
+    PROVER_INPUT(ffe::bn254_base::ext_inv1)
+    PROVER_INPUT(ffe::bn254_base::ext_inv0)
     // stack:                  f^-1, inv, f, ptr, inv, retdest
+
+    %jump(0xdeadbeef)
+
     DUP13
     // stack:             inv, f^-1, inv, f, ptr, inv, retdest
     %store_fp12
@@ -55,6 +58,9 @@ global check_inv:
     // stack:                           200, ptr, inv, retdest
     %assert_eq_fp12_unit
     // stack:                                ptr, inv, retdest
-    POP  SWAP1  
+    %pop2
+    // stack:                                          retdest
+    %jump(0xdeadbeef)
+    // SWAP1  
     // stack:                                     retdest, inv
-    JUMP
+    // JUMP
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index be3c8a0e..5e7c11d7 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -96,6 +96,10 @@ fn sub_fp2(a: Fp2, b: Fp2) -> Fp2 {
     [sub_fp(a, b), sub_fp(a_, b_)]
 }
 
+fn neg_fp2(a: Fp2) -> Fp2 {
+    sub_fp2(embed_fp2(ZERO), a)
+}
+
 fn mul_fp2(a: Fp2, b: Fp2) -> Fp2 {
     let [a, a_] = a;
     let [b, b_] = b;
@@ -131,6 +135,10 @@ fn sub_fp6(c: Fp6, d: Fp6) -> Fp6 {
     [e0, e1, e2]
 }
 
+fn neg_fp6(a: Fp6) -> Fp6 {
+    sub_fp6(embed_fp6(ZERO), a)
+}
+
 fn mul_fp6(c: Fp6, d: Fp6) -> Fp6 {
     let [c0, c1, c2] = c;
     let [d0, d1, d2] = d;
@@ -343,6 +351,51 @@ fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
 }
 
+fn exp_fp(x: Fp, e: U256) -> Fp {
+    let mut current = x;
+    let mut product = U256::one();
+
+    for j in 0..256 {
+        if e.bit(j) {
+            product = U256::try_from(product.full_mul(current) % BN_BASE).unwrap();
+        }
+        current = U256::try_from(current.full_mul(current) % BN_BASE).unwrap();
+    }
+    product
+}
+
+fn inv_fp(x: Fp) -> Fp {
+    exp_fp(x, BN_BASE - 2)
+}
+
+fn inv_fp2(a: Fp2) -> Fp2 {
+    let [a0, a1] = a;
+    let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
+    [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
+}
+
+fn inv_fp6(c: Fp6) -> Fp6 {
+    let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
+    let e = mul_fp6(b, frob_fp6(5, c))[0];
+    let n = mul_fp2(e, conj_fp2(e))[0];
+    let i = inv_fp(n);
+    let d = mul_fp2(embed_fp2(i), e);
+    let [f0, f1, f2] = frob_fp6(1, b);
+    [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
+}
+
+fn inv_fp12(f: Fp12) -> Fp12 {
+    let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
+    let b = mul_fp6(a, frob_fp6(2, a));
+    let c = mul_fp6(b, frob_fp6(4, a))[0];
+    let n = mul_fp2(c, conj_fp2(c))[0];
+    let i = inv_fp(n);
+    let d = mul_fp2(embed_fp2(i), c);
+    let [g0, g1, g2] = frob_fp6(1, b);
+    let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)];
+    [mul_fp6(e, f[0]), neg_fp6(mul_fp6(e, f[1]))]
+}
+
 const EXPS4: [(bool, bool, bool); 65] = [
     (true, true, true),
     (true, true, false),
@@ -604,6 +657,35 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
+#[test]
+fn test_inv_fp12() -> Result<()> {
+    let ptr = U256::from(100);
+    let inv = U256::from(200);
+
+    let f: Fp12 = gen_fp12();
+    let flat_f: Vec<U256> = f.into_iter().flatten().flatten().collect();
+    let mut stack: Vec<U256> = flat_f.clone();
+    stack.extend(vec![ptr, inv]);
+    stack.reverse();
+
+    let g = inv_fp12(f);
+    let one = mul_fp12(f, g);
+    println!("ONE? {:#?}", one);
+
+    let mut expected: Vec<U256> = g.into_iter().flatten().flatten().collect();
+    expected.extend(vec![inv]);
+    expected.extend(flat_f);
+    expected.extend(vec![ptr, inv]);
+    expected.reverse();
+
+    let initial_offset = KERNEL.global_labels["inverse_fp12"];
+    let output: Vec<U256> = run_interpreter(initial_offset, stack)?.stack().to_vec();
+
+    assert_eq!(output, expected);
+
+    Ok(())
+}
+
 fn make_frob_stack(f: Fp12) -> Vec<U256> {
     let ptr = U256::from(100);
     let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 885760eb..5cdca6bc 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -6,9 +6,13 @@ use plonky2::field::types::Field;
 use crate::generation::prover_input::EvmField::{
     Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
+use crate::generation::prover_input::FieldExtOp::{
+    ExtInv0, ExtInv1, ExtInv10, ExtInv11, ExtInv2, ExtInv3, ExtInv4, ExtInv5, ExtInv6, ExtInv7,
+    ExtInv8, ExtInv9,
+};
 use crate::generation::prover_input::FieldOp::{Inverse, Sqrt};
 use crate::generation::state::GenerationState;
-use crate::witness::util::stack_peek;
+use crate::witness::util::{stack_peek, stack_peeks};
 
 /// Prover input function represented as a scoped function name.
 /// Example: `PROVER_INPUT(ff::bn254_base::inverse)` is represented as `ProverInputFn([ff, bn254_base, inverse])`.
@@ -26,6 +30,7 @@ impl<F: Field> GenerationState<F> {
         match input_fn.0[0].as_str() {
             "end_of_txns" => self.run_end_of_txns(),
             "ff" => self.run_ff(input_fn),
+            "ffe" => self.run_ffe(input_fn),
             "mpt" => self.run_mpt(),
             "rlp" => self.run_rlp(),
             "account_code" => self.run_account_code(input_fn),
@@ -51,6 +56,14 @@ impl<F: Field> GenerationState<F> {
         field.op(op, x)
     }
 
+    /// Finite field extension operations.
+    fn run_ffe(&self, input_fn: &ProverInputFn) -> U256 {
+        let field = EvmField::from_str(input_fn.0[1].as_str()).unwrap();
+        let op = FieldExtOp::from_str(input_fn.0[2].as_str()).unwrap();
+        let xs = stack_peeks(self).expect("Empty stack");
+        field.extop(op, xs)
+    }
+
     /// MPT data.
     fn run_mpt(&mut self) -> U256 {
         self.mpt_prover_inputs
@@ -100,6 +113,21 @@ enum FieldOp {
     Sqrt,
 }
 
+enum FieldExtOp {
+    ExtInv0,
+    ExtInv1,
+    ExtInv2,
+    ExtInv3,
+    ExtInv4,
+    ExtInv5,
+    ExtInv6,
+    ExtInv7,
+    ExtInv8,
+    ExtInv9,
+    ExtInv10,
+    ExtInv11,
+}
+
 impl FromStr for EvmField {
     type Err = ();
 
@@ -126,6 +154,28 @@ impl FromStr for FieldOp {
     }
 }
 
+impl FromStr for FieldExtOp {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s {
+            "ext_inv0" => ExtInv0,
+            "ext_inv1" => ExtInv1,
+            "ext_inv2" => ExtInv2,
+            "ext_inv3" => ExtInv3,
+            "ext_inv4" => ExtInv4,
+            "ext_inv5" => ExtInv5,
+            "ext_inv6" => ExtInv6,
+            "ext_inv7" => ExtInv7,
+            "ext_inv8" => ExtInv8,
+            "ext_inv9" => ExtInv9,
+            "ext_inv10" => ExtInv10,
+            "ext_inv11" => ExtInv11,
+            _ => panic!("Unrecognized field extension operation."),
+        })
+    }
+}
+
 impl EvmField {
     fn order(&self) -> U256 {
         match self {
@@ -152,6 +202,23 @@ impl EvmField {
         }
     }
 
+    fn extop(&self, op: FieldExtOp, xs: Vec<U256>) -> U256 {
+        match op {
+            FieldExtOp::ExtInv0 => self.ext_inv0(xs),
+            FieldExtOp::ExtInv1 => self.ext_inv1(xs),
+            FieldExtOp::ExtInv2 => self.ext_inv2(xs),
+            FieldExtOp::ExtInv3 => self.ext_inv3(xs),
+            FieldExtOp::ExtInv4 => self.ext_inv4(xs),
+            FieldExtOp::ExtInv5 => self.ext_inv5(xs),
+            FieldExtOp::ExtInv6 => self.ext_inv6(xs),
+            FieldExtOp::ExtInv7 => self.ext_inv7(xs),
+            FieldExtOp::ExtInv8 => self.ext_inv8(xs),
+            FieldExtOp::ExtInv9 => self.ext_inv9(xs),
+            FieldExtOp::ExtInv10 => self.ext_inv10(xs),
+            FieldExtOp::ExtInv11 => self.ext_inv11(xs),
+        }
+    }
+
     fn inverse(&self, x: U256) -> U256 {
         let n = self.order();
         assert!(x < n);
@@ -168,6 +235,84 @@ impl EvmField {
         );
         modexp(x, q, n)
     }
+
+    fn ext_inv(&self, xs: Vec<U256>, offset: usize) -> [U256; 12] {
+
+        let f0 = xs.clone().into_iter().nth(offset).unwrap();
+        let f1 = xs.clone().into_iter().nth(offset + 1).unwrap();
+        let f2 = xs.clone().into_iter().nth(offset + 2).unwrap();
+        let f3 = xs.clone().into_iter().nth(offset + 3).unwrap();
+        let f4 = xs.clone().into_iter().nth(offset + 4).unwrap();
+        let f5 = xs.clone().into_iter().nth(offset + 5).unwrap();
+        let f6 = xs.clone().into_iter().nth(offset + 6).unwrap();
+        let f7 = xs.clone().into_iter().nth(offset + 7).unwrap();
+        let f8 = xs.clone().into_iter().nth(offset + 8).unwrap();
+        let f9 = xs.clone().into_iter().nth(offset + 9).unwrap();
+        let f10 = xs.clone().into_iter().nth(offset + 10).unwrap();
+        let f11 = xs.clone().into_iter().nth(offset + 11).unwrap();
+
+        let f = [
+            [[f0, f1], [f2, f3], [f4, f5]],
+            [[f6, f7], [f8, f9], [f10, f11]],
+        ];
+
+        println!("STACK_: {:#?}", f);
+
+        let g = inv_fp12(f);
+
+        [
+            g[0][0][0], g[0][0][1], g[0][1][0], g[0][1][1], g[0][2][0], g[0][2][1], 
+            g[1][0][0], g[1][0][1], g[1][1][0], g[1][1][1], g[1][2][0], g[1][2][1],
+        ]
+    }
+
+    fn ext_inv0(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 12)[0]
+    }
+
+    fn ext_inv1(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 11)[1]
+    }
+
+    fn ext_inv2(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 10)[2]
+    }
+
+    fn ext_inv3(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 9)[3]
+    }
+
+    fn ext_inv4(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 8)[4]
+    }
+
+    fn ext_inv5(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 7)[5]
+    }
+
+    fn ext_inv6(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 6)[6]
+    }
+
+    fn ext_inv7(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 5)[7]
+    }
+
+    fn ext_inv8(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 4)[8]
+    }
+
+    fn ext_inv9(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 3)[9]
+    }
+
+    fn ext_inv10(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 2)[10]
+    }
+
+    fn ext_inv11(&self, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 1)[11]
+    }
 }
 
 fn modexp(x: U256, e: U256, n: U256) -> U256 {
@@ -182,3 +327,377 @@ fn modexp(x: U256, e: U256, n: U256) -> U256 {
     }
     product
 }
+
+type Fp = U256;
+type Fp2 = [U256; 2];
+type Fp6 = [Fp2; 3];
+type Fp12 = [Fp6; 2];
+
+const ZERO: Fp = U256([0, 0, 0, 0]);
+
+const BN_BASE: U256 = U256([
+    4332616871279656263,
+    10917124144477883021,
+    13281191951274694749,
+    3486998266802970665,
+]);
+
+fn embed_fp2(x: Fp) -> Fp2 {
+    [x, ZERO]
+}
+
+fn embed_fp2_fp6(a: Fp2) -> Fp6 {
+    [a, embed_fp2(ZERO), embed_fp2(ZERO)]
+}
+
+fn embed_fp6(x: Fp) -> Fp6 {
+    embed_fp2_fp6(embed_fp2(x))
+}
+
+fn embed_fp12(x: Fp) -> Fp12 {
+    [embed_fp6(x), embed_fp6(ZERO)]
+}
+
+fn add_fp(x: Fp, y: Fp) -> Fp {
+    (x + y) % BN_BASE
+}
+
+fn add3_fp(x: Fp, y: Fp, z: Fp) -> Fp {
+    (x + y + z) % BN_BASE
+}
+
+fn mul_fp(x: Fp, y: Fp) -> Fp {
+    U256::try_from(x.full_mul(y) % BN_BASE).unwrap()
+}
+
+fn sub_fp(x: Fp, y: Fp) -> Fp {
+    (BN_BASE + x - y) % BN_BASE
+}
+
+fn neg_fp(x: Fp) -> Fp {
+    (BN_BASE - x) % BN_BASE
+}
+
+fn conj_fp2(a: Fp2) -> Fp2 {
+    let [a, a_] = a;
+    [a, neg_fp(a_)]
+}
+
+fn add_fp2(a: Fp2, b: Fp2) -> Fp2 {
+    let [a, a_] = a;
+    let [b, b_] = b;
+    [add_fp(a, b), add_fp(a_, b_)]
+}
+
+fn add3_fp2(a: Fp2, b: Fp2, c: Fp2) -> Fp2 {
+    let [a, a_] = a;
+    let [b, b_] = b;
+    let [c, c_] = c;
+    [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
+}
+
+fn sub_fp2(a: Fp2, b: Fp2) -> Fp2 {
+    let [a, a_] = a;
+    let [b, b_] = b;
+    [sub_fp(a, b), sub_fp(a_, b_)]
+}
+
+fn neg_fp2(a: Fp2) -> Fp2 {
+    sub_fp2(embed_fp2(ZERO), a)
+}
+
+fn mul_fp2(a: Fp2, b: Fp2) -> Fp2 {
+    let [a, a_] = a;
+    let [b, b_] = b;
+    [
+        sub_fp(mul_fp(a, b), mul_fp(a_, b_)),
+        add_fp(mul_fp(a, b_), mul_fp(a_, b)),
+    ]
+}
+
+fn i9(a: Fp2) -> Fp2 {
+    let [a, a_] = a;
+    let nine = U256::from(9);
+    [sub_fp(mul_fp(nine, a), a_), add_fp(a, mul_fp(nine, a_))]
+}
+
+fn add_fp6(c: Fp6, d: Fp6) -> Fp6 {
+    let [c0, c1, c2] = c;
+    let [d0, d1, d2] = d;
+
+    let e0 = add_fp2(c0, d0);
+    let e1 = add_fp2(c1, d1);
+    let e2 = add_fp2(c2, d2);
+    [e0, e1, e2]
+}
+
+fn sub_fp6(c: Fp6, d: Fp6) -> Fp6 {
+    let [c0, c1, c2] = c;
+    let [d0, d1, d2] = d;
+
+    let e0 = sub_fp2(c0, d0);
+    let e1 = sub_fp2(c1, d1);
+    let e2 = sub_fp2(c2, d2);
+    [e0, e1, e2]
+}
+
+fn neg_fp6(a: Fp6) -> Fp6 {
+    sub_fp6(embed_fp6(ZERO), a)
+}
+
+fn mul_fp6(c: Fp6, d: Fp6) -> Fp6 {
+    let [c0, c1, c2] = c;
+    let [d0, d1, d2] = d;
+
+    let c0d0 = mul_fp2(c0, d0);
+    let c0d1 = mul_fp2(c0, d1);
+    let c0d2 = mul_fp2(c0, d2);
+    let c1d0 = mul_fp2(c1, d0);
+    let c1d1 = mul_fp2(c1, d1);
+    let c1d2 = mul_fp2(c1, d2);
+    let c2d0 = mul_fp2(c2, d0);
+    let c2d1 = mul_fp2(c2, d1);
+    let c2d2 = mul_fp2(c2, d2);
+    let cd12 = add_fp2(c1d2, c2d1);
+
+    [
+        add_fp2(c0d0, i9(cd12)),
+        add3_fp2(c0d1, c1d0, i9(c2d2)),
+        add3_fp2(c0d2, c1d1, c2d0),
+    ]
+}
+
+fn sh(c: Fp6) -> Fp6 {
+    let [c0, c1, c2] = c;
+    [i9(c2), c0, c1]
+}
+
+fn sparse_embed(x: [U256; 5]) -> Fp12 {
+    let [g0, g1, g1_, g2, g2_] = x;
+    [
+        [embed_fp2(g0), [g1, g1_], embed_fp2(ZERO)],
+        [embed_fp2(ZERO), [g2, g2_], embed_fp2(ZERO)],
+    ]
+}
+
+fn mul_fp12(f: Fp12, g: Fp12) -> Fp12 {
+    let [f0, f1] = f;
+    let [g0, g1] = g;
+
+    let h0 = mul_fp6(f0, g0);
+    let h1 = mul_fp6(f1, g1);
+    let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
+    [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
+}
+
+fn frob_t1(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
+                .unwrap(),
+            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
+                .unwrap(),
+            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
+                .unwrap(),
+            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
+fn frob_t2(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
+                .unwrap(),
+            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
+                .unwrap(),
+            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
+                .unwrap(),
+            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
+fn frob_z(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
+                .unwrap(),
+            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
+                .unwrap(),
+            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
+                .unwrap(),
+            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
+                .unwrap(),
+        ],
+        6 => [
+            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        7 => [
+            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
+                .unwrap(),
+            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
+                .unwrap(),
+        ],
+        8 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        9 => [
+            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
+                .unwrap(),
+            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
+                .unwrap(),
+        ],
+        10 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        11 => [
+            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
+                .unwrap(),
+            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
+fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
+    let [c0, c1, c2] = c;
+    let _c0 = conj_fp2(c0);
+    let _c1 = conj_fp2(c1);
+    let _c2 = conj_fp2(c2);
+
+    let n = n % 6;
+    let frob_t1 = frob_t1(n);
+    let frob_t2 = frob_t2(n);
+
+    if n % 2 != 0 {
+        [_c0, mul_fp2(frob_t1, _c1), mul_fp2(frob_t2, _c2)]
+    } else {
+        [c0, mul_fp2(frob_t1, c1), mul_fp2(frob_t2, c2)]
+    }
+}
+
+fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
+    let [f0, f1] = f;
+    let scale = embed_fp2_fp6(frob_z(n));
+
+    [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
+}
+
+fn exp_fp(x: Fp, e: U256) -> Fp {
+    let mut current = x;
+    let mut product = U256::one();
+
+    for j in 0..256 {
+        if e.bit(j) {
+            product = U256::try_from(product.full_mul(current) % BN_BASE).unwrap();
+        }
+        current = U256::try_from(current.full_mul(current) % BN_BASE).unwrap();
+    }
+    product
+}
+
+fn inv_fp(x: Fp) -> Fp {
+    exp_fp(x, BN_BASE - 2)
+}
+
+fn inv_fp2(a: Fp2) -> Fp2 {
+    let [a0, a1] = a;
+    let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
+    [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
+}
+
+fn inv_fp6(c: Fp6) -> Fp6 {
+    let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
+    let e = mul_fp6(b, frob_fp6(5, c))[0];
+    let n = mul_fp2(e, conj_fp2(e))[0];
+    let i = inv_fp(n);
+    let d = mul_fp2(embed_fp2(i), e);
+    let [f0, f1, f2] = frob_fp6(1, b);
+    [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
+}
+
+fn inv_fp12(f: Fp12) -> Fp12 {
+    let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
+    let b = mul_fp6(a, frob_fp6(2, a));
+    let c = mul_fp6(b, frob_fp6(4, a))[0];
+    let n = mul_fp2(c, conj_fp2(c))[0];
+    let i = inv_fp(n);
+    let d = mul_fp2(embed_fp2(i), c);
+    let [g0, g1, g2] = frob_fp6(1, b);
+    let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)];
+    [mul_fp6(e, f[0]), neg_fp6(mul_fp6(e, f[1]))]
+}
diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs
index 08d68edc..697ebb80 100644
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@@ -39,6 +39,20 @@ pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Opti
     )))
 }
 
+/// Peek at the entire stack.
+pub(crate) fn stack_peeks<F: Field>(state: &GenerationState<F>) -> Option<Vec<U256>> {
+    let n = state.registers.stack_len;
+    let mut stack: Vec<U256> = vec![];
+    for i in 0..n {
+        stack.extend(vec![state.memory.get(MemoryAddress::new(
+            state.registers.effective_context(),
+            Segment::Stack,
+            n - 1 - i,
+        ))])
+    }
+    Some(stack)
+}
+
 pub(crate) fn mem_read_with_log<F: Field>(
     channel: MemoryChannel,
     address: MemoryAddress,

From c24629715890dd7800a5e9cfa55c8896a58e9543 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@dmitrys-mbp.lan>
Date: Tue, 20 Dec 2022 00:47:57 -0800
Subject: [PATCH 060/201] inverse works

---
 .../bn254/field_arithmetic/field_macros.asm   | 41 -------------------
 .../curve/bn254/field_arithmetic/inverse.asm  | 31 ++++++++++----
 evm/src/cpu/kernel/tests/bn254_field.rs       | 27 +++++-------
 evm/src/generation/prover_input.rs            |  5 +--
 4 files changed, 34 insertions(+), 70 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
index 8500e053..87c8fdea 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
@@ -971,44 +971,3 @@
     %mstore_kernel_general
     // stack:                                                            
 %endmacro
-
-%macro assert_eq_fp12_unit
-    // stack:      ptr
-    DUP1                  %mload_kernel_code
-    // stack: x00, ptr
-    %assert_eq_const(1)
-    // stack:      ptr 
-    DUP1  %add_const(01)  %mload_kernel_code
-    // stack: x01, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(02)  %mload_kernel_code
-    // stack: x02, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(03)  %mload_kernel_code
-    // stack: x03, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(04)  %mload_kernel_code
-    // stack: x04, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(05)  %mload_kernel_code
-    // stack: x05, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(06)  %mload_kernel_code
-    // stack: x06, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(07)  %mload_kernel_code
-    // stack: x07, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(08)  %mload_kernel_code
-    // stack: x08, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(09)  %mload_kernel_code
-    // stack: x09, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(10)  %mload_kernel_code
-    // stack: x10, ptr
-    %assert_eq_const(0)
-    DUP1  %add_const(11)  %mload_kernel_code
-    // stack: x11, ptr
-    %assert_eq_const(0)
-%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 35353c3a..cf7ee5ad 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -21,9 +21,15 @@
     // stack:           x^-1
 %endmacro
 
+global test_inverse_fp12:
+    // stack:                        ptr, f, ptr, inv, retdest
+    %store_fp12
+    // stack:                                ptr, inv, retdest
+    %jump(inverse_fp12)
+
 global inverse_fp12:
     // stack:                                ptr, inv, retdest
-    // DUP1  %load_fp12
+    DUP1  %load_fp12
     // stack:                             f, ptr, inv, retdest
     DUP14
     // stack:                        inv, f, ptr, inv, retdest
@@ -40,9 +46,6 @@ global inverse_fp12:
     PROVER_INPUT(ffe::bn254_base::ext_inv1)
     PROVER_INPUT(ffe::bn254_base::ext_inv0)
     // stack:                  f^-1, inv, f, ptr, inv, retdest
-
-    %jump(0xdeadbeef)
-
     DUP13
     // stack:             inv, f^-1, inv, f, ptr, inv, retdest
     %store_fp12
@@ -56,11 +59,21 @@ global inverse_fp12:
     %jump(mul_fp12)
 global check_inv:
     // stack:                           200, ptr, inv, retdest
-    %assert_eq_fp12_unit
+    %load_fp12
+    // stack:                         unit?, ptr, inv, retdest
+    %assert_eq_const(1)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
+    %assert_eq_const(0)
     // stack:                                ptr, inv, retdest
     %pop2
     // stack:                                          retdest
-    %jump(0xdeadbeef)
-    // SWAP1  
-    // stack:                                     retdest, inv
-    // JUMP
+    JUMP
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 5e7c11d7..0d3c95fd 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -661,27 +661,20 @@ fn test_mul_fp12() -> Result<()> {
 fn test_inv_fp12() -> Result<()> {
     let ptr = U256::from(100);
     let inv = U256::from(200);
+    let f: Vec<U256> = gen_fp12().into_iter().flatten().flatten().collect();
 
-    let f: Fp12 = gen_fp12();
-    let flat_f: Vec<U256> = f.into_iter().flatten().flatten().collect();
-    let mut stack: Vec<U256> = flat_f.clone();
-    stack.extend(vec![ptr, inv]);
-    stack.reverse();
+    let initial_offset = KERNEL.global_labels["test_inverse_fp12"];
 
-    let g = inv_fp12(f);
-    let one = mul_fp12(f, g);
-    println!("ONE? {:#?}", one);
+    let mut initial_stack = vec![ptr];
+    initial_stack.extend(f);
+    initial_stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
+    initial_stack.reverse();
 
-    let mut expected: Vec<U256> = g.into_iter().flatten().flatten().collect();
-    expected.extend(vec![inv]);
-    expected.extend(flat_f);
-    expected.extend(vec![ptr, inv]);
-    expected.reverse();
+    let output: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
+        .stack()
+        .to_vec();
 
-    let initial_offset = KERNEL.global_labels["inverse_fp12"];
-    let output: Vec<U256> = run_interpreter(initial_offset, stack)?.stack().to_vec();
-
-    assert_eq!(output, expected);
+    assert_eq!(output, vec![]);
 
     Ok(())
 }
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 5cdca6bc..20e40ce7 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -237,7 +237,6 @@ impl EvmField {
     }
 
     fn ext_inv(&self, xs: Vec<U256>, offset: usize) -> [U256; 12] {
-
         let f0 = xs.clone().into_iter().nth(offset).unwrap();
         let f1 = xs.clone().into_iter().nth(offset + 1).unwrap();
         let f2 = xs.clone().into_iter().nth(offset + 2).unwrap();
@@ -261,8 +260,8 @@ impl EvmField {
         let g = inv_fp12(f);
 
         [
-            g[0][0][0], g[0][0][1], g[0][1][0], g[0][1][1], g[0][2][0], g[0][2][1], 
-            g[1][0][0], g[1][0][1], g[1][1][0], g[1][1][1], g[1][2][0], g[1][2][1],
+            g[0][0][0], g[0][0][1], g[0][1][0], g[0][1][1], g[0][2][0], g[0][2][1], g[1][0][0],
+            g[1][0][1], g[1][1][0], g[1][1][1], g[1][2][0], g[1][2][1],
         ]
     }
 

From 95383db49d9fedb0f6dbb9aeb37f139ae4e671a5 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Tue, 20 Dec 2022 11:57:45 -0800
Subject: [PATCH 061/201] inverse edits

---
 evm/src/cpu/kernel/tests/bn254_field.rs | 53 ++++++++++++++-----------
 evm/src/generation/prover_input.rs      |  2 -
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 0d3c95fd..e162969e 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -385,6 +385,7 @@ fn inv_fp6(c: Fp6) -> Fp6 {
 }
 
 fn inv_fp12(f: Fp12) -> Fp12 {
+    let [f0, f1] = f;
     let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
     let b = mul_fp6(a, frob_fp6(2, a));
     let c = mul_fp6(b, frob_fp6(4, a))[0];
@@ -393,7 +394,7 @@ fn inv_fp12(f: Fp12) -> Fp12 {
     let d = mul_fp2(embed_fp2(i), c);
     let [g0, g1, g2] = frob_fp6(1, b);
     let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)];
-    [mul_fp6(e, f[0]), neg_fp6(mul_fp6(e, f[1]))]
+    [mul_fp6(e, f0), neg_fp6(mul_fp6(e, f1))]
 }
 
 const EXPS4: [(bool, bool, bool); 65] = [
@@ -576,7 +577,7 @@ fn fast_exp(f: Fp12) -> Fp12 {
     }
     y0 = mul_fp12(y0, y0);
 
-    // TODO: y0 = inv_fp12(y0);
+    y0 = inv_fp12(y0);
 
     y4 = mul_fp12(y4, y2);
     y4 = mul_fp12(y4, y2);
@@ -657,28 +658,6 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-#[test]
-fn test_inv_fp12() -> Result<()> {
-    let ptr = U256::from(100);
-    let inv = U256::from(200);
-    let f: Vec<U256> = gen_fp12().into_iter().flatten().flatten().collect();
-
-    let initial_offset = KERNEL.global_labels["test_inverse_fp12"];
-
-    let mut initial_stack = vec![ptr];
-    initial_stack.extend(f);
-    initial_stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
-    initial_stack.reverse();
-
-    let output: Vec<U256> = run_interpreter(initial_offset, initial_stack)?
-        .stack()
-        .to_vec();
-
-    assert_eq!(output, vec![]);
-
-    Ok(())
-}
-
 fn make_frob_stack(f: Fp12) -> Vec<U256> {
     let ptr = U256::from(100);
     let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
@@ -727,6 +706,32 @@ fn test_frob_fp12() -> Result<()> {
     Ok(())
 }
 
+fn make_inv_stack(f: Fp12) -> Vec<U256> {
+    let ptr = U256::from(100);
+    let inv = U256::from(200);
+    let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
+
+    let mut input = vec![ptr];
+    input.extend(f);
+    input.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
+    input.reverse();
+    input
+}
+
+#[test]
+fn test_inv_fp12() -> Result<()> {
+    let f: Fp12 = gen_fp12();
+
+    let test_inv = KERNEL.global_labels["test_inverse_fp12"];
+    let stack = make_inv_stack(f);
+
+    let output: Vec<U256> = run_interpreter(test_inv, stack)?.stack().to_vec();
+
+    assert_eq!(output, vec![]);
+
+    Ok(())
+}
+
 fn make_power_stack(f: Fp12) -> Vec<U256> {
     let sqr = U256::from(100);
     let out = U256::from(300);
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 20e40ce7..d8b24432 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -255,8 +255,6 @@ impl EvmField {
             [[f6, f7], [f8, f9], [f10, f11]],
         ];
 
-        println!("STACK_: {:#?}", f);
-
         let g = inv_fp12(f);
 
         [

From 950771a6ea6adfcac51cca685894fe962ec12499 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Tue, 20 Dec 2022 12:21:27 -0800
Subject: [PATCH 062/201] clean up inverse

---
 .../curve/bn254/field_arithmetic/inverse.asm  | 51 ++++++++++---------
 evm/src/cpu/kernel/tests/bn254_field.rs       |  4 +-
 2 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index cf7ee5ad..6b067960 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -21,6 +21,7 @@
     // stack:           x^-1
 %endmacro
 
+
 global test_inverse_fp12:
     // stack:                        ptr, f, ptr, inv, retdest
     %store_fp12
@@ -28,11 +29,31 @@ global test_inverse_fp12:
     %jump(inverse_fp12)
 
 global inverse_fp12:
-    // stack:                                ptr, inv, retdest
+    // stack:                 ptr, inv, retdest
     DUP1  %load_fp12
-    // stack:                             f, ptr, inv, retdest
+    // stack:              f, ptr, inv, retdest
     DUP14
-    // stack:                        inv, f, ptr, inv, retdest
+    // stack:         inv, f, ptr, inv, retdest
+    %prover_inv_fp12
+    // stack:   f^-1, inv, f, ptr, inv, retdest
+    DUP13  %store_fp12
+    // stack:         inv, f, ptr, inv, retdest
+    POP  %pop4  %pop4  %pop4
+    // stack:                 ptr, inv, retdest
+    PUSH 100  PUSH check_inv
+    // stack: check_inv, 100, ptr, inv, retdest 
+    SWAP3  SWAP1  SWAP2
+    // stack: ptr, inv, 100, check_inv, retdest 
+    %jump(mul_fp12)
+global check_inv:
+    // stack:        retdest
+    PUSH 100  %load_fp12
+    // stack: unit?, retdest
+    %assert_eq_unit_fp12
+    // stack:        retdest
+    JUMP
+
+%macro prover_inv_fp12
     PROVER_INPUT(ffe::bn254_base::ext_inv11)
     PROVER_INPUT(ffe::bn254_base::ext_inv10)
     PROVER_INPUT(ffe::bn254_base::ext_inv9)
@@ -45,22 +66,9 @@ global inverse_fp12:
     PROVER_INPUT(ffe::bn254_base::ext_inv2)
     PROVER_INPUT(ffe::bn254_base::ext_inv1)
     PROVER_INPUT(ffe::bn254_base::ext_inv0)
-    // stack:                  f^-1, inv, f, ptr, inv, retdest
-    DUP13
-    // stack:             inv, f^-1, inv, f, ptr, inv, retdest
-    %store_fp12
-    // stack:                        inv, f, ptr, inv, retdest
-    POP %pop4 %pop4 %pop4
-    // stack:                                ptr, inv, retdest 
-    PUSH 200  PUSH check_inv 
-    // stack:                check_inv, 200, ptr, inv, retdest 
-    DUP2  DUP5  DUP5
-    // stack: ptr, inv, 200, check_inv, 200, ptr, inv, retdest 
-    %jump(mul_fp12)
-global check_inv:
-    // stack:                           200, ptr, inv, retdest
-    %load_fp12
-    // stack:                         unit?, ptr, inv, retdest
+%endmacro
+
+%macro assert_eq_unit_fp12
     %assert_eq_const(1)
     %assert_eq_const(0)
     %assert_eq_const(0)
@@ -73,7 +81,4 @@ global check_inv:
     %assert_eq_const(0)
     %assert_eq_const(0)
     %assert_eq_const(0)
-    // stack:                                ptr, inv, retdest
-    %pop2
-    // stack:                                          retdest
-    JUMP
+%endmacro
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index e162969e..2f30956b 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -707,8 +707,8 @@ fn test_frob_fp12() -> Result<()> {
 }
 
 fn make_inv_stack(f: Fp12) -> Vec<U256> {
-    let ptr = U256::from(100);
-    let inv = U256::from(200);
+    let ptr = U256::from(200);
+    let inv = U256::from(300);
     let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
 
     let mut input = vec![ptr];

From 32758829ba25b8ffc236b95a614be568aa6d6189 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Tue, 20 Dec 2022 12:29:48 -0800
Subject: [PATCH 063/201] refactor

---
 evm/src/cpu/kernel/aggregator.rs              |   2 +-
 .../curve/bn254/field_arithmetic/fp12_mul.asm |   5 -
 .../curve/bn254/field_arithmetic/power.asm    | 156 +++++++++---------
 .../{field_macros.asm => utils.asm}           |   6 +
 evm/src/cpu/kernel/tests/bn254_field.rs       |   8 +-
 5 files changed, 89 insertions(+), 88 deletions(-)
 rename evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/{field_macros.asm => utils.asm} (99%)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 29144404..d924eeb4 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -30,11 +30,11 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/field_macros.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/power.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/utils.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_mul.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index e93336ee..5c2471d9 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -7,11 +7,6 @@ global test_mul_fp12:
     %store_fp12
     // stack:                         mul_dest, inA, inB, out, ret_stack, out
     JUMP
-global ret_stack:
-    // stack: out
-    %load_fp12
-    %jump(0xdeadbeef)
-
 
 ///////////////////////////////////////
 ///// GENERAL FP12 MULTIPLICATION /////
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index fc85cb44..6ab2eb3f 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -1,10 +1,10 @@
 global test_power:
-    // stack: sqr, f, sqr, out, ret_stack, out
+    // stack: ptr, f, ptr, out, ret_stack, out
     %store_fp12
-    // stack:         sqr, out, ret_stack, out
+    // stack:         ptr, out, ret_stack, out
     %jump(power)
 
-/// def power(square):
+/// def power(acc):
 ///     power_init()
 ///     power_loop_0()
 ///     power_loop_1()
@@ -22,17 +22,17 @@ global test_power:
 ///     return y2 * y1 * y0 
 
 global power:
-    // stack:                                                       sqr, out, retdest
+    // stack:                                                       ptr, out, retdest
     PUSH 1  DUP1  DUP1
-    // stack:                                              1, 1, 1, sqr, out, retdest
+    // stack:                                              1, 1, 1, ptr, out, retdest
     %mstore_kernel_general(200)  %mstore_kernel_general(212)  %mstore_kernel_general(224)
-    // stack:                                                       sqr, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:                                                       ptr, out, retdest  {200: y0, 212: y1, 224: y2}
     PUSH power_loop_2  PUSH power_loop_1  PUSH power_return    
-    // stack:             power_return, power_loop_1, power_loop_2, sqr, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:             power_return, power_loop_1, power_loop_2, ptr, out, retdest  {200: y0, 212: y1, 224: y2}
     SWAP3
-    // stack:             sqr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:             ptr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
     PUSH 65  PUSH 62  PUSH 65
-    // stack: 65, 62, 65, sqr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack: 65, 62, 65, ptr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
     %jump(power_loop_0)
 
 power_return:
@@ -84,158 +84,158 @@ power_return_5:
 ///     for i in range(1, len4):
 ///         abc = load(power_data_0)
 ///         if a:
-///             y1 *= square
+///             y1 *= acc
 ///         if b:
-///             y2 *= square
+///             y2 *= acc
 ///         if c:
-///             y0 *= square
-///         square = square_fp12(square)
-///     y1 *= square
+///             y0 *= acc
+///         acc = square_fp12(acc)
+///     y1 *= acc
 ///
 /// def power_loop_1():
 ///     for i in range(len4, len2):
 ///        ab = load(power_data_1)
 ///        if a:
-///            y2 *= square
+///            y2 *= acc
 ///        if b:
-///            y0 *= square
-///        square = square_fp12(square)
-///     y2 *= square
+///            y0 *= acc
+///        acc = square_fp12(acc)
+///     y2 *= acc
 ///
 /// def power_loop_2():
 ///     for i in range(len2, len0):
 ///         a = load(power_data_1)
 ///         if a:
-///             y0 *= square
-///         square = square_fp12(square)
-///     y0 *= square
+///             y0 *= acc
+///         acc = square_fp12(acc)
+///     y0 *= acc
 
 power_loop_0:
-    // stack:                                     i  , j, k, sqr, retdest
+    // stack:                                     i  , j, k, ptr, retdest
     DUP1  ISZERO
-    // stack:                             break?, i  , j, k, sqr, retdest
+    // stack:                             break?, i  , j, k, ptr, retdest
     %jumpi(power_loop_0_end)
-    // stack:                                     i  , j, k, sqr, retdest
+    // stack:                                     i  , j, k, ptr, retdest
     %sub_const(1)
-    // stack:                                     i-1, j, k, sqr, retdest
+    // stack:                                     i-1, j, k, ptr, retdest
     DUP1  %mload_kernel_code(power_data_0)
-    // stack:                                abc, i-1, j, k, sqr, retdest
+    // stack:                                abc, i-1, j, k, ptr, retdest
     DUP1  %lt_const(100)
-    // stack:                         skip?, abc, i-1, j, k, sqr, retdest
+    // stack:                         skip?, abc, i-1, j, k, ptr, retdest
     %jumpi(power_loop_0_b)
-    // stack:                                abc, i-1, j, k, sqr, retdest
+    // stack:                                abc, i-1, j, k, ptr, retdest
     %sub_const(100)
-    // stack:                                 bc, i-1, j, k, sqr, retdest
+    // stack:                                 bc, i-1, j, k, ptr, retdest
     PUSH power_loop_0_b  PUSH 212  DUP1  DUP8
-    // stack: sqr, 212, 212, power_loop_0_b,  bc, i-1, j, k, sqr, retdest
+    // stack: ptr, 212, 212, power_loop_0_b,  bc, i-1, j, k, ptr, retdest
     %jump(mul_fp12)
 power_loop_0_b:
-    // stack:                               bc, i, j, k, sqr, retdest
+    // stack:                               bc, i, j, k, ptr, retdest
     DUP1  %lt_const(10)
-    // stack:                        skip?, bc, i, j, k, sqr, retdest
+    // stack:                        skip?, bc, i, j, k, ptr, retdest
     %jumpi(power_loop_0_c)
-    // stack:                               bc, i, j, k, sqr, retdest
+    // stack:                               bc, i, j, k, ptr, retdest
     %sub_const(10)
-    // stack:                                c, i, j, k, sqr, retdest
+    // stack:                                c, i, j, k, ptr, retdest
     PUSH power_loop_0_c  PUSH 224  DUP1  DUP8
-    // stack: sqr, 224, 224, power_loop_0_c, c, i, j, k, sqr, retdest
+    // stack: ptr, 224, 224, power_loop_0_c, c, i, j, k, ptr, retdest
     %jump(mul_fp12)
 power_loop_0_c:
-    // stack:                              c, i, j, k, sqr, retdest
+    // stack:                              c, i, j, k, ptr, retdest
     DUP1  ISZERO
-    // stack:                       skip?, c, i, j, k, sqr, retdest
+    // stack:                       skip?, c, i, j, k, ptr, retdest
     %jumpi(power_loop_0_sq)
-    // stack:                              c, i, j, k, sqr, retdest
+    // stack:                              c, i, j, k, ptr, retdest
     POP
-    // stack:                                 i, j, k, sqr, retdest
+    // stack:                                 i, j, k, ptr, retdest
     PUSH power_loop_0_sq  PUSH 200  DUP1  DUP7
-    // stack: sqr, 200, 200, power_loop_0_sq, i, j, k, sqr, retdest
+    // stack: ptr, 200, 200, power_loop_0_sq, i, j, k, ptr, retdest
     %jump(mul_fp12)
 power_loop_0_sq:
-    // stack:                         i, j, k, sqr, retdest
+    // stack:                         i, j, k, ptr, retdest
     PUSH power_loop_0  DUP5  DUP1
-    // stack: sqr, sqr, power_loop_0, i, j, k, sqr, retdest
+    // stack: ptr, ptr, power_loop_0, i, j, k, ptr, retdest
     %jump(mul_fp12)
 power_loop_0_end:
-    // stack:                           0, j, k, sqr, retdest
+    // stack:                           0, j, k, ptr, retdest
     POP  
-    // stack:                              j, k, sqr, retdest
+    // stack:                              j, k, ptr, retdest
     PUSH power_loop_1  PUSH 212  DUP1  DUP6
-    // stack: sqr, 212, 212, power_loop_1, j, k, sqr, retdest
+    // stack: ptr, 212, 212, power_loop_1, j, k, ptr, retdest
     %jump(mul_fp12)
 
 power_loop_1:
-    // stack:                                   j  , k, sqr, retdest
+    // stack:                                   j  , k, ptr, retdest
     DUP1  ISZERO
-    // stack:                           break?, j  , k, sqr, retdest
+    // stack:                           break?, j  , k, ptr, retdest
     %jumpi(power_loop_1_end)
-    // stack:                                   j  , k, sqr, retdest
+    // stack:                                   j  , k, ptr, retdest
     %sub_const(1)
-    // stack:                                   j-1, k, sqr, retdest
+    // stack:                                   j-1, k, ptr, retdest
     DUP1  %mload_kernel_code(power_data_1)
-    // stack:                               ab, j-1, k, sqr, retdest
+    // stack:                               ab, j-1, k, ptr, retdest
     DUP1  %lt_const(10)
-    // stack:                        skip?, ab, j-1, k, sqr, retdest
+    // stack:                        skip?, ab, j-1, k, ptr, retdest
     %jumpi(power_loop_1_b)
-    // stack:                               ab, j-1, k, sqr, retdest
+    // stack:                               ab, j-1, k, ptr, retdest
     %sub_const(10)
-    // stack:                                b, j-1, k, sqr, retdest
+    // stack:                                b, j-1, k, ptr, retdest
     PUSH power_loop_1_b  PUSH 224  DUP1  DUP7
-    // stack: sqr, 224, 224, power_loop_1_b, b, j-1, k, sqr, retdest
+    // stack: ptr, 224, 224, power_loop_1_b, b, j-1, k, ptr, retdest
     %jump(mul_fp12)
 power_loop_1_b:
-    // stack:                              b, j, k, sqr, retdest
+    // stack:                              b, j, k, ptr, retdest
     DUP1  ISZERO
-    // stack:                       skip?, b, j, k, sqr, retdest
+    // stack:                       skip?, b, j, k, ptr, retdest
     %jumpi(power_loop_1_sq)
-    // stack:                              b, j, k, sqr, retdest
+    // stack:                              b, j, k, ptr, retdest
     POP
-    // stack:                                 j, k, sqr, retdest
+    // stack:                                 j, k, ptr, retdest
     PUSH power_loop_1_sq  PUSH 200  DUP1  DUP6
-    // stack: sqr, 200, 200, power_loop_1_sq, j, k, sqr, retdest
+    // stack: ptr, 200, 200, power_loop_1_sq, j, k, ptr, retdest
     %jump(mul_fp12)
 power_loop_1_sq:
-    // stack:                         j, k, sqr, retdest
+    // stack:                         j, k, ptr, retdest
     PUSH power_loop_1  DUP4  DUP1
-    // stack: sqr, sqr, power_loop_1, j, k, sqr, retdest
+    // stack: ptr, ptr, power_loop_1, j, k, ptr, retdest
     %jump(square_fp12)
 power_loop_1_end:
-    // stack:                           0, k, sqr, retdest
+    // stack:                           0, k, ptr, retdest
     POP  
-    // stack:                              k, sqr, retdest
+    // stack:                              k, ptr, retdest
     PUSH power_loop_2  PUSH 224  DUP1  DUP6
-    // stack: sqr, 224, 224, power_loop_2, k, sqr, retdest
+    // stack: ptr, 224, 224, power_loop_2, k, ptr, retdest
     %jump(mul_fp12)
 
 
 power_loop_2:
-    // stack:                                 k  , sqr, retdest
+    // stack:                                 k  , ptr, retdest
     DUP1  ISZERO
-    // stack:                         break?, k  , sqr, retdest
+    // stack:                         break?, k  , ptr, retdest
     %jumpi(power_loop_2_end)
-    // stack:                                 k  , sqr, retdest
+    // stack:                                 k  , ptr, retdest
     %sub_const(1)
-    // stack:                                 k-1, sqr, retdest
+    // stack:                                 k-1, ptr, retdest
     DUP1  %mload_kernel_code(power_data_2)
-    // stack:                              a, k-1, sqr, retdest
+    // stack:                              a, k-1, ptr, retdest
     DUP1  ISZERO
-    // stack:                       skip?, a, k-1, sqr, retdest
+    // stack:                       skip?, a, k-1, ptr, retdest
     %jumpi(power_loop_2_sq)
-    // stack:                              a, k-1, sqr, retdest
+    // stack:                              a, k-1, ptr, retdest
     POP
-    // stack:                                 k-1, sqr, retdest
+    // stack:                                 k-1, ptr, retdest
     PUSH power_loop_2_sq  PUSH 200  DUP1  DUP5
-    // stack: sqr, 200, 200, power_loop_2_sq, k-1, sqr, retdest
+    // stack: ptr, 200, 200, power_loop_2_sq, k-1, ptr, retdest
     %jump(mul_fp12)
 power_loop_2_sq:
-    // stack:                         k, sqr, retdest
+    // stack:                         k, ptr, retdest
     PUSH power_loop_2  DUP3  DUP1
-    // stack: sqr, sqr, power_loop_2, k, sqr, retdest
+    // stack: ptr, ptr, power_loop_2, k, ptr, retdest
     %jump(square_fp12)
 power_loop_2_end:
-    // stack:                           0, sqr, retdest
+    // stack:                           0, ptr, retdest
     POP  
-    // stack:                              sqr, retdest
+    // stack:                              ptr, retdest
     PUSH power_return  PUSH 200  DUP1  DUP4
-    // stack: sqr, 200, 200, power_return, sqr, retdest
+    // stack: ptr, 200, 200, power_return, ptr, retdest
     %jump(mul_fp12)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
similarity index 99%
rename from evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
index 87c8fdea..7a664a94 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
@@ -971,3 +971,9 @@
     %mstore_kernel_general
     // stack:                                                            
 %endmacro
+
+global ret_stack:
+    // stack: out
+    %load_fp12
+    %jump(0xdeadbeef)
+    
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 2f30956b..1ee629b0 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -733,14 +733,14 @@ fn test_inv_fp12() -> Result<()> {
 }
 
 fn make_power_stack(f: Fp12) -> Vec<U256> {
-    let sqr = U256::from(100);
-    let out = U256::from(300);
+    let ptr = U256::from(300);
+    let out = U256::from(400);
     let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
     let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
 
-    let mut input = vec![sqr];
+    let mut input = vec![ptr];
     input.extend(f);
-    input.extend(vec![sqr, out, ret_stack, out]);
+    input.extend(vec![ptr, out, ret_stack, out]);
     input.reverse();
     input
 }

From 7cd0dbaed9f6f654301de68d8e8498b8636a3e3f Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Tue, 20 Dec 2022 12:47:09 -0800
Subject: [PATCH 064/201] setup pow

---
 .../bn254/curve_arithmetic/tate_pairing.asm   |  4 +--
 .../curve/bn254/field_arithmetic/inverse.asm  |  6 ++---
 .../curve/bn254/field_arithmetic/power.asm    |  4 +--
 evm/src/cpu/kernel/tests/bn254_field.rs       | 27 +++++++++++++++++--
 4 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 031dc93b..27185084 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -1,7 +1,7 @@
 /// def tate(P : [Fp; 2], Q: [Fp2; 2]) -> Fp12:
 ///     out = miller_loop(P, Q)
 ///
-///     inv = inverse_fp12(out)
+///     inv = inv_fp12(out)
 ///     out = frob_fp12_6(out)
 ///     out = mul_fp12(out, inv)
 ///
@@ -27,7 +27,7 @@ global post_mllr:
     // stack:            100, tate_inv, out, retdest
     DUP3 
     // stack:       out, 100, tate_inv, out, retdest
-    %jump(inverse_fp12)
+    %jump(inv_fp12)
 tate_inv:
     // stack:                           out, retdest  {100: inv}
     PUSH tate_mul1
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 6b067960..1fe61024 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -22,13 +22,13 @@
 %endmacro
 
 
-global test_inverse_fp12:
+global test_inv_fp12:
     // stack:                        ptr, f, ptr, inv, retdest
     %store_fp12
     // stack:                                ptr, inv, retdest
-    %jump(inverse_fp12)
+    %jump(inv_fp12)
 
-global inverse_fp12:
+global inv_fp12:
     // stack:                 ptr, inv, retdest
     DUP1  %load_fp12
     // stack:              f, ptr, inv, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index 6ab2eb3f..52d8bf0e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -1,4 +1,4 @@
-global test_power:
+global test_pow:
     // stack: ptr, f, ptr, out, ret_stack, out
     %store_fp12
     // stack:         ptr, out, ret_stack, out
@@ -39,7 +39,7 @@ power_return:
     // stack:                                out, retdest  {200: y0, 212: y1, 224: y2}
     PUSH power_return_1  PUSH 236  PUSH 200
     // stack:      200, 236, power_return_1, out, retdest  {200: y0, 212: y1, 224: y2}
-    %jump(inverse_fp12)
+    %jump(inv_fp12)
 power_return_1:
     // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
     PUSH power_return_2  PUSH 248  PUSH 224
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 1ee629b0..3a1499b4 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -722,7 +722,7 @@ fn make_inv_stack(f: Fp12) -> Vec<U256> {
 fn test_inv_fp12() -> Result<()> {
     let f: Fp12 = gen_fp12();
 
-    let test_inv = KERNEL.global_labels["test_inverse_fp12"];
+    let test_inv = KERNEL.global_labels["test_inv_fp12"];
     let stack = make_inv_stack(f);
 
     let output: Vec<U256> = run_interpreter(test_inv, stack)?.stack().to_vec();
@@ -732,7 +732,7 @@ fn test_inv_fp12() -> Result<()> {
     Ok(())
 }
 
-fn make_power_stack(f: Fp12) -> Vec<U256> {
+fn make_pow_stack(f: Fp12) -> Vec<U256> {
     let ptr = U256::from(300);
     let out = U256::from(400);
     let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
@@ -744,3 +744,26 @@ fn make_power_stack(f: Fp12) -> Vec<U256> {
     input.reverse();
     input
 }
+
+fn make_pow_expected(f: Fp12) -> Vec<U256> {
+    fast_exp(f)
+        .into_iter()
+        .flatten()
+        .flatten()
+        .rev()
+        .collect()
+}
+
+fn test_pow_fp12() -> Result<()> {
+    let f: Fp12 = gen_fp12();
+
+    let test_pow = KERNEL.global_labels["test_pow"];
+    let stack = make_pow_stack(f);
+
+    let output: Vec<U256> = run_interpreter(test_pow, stack)?.stack().to_vec();
+    let expected: Vec<U256> = make_pow_expected(f);
+
+    assert_eq!(output, expected);
+
+    Ok(())
+}

From 05e835267ab9728ba82bc24b93a53b7b5a956504 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Tue, 20 Dec 2022 12:47:36 -0800
Subject: [PATCH 065/201] test

---
 evm/src/cpu/kernel/tests/bn254_field.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 3a1499b4..6d30fd89 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -754,6 +754,7 @@ fn make_pow_expected(f: Fp12) -> Vec<U256> {
         .collect()
 }
 
+#[test]
 fn test_pow_fp12() -> Result<()> {
     let f: Fp12 = gen_fp12();
 

From bc3adc1663635b3c5a9451223aebad9b3ac2f62d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Tue, 20 Dec 2022 15:10:53 -0800
Subject: [PATCH 066/201] debug pow

---
 .../curve/bn254/field_arithmetic/fp12_mul.asm |   3 +-
 .../curve/bn254/field_arithmetic/inverse.asm  |   4 +-
 .../curve/bn254/field_arithmetic/power.asm    | 154 +++++++++---------
 3 files changed, 78 insertions(+), 83 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index 5c2471d9..e4f13f60 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -418,4 +418,5 @@ post_sq2:
     // stack:                                out, ff + sh(f'f'), inp, out
     %store_fp6
     // stack:                                                    inp, out
-    %pop2  JUMP
+    %pop2
+    JUMP
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 1fe61024..e8815fc3 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -23,9 +23,9 @@
 
 
 global test_inv_fp12:
-    // stack:                        ptr, f, ptr, inv, retdest
+    // stack: ptr, f, ptr, inv, retdest
     %store_fp12
-    // stack:                                ptr, inv, retdest
+    // stack:         ptr, inv, retdest
     %jump(inv_fp12)
 
 global inv_fp12:
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index 52d8bf0e..548108d5 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -22,17 +22,13 @@ global test_pow:
 ///     return y2 * y1 * y0 
 
 global power:
-    // stack:                                                       ptr, out, retdest
+    // stack:             ptr, out, retdest
     PUSH 1  DUP1  DUP1
-    // stack:                                              1, 1, 1, ptr, out, retdest
+    // stack:    1, 1, 1, ptr, out, retdest
     %mstore_kernel_general(200)  %mstore_kernel_general(212)  %mstore_kernel_general(224)
-    // stack:                                                       ptr, out, retdest  {200: y0, 212: y1, 224: y2}
-    PUSH power_loop_2  PUSH power_loop_1  PUSH power_return    
-    // stack:             power_return, power_loop_1, power_loop_2, ptr, out, retdest  {200: y0, 212: y1, 224: y2}
-    SWAP3
-    // stack:             ptr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:             ptr, out, retdest  {200: y0, 212: y1, 224: y2}
     PUSH 65  PUSH 62  PUSH 65
-    // stack: 65, 62, 65, ptr, power_loop_1, power_loop_2, power_return, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack: 65, 62, 65, ptr, out, retdest  {200: y0, 212: y1, 224: y2}
     %jump(power_loop_0)
 
 power_return:
@@ -45,14 +41,14 @@ power_return_1:
     PUSH power_return_2  PUSH 248  PUSH 224
     // stack:      224, 248, power_return_2, out, retdest  {200: y0, 212: y1, 224: y2}
     %jump(square_fp12)
-power_return_2:
+power_return_2: 
     // stack:                                out, retdest  {236: y0, 212: y1, 224: y2, 248: y2^2}
     PUSH power_return_3  PUSH 248  PUSH 224  PUSH 248
     // stack: 248, 236, 248, power_return_3, out, retdest  {236: y0, 212: y1, 224: y2, 248: y2^2}
     %jump(mul_fp12)
 power_return_3:
     // stack:                                out, retdest  {236: y0, 212: y1, 224: y2, 248: y0*y2^2}
-    PUSH power_return_4  PUSH 212  PUSH 248  PUSH 212
+    PUSH power_return_4  PUSH 212  PUSH 248 PUSH 212
     // stack: 212, 248, 212, power_return_4, out, retdest  {236: y0, 212: y1, 224: y2, 248: y0*y2^2}
     %jump(mul_fp12)
 power_return_4:
@@ -69,10 +65,8 @@ power_return_4:
     // stack:                           224, out, retdest  {236: y0, 212: y1, 224: y2}
     POP
     // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
-    PUSH power_return_5  SWAP1
-    // stack:                out, power_return_5, retdest  {236: y0, 212: y1, 224: y2}
-    PUSH 236  PUSH 212
-    // stack:      212, 236, out, power_return_5, retdest  {236: y0, 212: y1, 224: y2}
+    PUSH power_return_5  DUP2  PUSH 236  PUSH 212
+    // stack: 212, 236, out, power_return_5, out, retdest  {236: y0, 212: y1, 224: y2}
     %jump(mul_fp12)
 power_return_5:
     // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
@@ -111,131 +105,131 @@ power_return_5:
 ///     y0 *= acc
 
 power_loop_0:
-    // stack:                                     i  , j, k, ptr, retdest
+    // stack:                                     i  , j, k, ptr
     DUP1  ISZERO
-    // stack:                             break?, i  , j, k, ptr, retdest
+    // stack:                             break?, i  , j, k, ptr
     %jumpi(power_loop_0_end)
-    // stack:                                     i  , j, k, ptr, retdest
+    // stack:                                     i  , j, k, ptr
     %sub_const(1)
-    // stack:                                     i-1, j, k, ptr, retdest
+    // stack:                                     i-1, j, k, ptr
     DUP1  %mload_kernel_code(power_data_0)
-    // stack:                                abc, i-1, j, k, ptr, retdest
+    // stack:                                abc, i-1, j, k, ptr
     DUP1  %lt_const(100)
-    // stack:                         skip?, abc, i-1, j, k, ptr, retdest
+    // stack:                         skip?, abc, i-1, j, k, ptr
     %jumpi(power_loop_0_b)
-    // stack:                                abc, i-1, j, k, ptr, retdest
+    // stack:                                abc, i-1, j, k, ptr
     %sub_const(100)
-    // stack:                                 bc, i-1, j, k, ptr, retdest
+    // stack:                                 bc, i-1, j, k, ptr
     PUSH power_loop_0_b  PUSH 212  DUP1  DUP8
-    // stack: ptr, 212, 212, power_loop_0_b,  bc, i-1, j, k, ptr, retdest
+    // stack: ptr, 212, 212, power_loop_0_b,  bc, i-1, j, k, ptr
     %jump(mul_fp12)
 power_loop_0_b:
-    // stack:                               bc, i, j, k, ptr, retdest
+    // stack:                               bc, i, j, k, ptr
     DUP1  %lt_const(10)
-    // stack:                        skip?, bc, i, j, k, ptr, retdest
+    // stack:                        skip?, bc, i, j, k, ptr
     %jumpi(power_loop_0_c)
-    // stack:                               bc, i, j, k, ptr, retdest
+    // stack:                               bc, i, j, k, ptr
     %sub_const(10)
-    // stack:                                c, i, j, k, ptr, retdest
+    // stack:                                c, i, j, k, ptr
     PUSH power_loop_0_c  PUSH 224  DUP1  DUP8
-    // stack: ptr, 224, 224, power_loop_0_c, c, i, j, k, ptr, retdest
+    // stack: ptr, 224, 224, power_loop_0_c, c, i, j, k, ptr
     %jump(mul_fp12)
 power_loop_0_c:
-    // stack:                              c, i, j, k, ptr, retdest
-    DUP1  ISZERO
-    // stack:                       skip?, c, i, j, k, ptr, retdest
+    // stack:                              c, i, j, k, ptr
+    ISZERO
+    // stack:                          skip?, i, j, k, ptr
     %jumpi(power_loop_0_sq)
-    // stack:                              c, i, j, k, ptr, retdest
-    POP
-    // stack:                                 i, j, k, ptr, retdest
+    // stack:                                 i, j, k, ptr
     PUSH power_loop_0_sq  PUSH 200  DUP1  DUP7
-    // stack: ptr, 200, 200, power_loop_0_sq, i, j, k, ptr, retdest
+    // stack: ptr, 200, 200, power_loop_0_sq, i, j, k, ptr
     %jump(mul_fp12)
 power_loop_0_sq:
-    // stack:                         i, j, k, ptr, retdest
+    // stack:                         i, j, k, ptr
     PUSH power_loop_0  DUP5  DUP1
-    // stack: ptr, ptr, power_loop_0, i, j, k, ptr, retdest
-    %jump(mul_fp12)
+    // stack: ptr, ptr, power_loop_0, i, j, k, ptr
+    %jump(square_fp12)
 power_loop_0_end:
-    // stack:                           0, j, k, ptr, retdest
+    // stack:                           0, j, k, ptr
     POP  
-    // stack:                              j, k, ptr, retdest
+    // stack:                              j, k, ptr
     PUSH power_loop_1  PUSH 212  DUP1  DUP6
-    // stack: ptr, 212, 212, power_loop_1, j, k, ptr, retdest
+    // stack: ptr, 212, 212, power_loop_1, j, k, ptr
     %jump(mul_fp12)
 
 power_loop_1:
-    // stack:                                   j  , k, ptr, retdest
+    // stack:                                   j  , k, ptr
     DUP1  ISZERO
-    // stack:                           break?, j  , k, ptr, retdest
+    // stack:                           break?, j  , k, ptr
     %jumpi(power_loop_1_end)
-    // stack:                                   j  , k, ptr, retdest
+    // stack:                                   j  , k, ptr
     %sub_const(1)
-    // stack:                                   j-1, k, ptr, retdest
+    // stack:                                   j-1, k, ptr
     DUP1  %mload_kernel_code(power_data_1)
-    // stack:                               ab, j-1, k, ptr, retdest
+    // stack:                               ab, j-1, k, ptr
     DUP1  %lt_const(10)
-    // stack:                        skip?, ab, j-1, k, ptr, retdest
+    // stack:                        skip?, ab, j-1, k, ptr
     %jumpi(power_loop_1_b)
-    // stack:                               ab, j-1, k, ptr, retdest
+    // stack:                               ab, j-1, k, ptr
     %sub_const(10)
-    // stack:                                b, j-1, k, ptr, retdest
+    // stack:                                b, j-1, k, ptr
     PUSH power_loop_1_b  PUSH 224  DUP1  DUP7
-    // stack: ptr, 224, 224, power_loop_1_b, b, j-1, k, ptr, retdest
+    // stack: ptr, 224, 224, power_loop_1_b, b, j-1, k, ptr
     %jump(mul_fp12)
 power_loop_1_b:
-    // stack:                              b, j, k, ptr, retdest
-    DUP1  ISZERO
-    // stack:                       skip?, b, j, k, ptr, retdest
+    // stack:                              b, j, k, ptr
+    ISZERO
+    // stack:                          skip?, j, k, ptr
     %jumpi(power_loop_1_sq)
-    // stack:                              b, j, k, ptr, retdest
-    POP
-    // stack:                                 j, k, ptr, retdest
+    // stack:                                 j, k, ptr
     PUSH power_loop_1_sq  PUSH 200  DUP1  DUP6
-    // stack: ptr, 200, 200, power_loop_1_sq, j, k, ptr, retdest
+    // stack: ptr, 200, 200, power_loop_1_sq, j, k, ptr
     %jump(mul_fp12)
 power_loop_1_sq:
-    // stack:                         j, k, ptr, retdest
+    // stack:                         j, k, ptr
     PUSH power_loop_1  DUP4  DUP1
-    // stack: ptr, ptr, power_loop_1, j, k, ptr, retdest
+    // stack: ptr, ptr, power_loop_1, j, k, ptr
     %jump(square_fp12)
 power_loop_1_end:
-    // stack:                           0, k, ptr, retdest
+    // stack:                           0, k, ptr
     POP  
-    // stack:                              k, ptr, retdest
-    PUSH power_loop_2  PUSH 224  DUP1  DUP6
-    // stack: ptr, 224, 224, power_loop_2, k, ptr, retdest
+    // stack:                              k, ptr
+    PUSH power_loop_2  PUSH 224  DUP1  DUP5
+    // stack: ptr, 224, 224, power_loop_2, k, ptr
     %jump(mul_fp12)
 
 
 power_loop_2:
-    // stack:                                 k  , ptr, retdest
+    // stack:                                 k  , ptr
     DUP1  ISZERO
-    // stack:                         break?, k  , ptr, retdest
+    // stack:                         break?, k  , ptr
     %jumpi(power_loop_2_end)
-    // stack:                                 k  , ptr, retdest
+    // stack:                                 k  , ptr
     %sub_const(1)
-    // stack:                                 k-1, ptr, retdest
+    // stack:                                 k-1, ptr
     DUP1  %mload_kernel_code(power_data_2)
-    // stack:                              a, k-1, ptr, retdest
-    DUP1  ISZERO
-    // stack:                       skip?, a, k-1, ptr, retdest
+    // stack:                              a, k-1, ptr
+    ISZERO
+    // stack:                          skip?, k-1, ptr
     %jumpi(power_loop_2_sq)
-    // stack:                              a, k-1, ptr, retdest
-    POP
-    // stack:                                 k-1, ptr, retdest
+    // stack:                                 k-1, ptr
     PUSH power_loop_2_sq  PUSH 200  DUP1  DUP5
-    // stack: ptr, 200, 200, power_loop_2_sq, k-1, ptr, retdest
+    // stack: ptr, 200, 200, power_loop_2_sq, k-1, ptr
     %jump(mul_fp12)
 power_loop_2_sq:
-    // stack:                         k, ptr, retdest
+    // stack:                         k, ptr
     PUSH power_loop_2  DUP3  DUP1
-    // stack: ptr, ptr, power_loop_2, k, ptr, retdest
+    // stack: ptr, ptr, power_loop_2, k, ptr
     %jump(square_fp12)
 power_loop_2_end:
-    // stack:                           0, ptr, retdest
+    // stack:                      0, ptr
     POP  
-    // stack:                              ptr, retdest
-    PUSH power_return  PUSH 200  DUP1  DUP4
-    // stack: ptr, 200, 200, power_return, ptr, retdest
+    // stack:                         ptr
+    PUSH 200  PUSH power_return  SWAP2  DUP2 
+    // stack: 200, ptr, 200, power_return
     %jump(mul_fp12)
+
+
+%macro check(lbl)
+    PUSH $lbl
+    %jump(ret_stack)
+%endmacro
\ No newline at end of file

From 5aab8ac06aae6866f1cc64ee48e4ab799c2def84 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <typecat@Dmitrys-MacBook-Pro.local>
Date: Tue, 20 Dec 2022 15:37:41 -0800
Subject: [PATCH 067/201] first part works

---
 .../bn254/curve_arithmetic/constants.asm      |   6 +-
 .../curve/bn254/field_arithmetic/fp12_mul.asm |   3 +-
 .../curve/bn254/field_arithmetic/power.asm    | 273 +++++++++---------
 evm/src/cpu/kernel/tests/bn254_field.rs       |  16 +-
 4 files changed, 151 insertions(+), 147 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index b0cea9e3..6c3e0bad 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -7,7 +7,7 @@ global miller_data:
     BYTES 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11, 0x32
     BYTES 0x32, 0x12, 0x13, 0x22, 0x15
 
-global power_data_0:
+global power_data_4:
     BYTES 111, 010, 011, 111, 110, 101, 001, 100, 001, 100
     BYTES 110, 110, 110, 011, 011, 101, 011, 101, 101, 111
     BYTES 000, 011, 011, 001, 011, 001, 101, 100, 100, 000
@@ -16,7 +16,7 @@ global power_data_0:
     BYTES 110, 110, 110, 010, 101, 110, 101, 010, 101, 001
     BYTES 000, 111, 111, 110, 111
 
-global power_data_1:
+global power_data_2:
     BYTES 11, 01, 11, 10, 11, 10, 01, 10, 00, 01
     BYTES 10, 11, 01, 11, 10, 01, 00, 00, 00, 01
     BYTES 10, 01, 01, 10, 00, 01, 11, 00, 01, 00
@@ -25,7 +25,7 @@ global power_data_1:
     BYTES 11, 01, 01, 10, 11, 10, 11, 10, 10, 00
     BYTES 11, 10
 
-global power_data_2:
+global power_data_0:
     BYTES 0, 1, 1, 0, 0, 1, 1, 1, 1, 0
     BYTES 0, 0, 1, 0, 0, 1, 1, 0, 1, 0
     BYTES 1, 1, 1, 1, 0, 0, 1, 1, 1, 0
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index e4f13f60..5c2471d9 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -418,5 +418,4 @@ post_sq2:
     // stack:                                out, ff + sh(f'f'), inp, out
     %store_fp6
     // stack:                                                    inp, out
-    %pop2
-    JUMP
+    %pop2  JUMP
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index 548108d5..53f18485 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -6,89 +6,90 @@ global test_pow:
 
 /// def power(acc):
 ///     power_init()
-///     power_loop_0()
-///     power_loop_1()
+///     power_loop_4()
 ///     power_loop_2()
+///     power_loop_0()
 ///     power_return()
 ///
 /// def power_init()
-///     y0, y1, y2 = 1, 1, 1
+///     y0, y4, y2 = 1, 1, 1
 ///
 /// def power_return()
 ///     y0  = y0^{-1}
-///     y1 *= y0 * (y2**2)
-///     y1  = frob_fp12_1(y1)
+///     y4 *= y0 * (y2**2)
+///     y4  = frob_fp12_1(y4)
 ///     y2  = frob_fp12_2_(y2)
-///     return y2 * y1 * y0 
+///     return y2 * y4 * y0 
 
 global power:
     // stack:             ptr, out, retdest
     PUSH 1  DUP1  DUP1
     // stack:    1, 1, 1, ptr, out, retdest
-    %mstore_kernel_general(200)  %mstore_kernel_general(212)  %mstore_kernel_general(224)
-    // stack:             ptr, out, retdest  {200: y0, 212: y1, 224: y2}
+    %mstore_kernel_general(200)  %mstore_kernel_general(224)  %mstore_kernel_general(212)
+    // stack:             ptr, out, retdest  {200: y0, 212: y2, 224: y4}
     PUSH 65  PUSH 62  PUSH 65
-    // stack: 65, 62, 65, ptr, out, retdest  {200: y0, 212: y1, 224: y2}
-    %jump(power_loop_0)
+    // stack: 65, 62, 65, ptr, out, retdest  {200: y0, 212: y2, 224: y4}
+    %jump(power_loop_4)
 
 power_return:
-    // stack:                                out, retdest  {200: y0, 212: y1, 224: y2}
+    %check(224)
+    // stack:                                out, retdest  {200: y0, 212: y2, 224: y4}
     PUSH power_return_1  PUSH 236  PUSH 200
-    // stack:      200, 236, power_return_1, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:      200, 236, power_return_1, out, retdest  {200: y0, 212: y2, 224: y4}
     %jump(inv_fp12)
 power_return_1:
-    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
-    PUSH power_return_2  PUSH 248  PUSH 224
-    // stack:      224, 248, power_return_2, out, retdest  {200: y0, 212: y1, 224: y2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
+    PUSH power_return_2  PUSH 248  PUSH 212
+    // stack:      212, 248, power_return_2, out, retdest  {236: y0, 212: y2, 224: y4}
     %jump(square_fp12)
 power_return_2: 
-    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2, 248: y2^2}
-    PUSH power_return_3  PUSH 248  PUSH 224  PUSH 248
-    // stack: 248, 236, 248, power_return_3, out, retdest  {236: y0, 212: y1, 224: y2, 248: y2^2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4, 248: y2^2}
+    PUSH power_return_3  PUSH 248  PUSH 212  PUSH 248
+    // stack: 248, 236, 248, power_return_3, out, retdest  {236: y0, 212: y2, 224: y4, 248: y2^2}
     %jump(mul_fp12)
 power_return_3:
-    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2, 248: y0*y2^2}
-    PUSH power_return_4  PUSH 212  PUSH 248 PUSH 212
-    // stack: 212, 248, 212, power_return_4, out, retdest  {236: y0, 212: y1, 224: y2, 248: y0*y2^2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4, 248: y0*y2^2}
+    PUSH power_return_4  PUSH 224  PUSH 248 PUSH 224
+    // stack: 224, 248, 224, power_return_4, out, retdest  {236: y0, 212: y2, 224: y4, 248: y0*y2^2}
     %jump(mul_fp12)
 power_return_4:
-    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
-    PUSH 212
-    // stack:                           212, out, retdest  {236: y0, 212: y1, 224: y2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
+    PUSH 224
+    // stack:                           224, out, retdest  {236: y0, 212: y2, 224: y4}
     %frob_fp12_1
-    // stack:                           212, out, retdest  {236: y0, 212: y1, 224: y2}
+    // stack:                           224, out, retdest  {236: y0, 212: y2, 224: y4}
     POP
-    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
-    PUSH 224  DUP1
-    // stack:                      224, 224, out, retdest  {236: y0, 212: y1, 224: y2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
+    PUSH 212  DUP1
+    // stack:                      212, 212, out, retdest  {236: y0, 212: y2, 224: y4}
     %frob_fp12_2_
-    // stack:                           224, out, retdest  {236: y0, 212: y1, 224: y2}
+    // stack:                           212, out, retdest  {236: y0, 212: y2, 224: y4}
     POP
-    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
-    PUSH power_return_5  DUP2  PUSH 236  PUSH 212
-    // stack: 212, 236, out, power_return_5, out, retdest  {236: y0, 212: y1, 224: y2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
+    PUSH power_return_5  DUP2  PUSH 236  PUSH 224
+    // stack: 224, 236, out, power_return_5, out, retdest  {236: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_return_5:
-    // stack:                                out, retdest  {236: y0, 212: y1, 224: y2}
-    PUSH 224  DUP2
-    // stack:                      out, 224, out, retdest  {236: y0, 212: y1, 224: y2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
+    PUSH 212  DUP2
+    // stack:                      out, 212, out, retdest  {236: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 
-/// def power_loop_0():
-///     for i in range(1, len4):
-///         abc = load(power_data_0)
+/// def power_loop_4():
+///     for i in range(65):
+///         abc = load(i, power_data_4)
 ///         if a:
-///             y1 *= acc
+///             y4 *= acc
 ///         if b:
 ///             y2 *= acc
 ///         if c:
 ///             y0 *= acc
 ///         acc = square_fp12(acc)
-///     y1 *= acc
+///     y4 *= acc
 ///
-/// def power_loop_1():
-///     for i in range(len4, len2):
-///        ab = load(power_data_1)
+/// def power_loop_2():
+///     for i in range(62):
+///        ab = load(i, power_data_2)
 ///        if a:
 ///            y2 *= acc
 ///        if b:
@@ -96,131 +97,131 @@ power_return_5:
 ///        acc = square_fp12(acc)
 ///     y2 *= acc
 ///
-/// def power_loop_2():
-///     for i in range(len2, len0):
-///         a = load(power_data_1)
+/// def power_loop_0():
+///     for i in range(65):
+///         a = load(i, power_data_0)
 ///         if a:
 ///             y0 *= acc
 ///         acc = square_fp12(acc)
 ///     y0 *= acc
 
-power_loop_0:
-    // stack:                                     i  , j, k, ptr
+power_loop_4:
+    // stack:                                     i  , j, k, ptr  {200: y0, 212: y2, 224: y4}
     DUP1  ISZERO
-    // stack:                             break?, i  , j, k, ptr
-    %jumpi(power_loop_0_end)
-    // stack:                                     i  , j, k, ptr
+    // stack:                             break?, i  , j, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_4_end)
+    // stack:                                     i  , j, k, ptr  {200: y0, 212: y2, 224: y4}
     %sub_const(1)
-    // stack:                                     i-1, j, k, ptr
-    DUP1  %mload_kernel_code(power_data_0)
-    // stack:                                abc, i-1, j, k, ptr
+    // stack:                                     i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    DUP1  %mload_kernel_code(power_data_4)
+    // stack:                                abc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
     DUP1  %lt_const(100)
-    // stack:                         skip?, abc, i-1, j, k, ptr
-    %jumpi(power_loop_0_b)
-    // stack:                                abc, i-1, j, k, ptr
+    // stack:                         skip?, abc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_4_b)
+    // stack:                                abc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
     %sub_const(100)
-    // stack:                                 bc, i-1, j, k, ptr
-    PUSH power_loop_0_b  PUSH 212  DUP1  DUP8
-    // stack: ptr, 212, 212, power_loop_0_b,  bc, i-1, j, k, ptr
+    // stack:                                 bc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_4_b  PUSH 224  DUP1  DUP8
+    // stack: ptr, 224, 224, power_loop_4_b,  bc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
-power_loop_0_b:
-    // stack:                               bc, i, j, k, ptr
+power_loop_4_b:
+    // stack:                               bc, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
     DUP1  %lt_const(10)
-    // stack:                        skip?, bc, i, j, k, ptr
-    %jumpi(power_loop_0_c)
-    // stack:                               bc, i, j, k, ptr
+    // stack:                        skip?, bc, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_4_c)
+    // stack:                               bc, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
     %sub_const(10)
-    // stack:                                c, i, j, k, ptr
-    PUSH power_loop_0_c  PUSH 224  DUP1  DUP8
-    // stack: ptr, 224, 224, power_loop_0_c, c, i, j, k, ptr
+    // stack:                                c, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_4_c  PUSH 212  DUP1  DUP8
+    // stack: ptr, 212, 212, power_loop_4_c, c, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
-power_loop_0_c:
-    // stack:                              c, i, j, k, ptr
+power_loop_4_c:
+    // stack:                              c, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
     ISZERO
-    // stack:                          skip?, i, j, k, ptr
-    %jumpi(power_loop_0_sq)
-    // stack:                                 i, j, k, ptr
-    PUSH power_loop_0_sq  PUSH 200  DUP1  DUP7
-    // stack: ptr, 200, 200, power_loop_0_sq, i, j, k, ptr
+    // stack:                          skip?, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_4_sq)
+    // stack:                                 i, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_4_sq  PUSH 200  DUP1  DUP7
+    // stack: ptr, 200, 200, power_loop_4_sq, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
-power_loop_0_sq:
-    // stack:                         i, j, k, ptr
-    PUSH power_loop_0  DUP5  DUP1
-    // stack: ptr, ptr, power_loop_0, i, j, k, ptr
+power_loop_4_sq:
+    // stack:                         i, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_4  DUP5  DUP1
+    // stack: ptr, ptr, power_loop_4, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
     %jump(square_fp12)
-power_loop_0_end:
-    // stack:                           0, j, k, ptr
+power_loop_4_end:
+    // stack:                           0, j, k, ptr  {200: y0, 212: y2, 224: y4}
     POP  
-    // stack:                              j, k, ptr
-    PUSH power_loop_1  PUSH 212  DUP1  DUP6
-    // stack: ptr, 212, 212, power_loop_1, j, k, ptr
+    // stack:                              j, k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_2  PUSH 224  DUP1  DUP6
+    // stack: ptr, 224, 224, power_loop_2, j, k, ptr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 
-power_loop_1:
-    // stack:                                   j  , k, ptr
-    DUP1  ISZERO
-    // stack:                           break?, j  , k, ptr
-    %jumpi(power_loop_1_end)
-    // stack:                                   j  , k, ptr
-    %sub_const(1)
-    // stack:                                   j-1, k, ptr
-    DUP1  %mload_kernel_code(power_data_1)
-    // stack:                               ab, j-1, k, ptr
-    DUP1  %lt_const(10)
-    // stack:                        skip?, ab, j-1, k, ptr
-    %jumpi(power_loop_1_b)
-    // stack:                               ab, j-1, k, ptr
-    %sub_const(10)
-    // stack:                                b, j-1, k, ptr
-    PUSH power_loop_1_b  PUSH 224  DUP1  DUP7
-    // stack: ptr, 224, 224, power_loop_1_b, b, j-1, k, ptr
-    %jump(mul_fp12)
-power_loop_1_b:
-    // stack:                              b, j, k, ptr
-    ISZERO
-    // stack:                          skip?, j, k, ptr
-    %jumpi(power_loop_1_sq)
-    // stack:                                 j, k, ptr
-    PUSH power_loop_1_sq  PUSH 200  DUP1  DUP6
-    // stack: ptr, 200, 200, power_loop_1_sq, j, k, ptr
-    %jump(mul_fp12)
-power_loop_1_sq:
-    // stack:                         j, k, ptr
-    PUSH power_loop_1  DUP4  DUP1
-    // stack: ptr, ptr, power_loop_1, j, k, ptr
-    %jump(square_fp12)
-power_loop_1_end:
-    // stack:                           0, k, ptr
-    POP  
-    // stack:                              k, ptr
-    PUSH power_loop_2  PUSH 224  DUP1  DUP5
-    // stack: ptr, 224, 224, power_loop_2, k, ptr
-    %jump(mul_fp12)
-
-
 power_loop_2:
+    // stack:                                   j  , k, ptr  {200: y0, 212: y2, 224: y4}
+    DUP1  ISZERO
+    // stack:                           break?, j  , k, ptr  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_2_end)
+    // stack:                                   j  , k, ptr  {200: y0, 212: y2, 224: y4}
+    %sub_const(1)
+    // stack:                                   j-1, k, ptr  {200: y0, 212: y2, 224: y4}
+    DUP1  %mload_kernel_code(power_data_2)
+    // stack:                               ab, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
+    DUP1  %lt_const(10)
+    // stack:                        skip?, ab, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_2_b)
+    // stack:                               ab, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
+    %sub_const(10)
+    // stack:                                b, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_2_b  PUSH 212  DUP1  DUP7
+    // stack: ptr, 212, 212, power_loop_2_b, b, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+power_loop_2_b:
+    // stack:                              b, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    ISZERO
+    // stack:                          skip?, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_2_sq)
+    // stack:                                 j, k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_2_sq  PUSH 200  DUP1  DUP6
+    // stack: ptr, 200, 200, power_loop_2_sq, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+power_loop_2_sq:
+    // stack:                         j, k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_2  DUP4  DUP1
+    // stack: ptr, ptr, power_loop_2, j, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jump(square_fp12)
+power_loop_2_end:
+    // stack:                           0, k, ptr  {200: y0, 212: y2, 224: y4}
+    POP  
+    // stack:                              k, ptr  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_0  PUSH 212  DUP1  DUP5
+    // stack: ptr, 212, 212, power_loop_0, k, ptr  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+
+
+power_loop_0:
     // stack:                                 k  , ptr
     DUP1  ISZERO
     // stack:                         break?, k  , ptr
-    %jumpi(power_loop_2_end)
+    %jumpi(power_loop_0_end)
     // stack:                                 k  , ptr
     %sub_const(1)
     // stack:                                 k-1, ptr
-    DUP1  %mload_kernel_code(power_data_2)
+    DUP1  %mload_kernel_code(power_data_0)
     // stack:                              a, k-1, ptr
     ISZERO
     // stack:                          skip?, k-1, ptr
-    %jumpi(power_loop_2_sq)
+    %jumpi(power_loop_0_sq)
     // stack:                                 k-1, ptr
-    PUSH power_loop_2_sq  PUSH 200  DUP1  DUP5
-    // stack: ptr, 200, 200, power_loop_2_sq, k-1, ptr
+    PUSH power_loop_0_sq  PUSH 200  DUP1  DUP5
+    // stack: ptr, 200, 200, power_loop_0_sq, k-1, ptr
     %jump(mul_fp12)
-power_loop_2_sq:
+power_loop_0_sq:
     // stack:                         k, ptr
-    PUSH power_loop_2  DUP3  DUP1
-    // stack: ptr, ptr, power_loop_2, k, ptr
+    PUSH power_loop_0  DUP3  DUP1
+    // stack: ptr, ptr, power_loop_0, k, ptr
     %jump(square_fp12)
-power_loop_2_end:
+power_loop_0_end:
     // stack:                      0, ptr
     POP  
     // stack:                         ptr
@@ -232,4 +233,4 @@ power_loop_2_end:
 %macro check(lbl)
     PUSH $lbl
     %jump(ret_stack)
-%endmacro
\ No newline at end of file
+%endmacro
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index 6d30fd89..c68f8bc0 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -540,9 +540,9 @@ const EXPS0: [bool; 65] = [
 
 fn fast_exp(f: Fp12) -> Fp12 {
     let mut sq: Fp12 = f;
-    let mut y0: Fp12 = embed_fp12(U256::from(1));
-    let mut y2: Fp12 = embed_fp12(U256::from(1));
-    let mut y4: Fp12 = embed_fp12(U256::from(1));
+    let mut y0: Fp12 = embed_fp12(U256::one());
+    let mut y2: Fp12 = embed_fp12(U256::one());
+    let mut y4: Fp12 = embed_fp12(U256::one());
 
     for (a, b, c) in EXPS4 {
         if a {
@@ -556,7 +556,7 @@ fn fast_exp(f: Fp12) -> Fp12 {
         }
         sq = mul_fp12(sq, sq);
     }
-    y4 = mul_fp12(y4, y4);
+    y4 = mul_fp12(y4, sq);
 
     for (a, b) in EXPS2 {
         if a {
@@ -567,7 +567,7 @@ fn fast_exp(f: Fp12) -> Fp12 {
         }
         sq = mul_fp12(sq, sq);
     }
-    y2 = mul_fp12(y2, y2);
+    y2 = mul_fp12(y2, sq);
 
     for a in EXPS0 {
         if a {
@@ -575,8 +575,12 @@ fn fast_exp(f: Fp12) -> Fp12 {
         }
         sq = mul_fp12(sq, sq);
     }
-    y0 = mul_fp12(y0, y0);
+    y0 = mul_fp12(y0, sq);
 
+    println!("y0: {:#?}", y0);
+    println!("y2: {:#?}", y2);
+    println!("y4: {:#?}", y4);
+    
     y0 = inv_fp12(y0);
 
     y4 = mul_fp12(y4, y2);

From e9e5528cc88eb373779a64b9d7a88a3616134fe0 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 15:56:07 -0800
Subject: [PATCH 068/201] space

---
 evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index 53f18485..998af552 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -234,3 +234,4 @@ power_loop_0_end:
     PUSH $lbl
     %jump(ret_stack)
 %endmacro
+

From e88e28a19ced20300f984daf42dc568feadc9308 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 16:33:28 -0800
Subject: [PATCH 069/201] POWER WORKS

---
 .../curve/bn254/field_arithmetic/power.asm    | 21 ++++++++-----------
 evm/src/cpu/kernel/tests/bn254_field.rs       |  9 ++++----
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index 998af552..e83732a5 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -32,25 +32,24 @@ global power:
     %jump(power_loop_4)
 
 power_return:
-    %check(224)
     // stack:                                out, retdest  {200: y0, 212: y2, 224: y4}
     PUSH power_return_1  PUSH 236  PUSH 200
     // stack:      200, 236, power_return_1, out, retdest  {200: y0, 212: y2, 224: y4}
     %jump(inv_fp12)
 power_return_1:
     // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
-    PUSH power_return_2  PUSH 248  PUSH 212
-    // stack:      212, 248, power_return_2, out, retdest  {236: y0, 212: y2, 224: y4}
-    %jump(square_fp12)
+    PUSH power_return_2  PUSH 224  DUP1  PUSH 212
+    // stack: 212, 224, 224, power_return_2, out, retdest  {236: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
 power_return_2: 
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4, 248: y2^2}
-    PUSH power_return_3  PUSH 248  PUSH 212  PUSH 248
-    // stack: 248, 236, 248, power_return_3, out, retdest  {236: y0, 212: y2, 224: y4, 248: y2^2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
+    PUSH power_return_3  PUSH 224  DUP1  PUSH 212
+    // stack: 212, 224, 224, power_return_3, out, retdest  {236: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_return_3:
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4, 248: y0*y2^2}
-    PUSH power_return_4  PUSH 224  PUSH 248 PUSH 224
-    // stack: 224, 248, 224, power_return_4, out, retdest  {236: y0, 212: y2, 224: y4, 248: y0*y2^2}
+    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
+    PUSH power_return_4  PUSH 224  DUP1  PUSH 236
+    // stack: 236, 224, 224, power_return_4, out, retdest  {236: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_return_4:
     // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
@@ -229,9 +228,7 @@ power_loop_0_end:
     // stack: 200, ptr, 200, power_return
     %jump(mul_fp12)
 
-
 %macro check(lbl)
     PUSH $lbl
     %jump(ret_stack)
 %endmacro
-
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254_field.rs
index c68f8bc0..e1817fa0 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254_field.rs
@@ -576,17 +576,18 @@ fn fast_exp(f: Fp12) -> Fp12 {
         sq = mul_fp12(sq, sq);
     }
     y0 = mul_fp12(y0, sq);
-
-    println!("y0: {:#?}", y0);
-    println!("y2: {:#?}", y2);
-    println!("y4: {:#?}", y4);
     
     y0 = inv_fp12(y0);
 
+    // println!("y0: {:#?}", y0);
+    // println!("y4: {:#?}", y4);
+
     y4 = mul_fp12(y4, y2);
     y4 = mul_fp12(y4, y2);
     y4 = mul_fp12(y4, y0);
 
+    println!("y4: {:#?}", y4);
+
     y4 = frob_fp12(1, y4);
     y2 = frob_fp12(2, y2);
 

From 6a93a6bee85e5834d0f221ffde8ef1e52a029a53 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 16:57:54 -0800
Subject: [PATCH 070/201] rename

---
 .../asm/curve/bn254/curve_arithmetic/miller_loop.asm       | 6 ++++++
 evm/src/cpu/kernel/tests/{bn254_field.rs => bn254.rs}      | 7 -------
 2 files changed, 6 insertions(+), 7 deletions(-)
 rename evm/src/cpu/kernel/tests/{bn254_field.rs => bn254.rs} (99%)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index f86f3f88..8340a959 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -34,6 +34,12 @@
 /// (4) encoding each pair (n,m) as 0xnm:
 ///     miller_data = [(0x10)n + m for (n,m) in EXP]
 
+global test_miller:
+    // stack: ptr, P, Q, ptr, out, retdest
+    %store_fp6
+    // stack:            ptr, out, retdest
+    %jump(miller_init)
+
 global miller_init:
     // stack:         ptr, out, retdest
     PUSH 1
diff --git a/evm/src/cpu/kernel/tests/bn254_field.rs b/evm/src/cpu/kernel/tests/bn254.rs
similarity index 99%
rename from evm/src/cpu/kernel/tests/bn254_field.rs
rename to evm/src/cpu/kernel/tests/bn254.rs
index e1817fa0..c6e10299 100644
--- a/evm/src/cpu/kernel/tests/bn254_field.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -579,15 +579,10 @@ fn fast_exp(f: Fp12) -> Fp12 {
     
     y0 = inv_fp12(y0);
 
-    // println!("y0: {:#?}", y0);
-    // println!("y4: {:#?}", y4);
-
     y4 = mul_fp12(y4, y2);
     y4 = mul_fp12(y4, y2);
     y4 = mul_fp12(y4, y0);
 
-    println!("y4: {:#?}", y4);
-
     y4 = frob_fp12(1, y4);
     y2 = frob_fp12(2, y2);
 
@@ -602,8 +597,6 @@ fn make_mul_stack(
     g: Fp12,
     mul_label: &str,
 ) -> Vec<U256> {
-    // stack: in0, f, f', in1, g, g', mul_dest, in0, in1, out, ret_stack, out
-
     let in0 = U256::from(in0);
     let in1 = U256::from(in1);
     let out = U256::from(out);

From a99b7d51b11d759150fbca7b3014d8d538e6af0e Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 17:23:05 -0800
Subject: [PATCH 071/201] setup miller

---
 .../bn254/curve_arithmetic/miller_loop.asm    |   7 +-
 evm/src/cpu/kernel/tests/bn254.rs             | 113 ++++++++++++++++--
 evm/src/cpu/kernel/tests/mod.rs               |   2 +-
 3 files changed, 112 insertions(+), 10 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 8340a959..ec957ffd 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -40,6 +40,9 @@ global test_miller:
     // stack:            ptr, out, retdest
     %jump(miller_init)
 
+global return_point:
+
+
 global miller_init:
     // stack:         ptr, out, retdest
     PUSH 1
@@ -74,9 +77,9 @@ miller_final:
     %jump(miller_zero_final)
 miller_end:
     // stack: times, O, P, Q, out, retdest
-    %pop3  %pop3  %pop3
+    POP  %pop2  %pop2  %pop4
     // stack:                 out, retdest
-    SWAP1  %jump(post_mllr)
+    SWAP1  JUMP 
 
 
 miller_one:
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index c6e10299..4feecb04 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -576,7 +576,7 @@ fn fast_exp(f: Fp12) -> Fp12 {
         sq = mul_fp12(sq, sq);
     }
     y0 = mul_fp12(y0, sq);
-    
+
     y0 = inv_fp12(y0);
 
     y4 = mul_fp12(y4, y2);
@@ -744,12 +744,7 @@ fn make_pow_stack(f: Fp12) -> Vec<U256> {
 }
 
 fn make_pow_expected(f: Fp12) -> Vec<U256> {
-    fast_exp(f)
-        .into_iter()
-        .flatten()
-        .flatten()
-        .rev()
-        .collect()
+    fast_exp(f).into_iter().flatten().flatten().rev().collect()
 }
 
 #[test]
@@ -766,3 +761,107 @@ fn test_pow_fp12() -> Result<()> {
 
     Ok(())
 }
+
+fn make_miller_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
+    let ptr = U256::from(300);
+    let out = U256::from(400);
+
+    let p: Vec<U256> = p.into_iter().collect();
+    let q: Vec<U256> = q.into_iter().flatten().collect();
+
+    let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
+
+    let mut input = vec![ptr];
+    input.extend(p);
+    input.extend(q);
+    input.extend(vec![ptr, out, ret_stack]);
+    input.reverse();
+    input
+}
+
+#[test]
+fn test_miller() -> Result<()> {
+    let p = [U256::from(1), U256::from(2)];
+    let q = [
+        [
+            U256::from_str(
+                "10857046999023057135944570762232829481370756359578518086990519993285655852781",
+            )
+            .unwrap(),
+            U256::from_str(
+                "11559732032986387107991004021392285783925812861821192530917403151452391805634",
+            )
+            .unwrap(),
+        ],
+        [
+            U256::from_str(
+                "8495653923123431417604973247489272438418190587263600148770280649306958101930",
+            )
+            .unwrap(),
+            U256::from_str(
+                "4082367875863433681332203403145435568316851327593401208105741076214120093531",
+            )
+            .unwrap(),
+        ],
+    ];
+
+    let test_mill = KERNEL.global_labels["test_miller"];
+    let stack = make_miller_stack(p, q);
+
+    let output: Vec<U256> = run_interpreter(test_mill, stack)?.stack().to_vec();
+    let mut expected: Vec<U256> = vec![
+        U256::from_str(
+            "5408068458366290097693809645929734991458199404659878659553047611146680628954",
+        )
+        .unwrap(),
+        U256::from_str(
+            "7708764853296235550302896633598331924671113766219240748172066028946006022854",
+        )
+        .unwrap(),
+        U256::from_str(
+            "17700926755167371005308910210965003607045179123434251133647055306492170438120",
+        )
+        .unwrap(),
+        U256::from_str(
+            "154397549418641559307524478611787574224314011122269053905755152919215659778",
+        )
+        .unwrap(),
+        U256::from_str(
+            "1984170487336525780293932330785856524432038724373274488958019302386252559231",
+        )
+        .unwrap(),
+        U256::from_str(
+            "3314362000193010715052769662421751145025288853014347901929084743686925091033",
+        )
+        .unwrap(),
+        U256::from_str(
+            "5969572836535217971378806448005698172042029600478282326636924294386246370693",
+        )
+        .unwrap(),
+        U256::from_str(
+            "18564243080196493066086408717287862863335702133957524699743268830525148172506",
+        )
+        .unwrap(),
+        U256::from_str(
+            "17269266067816704782247017427200956927940055030199138534350116254357612253048",
+        )
+        .unwrap(),
+        U256::from_str(
+            "9740411817590043771488498441210821606869449023601574073310485764683435152587",
+        )
+        .unwrap(),
+        U256::from_str(
+            "12727712035316870814661734054996728204626079181372322293888505805399715437139",
+        )
+        .unwrap(),
+        U256::from_str(
+            "20210469749439596480915120057935665765860695731536556057113952828024130849369",
+        )
+        .unwrap(),
+    ];
+    expected.reverse();
+
+    assert_eq!(output, expected);
+
+    Ok(())
+}
diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs
index 0f799df1..a4c43955 100644
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@@ -1,6 +1,6 @@
 mod account_code;
 mod balance;
-mod bn254_field;
+mod bn254;
 mod core;
 mod curve_ops;
 mod ecrecover;

From cee6c6539eb350fabc8cb0369237767d6f03eaee Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 17:27:12 -0800
Subject: [PATCH 072/201] hex

---
 evm/src/cpu/kernel/tests/bn254.rs | 95 ++++++++++---------------------
 1 file changed, 31 insertions(+), 64 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 4feecb04..68a77425 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -784,24 +784,16 @@ fn test_miller() -> Result<()> {
     let p = [U256::from(1), U256::from(2)];
     let q = [
         [
-            U256::from_str(
-                "10857046999023057135944570762232829481370756359578518086990519993285655852781",
-            )
-            .unwrap(),
-            U256::from_str(
-                "11559732032986387107991004021392285783925812861821192530917403151452391805634",
-            )
-            .unwrap(),
+            U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
+                .unwrap(),
+            U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
+                .unwrap(),
         ],
         [
-            U256::from_str(
-                "8495653923123431417604973247489272438418190587263600148770280649306958101930",
-            )
-            .unwrap(),
-            U256::from_str(
-                "4082367875863433681332203403145435568316851327593401208105741076214120093531",
-            )
-            .unwrap(),
+            U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
+                .unwrap(),
+            U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
+                .unwrap(),
         ],
     ];
 
@@ -810,54 +802,29 @@ fn test_miller() -> Result<()> {
 
     let output: Vec<U256> = run_interpreter(test_mill, stack)?.stack().to_vec();
     let mut expected: Vec<U256> = vec![
-        U256::from_str(
-            "5408068458366290097693809645929734991458199404659878659553047611146680628954",
-        )
-        .unwrap(),
-        U256::from_str(
-            "7708764853296235550302896633598331924671113766219240748172066028946006022854",
-        )
-        .unwrap(),
-        U256::from_str(
-            "17700926755167371005308910210965003607045179123434251133647055306492170438120",
-        )
-        .unwrap(),
-        U256::from_str(
-            "154397549418641559307524478611787574224314011122269053905755152919215659778",
-        )
-        .unwrap(),
-        U256::from_str(
-            "1984170487336525780293932330785856524432038724373274488958019302386252559231",
-        )
-        .unwrap(),
-        U256::from_str(
-            "3314362000193010715052769662421751145025288853014347901929084743686925091033",
-        )
-        .unwrap(),
-        U256::from_str(
-            "5969572836535217971378806448005698172042029600478282326636924294386246370693",
-        )
-        .unwrap(),
-        U256::from_str(
-            "18564243080196493066086408717287862863335702133957524699743268830525148172506",
-        )
-        .unwrap(),
-        U256::from_str(
-            "17269266067816704782247017427200956927940055030199138534350116254357612253048",
-        )
-        .unwrap(),
-        U256::from_str(
-            "9740411817590043771488498441210821606869449023601574073310485764683435152587",
-        )
-        .unwrap(),
-        U256::from_str(
-            "12727712035316870814661734054996728204626079181372322293888505805399715437139",
-        )
-        .unwrap(),
-        U256::from_str(
-            "20210469749439596480915120057935665765860695731536556057113952828024130849369",
-        )
-        .unwrap(),
+        U256::from_str("0xbf4dbb7e41fb58122aa29dcced57731d7cbb49b1fe9a73cb13416e1002376da")
+            .unwrap(),
+        U256::from_str("0x110b019c149b43a7fbd6d42d7553debcbebd35c148f63aaecf72a5fbda451ac6")
+            .unwrap(),
+        U256::from_str("0x27225e97ee6c877964c8f32e0b54e61ead09c3e818174cd8b5beabe7cd7385e8")
+            .unwrap(),
+        U256::from_str("0x5762cb6648b4b4c5df8a8874a21d937adf185d91f34e8ccf58f5b39196db02").unwrap(),
+        U256::from_str("0x463002dc1a426b172f4a1e29486fc11eba01de99b559368139c8ef5271eb37f")
+            .unwrap(),
+        U256::from_str("0x753dcc72acdffcc45633803f1b555388969dd7c27d2a674a23a228f522480d9")
+            .unwrap(),
+        U256::from_str("0xd32a892d29151553101376a6638938135e30126f698a40a73f20c6ac64a4585")
+            .unwrap(),
+        U256::from_str("0x290afd3e28c223a624d9f5a737f9f9e4b4200b518333844d81acc445fa5910da")
+            .unwrap(),
+        U256::from_str("0x262e0ee72a8123b741dc113b8e2d207ee8bad011e0f6ae2015439960c789cf78")
+            .unwrap(),
+        U256::from_str("0x1588e0b23d868d7517e3021e620c69eb1521a49faa9bfcd4cf3a54127d4d14cb")
+            .unwrap(),
+        U256::from_str("0x1c23a135a7dfa96db62622c5fef4b9751d121523dd39ca1cefeacb3419835a53")
+            .unwrap(),
+        U256::from_str("0x2caeb873076ec8f37fa7af265d2966dd0024acbc63bd2b21f323084fc71f4a59")
+            .unwrap(),
     ];
     expected.reverse();
 

From 41476ce451aef35cc00b41c9e03149e8f491507c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 17:32:06 -0800
Subject: [PATCH 073/201] fix

---
 .../asm/curve/bn254/curve_arithmetic/miller_loop.asm       | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index ec957ffd..e538155e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -40,9 +40,6 @@ global test_miller:
     // stack:            ptr, out, retdest
     %jump(miller_init)
 
-global return_point:
-
-
 global miller_init:
     // stack:         ptr, out, retdest
     PUSH 1
@@ -126,8 +123,8 @@ miller_zero_final:
 ///     O += O
 
 mul_tangent:
-    // stack:                                                   retdest, 0xnm, times, O, P, Q, out
-    PUSH mul_tangent_2  DUP12  PUSH mul_tangent_1
+    // stack:                                              retdest, 0xnm, times, O, P, Q, out
+    PUSH mul_tangent_2  DUP13  PUSH mul_tangent_1
     // stack:           mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     DUP2  DUP1
     // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out

From bf7da1c20500cdd9b85bea54f6da4451f211db3a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 18:31:26 -0800
Subject: [PATCH 074/201] POP

---
 .../bn254/curve_arithmetic/miller_loop.asm    | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index e538155e..37312b45 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -52,9 +52,9 @@ global miller_init:
     // stack:        P, Q, out, retdest
     DUP2  DUP2
     // stack:     O, P, Q, out, retdest
-    PUSH 61
-    // stack: 61, O, P, Q, out, retdest
-miller_loop:
+    PUSH 53
+    // stack: 53, O, P, Q, out, retdest
+global miller_loop:
     // stack:          times  , O, P, Q, out, retdest
     DUP1  ISZERO
     // stack:  break?, times  , O, P, Q, out, retdest
@@ -67,6 +67,10 @@ miller_loop:
     %mload_kernel_code(miller_data)
     // stack:    0xnm, times-1, O, P, Q, out, retdest
     %jump(miller_one)
+
+miller_loop_pop:
+    POP  %jump(miller_loop)
+    
 miller_final:
     // stack:     0, O, P, Q, out, retdest
     PUSH 28
@@ -95,7 +99,7 @@ miller_zero:
     // stack:              m  , times, O, P, Q, out, retdest
     DUP1  ISZERO
     // stack:       skip?, m  , times, O, P, Q, out, retdest
-    %jumpi(miller_loop)
+    %jumpi(miller_loop_pop)
     // stack:              m  , times, O, P, Q, out, retdest
     %sub_const(1)
     // stack:              m-1, times, O, P, Q, out, retdest
@@ -104,15 +108,15 @@ miller_zero:
     %jump(mul_tangent)
 
 miller_zero_final:
-    // stack:              m  , times, O, P, Q, out, retdest
+    // stack:                    m  , times, O, P, Q, out, retdest
     DUP1  ISZERO
-    // stack:       skip?, m  , times, O, P, Q, out, retdest
+    // stack:             skip?, m  , times, O, P, Q, out, retdest
     %jumpi(miller_end)
-    // stack:              m  , times, O, P, Q, out, retdest
+    // stack:                    m  , times, O, P, Q, out, retdest
     %sub_const(1)
-    // stack:              m-1, times, O, P, Q, out, retdest
-    PUSH miller_zero
-    // stack: miller_zero, m-1, times, O, P, Q, out, retdest
+    // stack:                    m-1, times, O, P, Q, out, retdest
+    PUSH miller_zero_final
+    // stack: miller_zero_final, m-1, times, O, P, Q, out, retdest
     %jump(mul_tangent)
 
 

From bde569a2f36dca66fd61cb17f78a8cbac0606d0d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 18:36:00 -0800
Subject: [PATCH 075/201] it runs

---
 .../asm/curve/bn254/curve_arithmetic/miller_loop.asm      | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 37312b45..1f2d2fc6 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -70,16 +70,16 @@ global miller_loop:
 
 miller_loop_pop:
     POP  %jump(miller_loop)
-    
+
 miller_final:
     // stack:     0, O, P, Q, out, retdest
     PUSH 28
     // stack: 28, 0, O, P, Q, out, retdest
     %jump(miller_zero_final)
 miller_end:
-    // stack: times, O, P, Q, out, retdest
-    POP  %pop2  %pop2  %pop4
-    // stack:                 out, retdest
+    // stack: m, times, O, P, Q, out, retdest
+    %pop2  %pop2  %pop2  %pop4
+    // stack:                    out, retdest
     SWAP1  JUMP 
 
 

From 3c566e987ccc3b711db8cf4a783f248f9441c8a6 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 20 Dec 2022 21:53:52 -0800
Subject: [PATCH 076/201] tangent and cords work

---
 .../bn254/curve_arithmetic/miller_loop.asm    |  50 +++++---
 evm/src/cpu/kernel/tests/bn254.rs             | 113 +++++++++++++++++-
 2 files changed, 139 insertions(+), 24 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 1f2d2fc6..37d17be8 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -196,6 +196,12 @@ after_add:
     %jump(miller_one)
 
 
+global test_store_cord:
+    // stack: p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    %store_cord
+    // stack:
+    %check(100)
+
 /// def store_cord(p1x, p1y, p2x, p2y, qx, qy):
 ///     return sparse_store(
 ///         p1y*p2x - p2y*p1x, 
@@ -243,6 +249,12 @@ after_add:
 %endmacro
 
 
+global test_store_tangent:
+    // stack: px, py, qx, qx_, qy, qy_
+    %store_tangent
+    // stack:
+    %check(100)
+
 /// def store_tangent(px, py, qx, qy):
 ///     return sparse_store(
 ///         py**2 - 9, 
@@ -251,42 +263,42 @@ after_add:
 ///     )
 
 %macro store_tangent
-    // stack:                px, py, qx, qx_, qy, qy_
+    // stack:                px, py, qx, qx_,  qy, qy_
     PUSH 9
-    // stack:             9, px, py, qx, qx_, qy, qy_
+    // stack:             9, px, py, qx, qx_,  qy, qy_
     DUP3
-    // stack:        py , 9, px, py, qx, qx_, qy, qy_
+    // stack:        py , 9, px, py, qx, qx_,  qy, qy_
     DUP1  MULFP254
-    // stack:     py**2 , 9, px, py, qx, qx_, qy, qy_
+    // stack:     py**2 , 9, px, py, qx, qx_,  qy, qy_
     SUBFP254
-    // stack:     py**2 - 9, px, py, qx, qx_, qy, qy_
+    // stack:     py**2 - 9, px, py, qx, qx_,  qy, qy_
     %mstore_kernel_general(100)
-    // stack:                px, py, qx, qx_, qy, qy_
+    // stack:                px, py, qx, qx_,  qy, qy_
     DUP1  MULFP254
-    // stack:             px**2, py, qx, qx_, qy, qy_
+    // stack:             px**2, py, qx, qx_,  qy, qy_
     PUSH 3  MULFP254
-    // stack:           3*px**2, py, qx, qx_, qy, qy_
+    // stack:           3*px**2, py, qx, qx_,  qy, qy_
     PUSH 0  SUBFP254
-    // stack:          -3*px**2, py, qx, qx_, qy, qy_
+    // stack:          -3*px**2, py, qx, qx_,  qy, qy_
     SWAP2
-    // stack:           qx, py, -3px**2, qx_, qy, qy_
+    // stack:           qx, py, -3px**2, qx_,  qy, qy_
     DUP3  MULFP254
-    // stack: (-3*px**2)qx, py, -3px**2, qx_, qy, qy_ 
+    // stack: (-3*px**2)qx, py, -3px**2, qx_,  qy, qy_ 
     %mstore_kernel_general(102)
-    // stack:               py, -3px**2, qx_, qy, qy_ 
+    // stack:               py, -3px**2, qx_,  qy, qy_ 
     PUSH 2  MULFP254
-    // stack:              2py, -3px**2, qx_, qy, qy_ 
+    // stack:              2py, -3px**2, qx_,  qy, qy_ 
     SWAP3 
-    // stack:              qy, -3px**2, qx_, 2py, qy_ 
+    // stack:               qy, -3px**2, qx_, 2py, qy_ 
     DUP4  MULFP254
-    // stack:         (2py)qy, -3px**2, qx_, 2py, qy_ 
+    // stack:          (2py)qy, -3px**2, qx_, 2py, qy_ 
     %mstore_kernel_general(108)
-    // stack:                  -3px**2, qx_, 2py, qy_ 
+    // stack:                   -3px**2, qx_, 2py, qy_ 
     MULFP254
-    // stack:                  (-3px**2)qx_, 2py, qy_ 
+    // stack:                  (-3px**2)*qx_, 2py, qy_ 
     %mstore_kernel_general(103)
-    // stack:                                2py, qy_ 
+    // stack:                                 2py, qy_ 
     MULFP254
-    // stack:                                (2py)qy_ 
+    // stack:                                (2py)*qy_ 
     %mstore_kernel_general(109)
 %endmacro
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 68a77425..16054cd7 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -49,7 +49,7 @@ fn gen_fp12() -> Fp12 {
 }
 
 fn gen_fp12_sparse() -> Fp12 {
-    sparse_embed([gen_fp(), gen_fp(), gen_fp(), gen_fp(), gen_fp()])
+    sparse_embed(gen_fp(), [gen_fp(), gen_fp()], [gen_fp(), gen_fp()])
 }
 
 fn add_fp(x: Fp, y: Fp) -> Fp {
@@ -166,11 +166,10 @@ fn sh(c: Fp6) -> Fp6 {
     [i9(c2), c0, c1]
 }
 
-fn sparse_embed(x: [U256; 5]) -> Fp12 {
-    let [g0, g1, g1_, g2, g2_] = x;
+fn sparse_embed(g0: Fp, g1: Fp2, g2: Fp2) -> Fp12 {
     [
-        [embed_fp2(g0), [g1, g1_], embed_fp2(ZERO)],
-        [embed_fp2(ZERO), [g2, g2_], embed_fp2(ZERO)],
+        [embed_fp2(g0), g1, embed_fp2(ZERO)],
+        [embed_fp2(ZERO), g2, embed_fp2(ZERO)],
     ]
 }
 
@@ -779,6 +778,110 @@ fn make_miller_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
     input
 }
 
+fn store_tangent(p: [Fp; 2], q: [Fp2; 2]) -> Fp12 { // Rust model of the `store_tangent` asm.
+    let [px, py] = p;
+    let [qx, qy] = q;
+
+    let cx = neg_fp(mul_fp(U256::from(3), mul_fp(px, px))); // -3 * px^2
+    let cy = mul_fp(U256::from(2), py); // 2 * py
+
+    sparse_embed(
+        sub_fp(mul_fp(py, py), U256::from(9)), // py^2 - 9
+        mul_fp2(embed_fp2(cx), qx), // (-3 * px^2) * qx
+        mul_fp2(embed_fp2(cy), qy), // (2 * py) * qy
+    )
+}
+
+fn store_cord(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Fp12 { // Model of `store_cord` asm.
+    let [p1x, p1y] = p1;
+    let [p2x, p2y] = p2;
+    let [qx, qy] = q;
+
+    let cx = sub_fp(p2y, p1y); // p2y - p1y, scales qx
+    let cy = sub_fp(p1x, p2x); // p1x - p2x, scales qy
+
+    sparse_embed(
+        sub_fp(mul_fp(p1y, p2x), mul_fp(p2y, p1x)), // p1y*p2x - p2y*p1x
+        mul_fp2(embed_fp2(cx), qx),
+        mul_fp2(embed_fp2(cy), qy),
+    )
+}
+
+fn make_tan_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> { // Flattens (p, q) for the interpreter.
+    let p: Vec<U256> = p.into_iter().collect();
+    let q: Vec<U256> = q.into_iter().flatten().collect();
+
+    let mut input = p;
+    input.extend(q);
+    input.reverse(); // px ends up last; presumably the last entry is the stack top (confirm)
+    input
+}
+
+fn make_tan_expected(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> { // Model output, reversed.
+    store_tangent(p, q)
+        .into_iter()
+        .flatten() // Fp12 -> Fp2 coefficients
+        .flatten() // Fp2 -> individual Fp values
+        .rev()
+        .collect()
+}
+
+#[test]
+fn test_store_tangent() -> Result<()> { // Kernel routine vs. the Rust model on random input.
+    let p = [gen_fp(), gen_fp()];
+    let q = [[gen_fp(), gen_fp()], [gen_fp(), gen_fp()]];
+
+    let expected = make_tan_expected(p, q);
+
+    let stack = make_tan_stack(p, q);
+    let test_tan = KERNEL.global_labels["test_store_tangent"]; // kernel entry point to run
+
+    let output: Vec<U256> = run_interpreter(test_tan, stack)?.stack().to_vec();
+
+    assert_eq!(output, expected); // final interpreter stack must equal the model output
+
+    Ok(())
+}
+
+fn make_cord_stack(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> { // Interpreter input.
+    let p1: Vec<U256> = p1.into_iter().collect();
+    let p2: Vec<U256> = p2.into_iter().collect();
+    let q: Vec<U256> = q.into_iter().flatten().collect();
+
+    let mut input = p1; // order before reversal: p1, p2, then the four coordinates of q
+    input.extend(p2);
+    input.extend(q);
+    input.reverse(); // p1x ends up last; presumably the last entry is the stack top (confirm)
+    input
+}
+
+fn make_cord_expected(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> { // Model output.
+    store_cord(p1, p2, q)
+        .into_iter()
+        .flatten() // Fp12 -> Fp2 coefficients
+        .flatten() // Fp2 -> individual Fp values
+        .rev() // reversed to match the order the test reads back from the interpreter
+        .collect()
+}
+
+#[test]
+fn test_store_cord() -> Result<()> { // Kernel routine vs. the Rust model on random input.
+    let p1 = [gen_fp(), gen_fp()];
+    let p2 = [gen_fp(), gen_fp()];
+    let q = [[gen_fp(), gen_fp()], [gen_fp(), gen_fp()]];
+
+    let expected = make_cord_expected(p1, p2, q);
+
+    let stack = make_cord_stack(p1, p2, q);
+    let test_cord = KERNEL.global_labels["test_store_cord"]; // kernel entry point to run
+
+    let output: Vec<U256> = run_interpreter(test_cord, stack)?.stack().to_vec();
+
+    assert_eq!(output, expected); // final interpreter stack must equal the model output
+
+    Ok(())
+}
+
 #[test]
 fn test_miller() -> Result<()> {
     let p = [U256::from(1), U256::from(2)];

From d5cec0e68f318fb9c1d5fa785e299e34d8b3cb2c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 21 Dec 2022 14:52:54 -0800
Subject: [PATCH 077/201] clean up code org

---
 evm/src/bn254.rs                  |  603 +++++++++++++++++
 evm/src/cpu/kernel/interpreter.rs |   10 +-
 evm/src/cpu/kernel/tests/bn254.rs | 1011 ++++++-----------------------
 evm/src/lib.rs                    |    1 +
 4 files changed, 806 insertions(+), 819 deletions(-)
 create mode 100644 evm/src/bn254.rs

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
new file mode 100644
index 00000000..631065dd
--- /dev/null
+++ b/evm/src/bn254.rs
@@ -0,0 +1,603 @@
+use std::str::FromStr;
+
+use ethereum_types::U256;
+use rand::{thread_rng, Rng};
+
+pub const BN_BASE: U256 = U256([ // Order of the BN254 base field (modulus for Fp).
+    4332616871279656263,
+    10917124144477883021,
+    13281191951274694749,
+    3486998266802970665,
+]);
+
+pub type Fp = U256; // field element, kept reduced modulo BN_BASE by the helpers below
+pub type Fp2 = [U256; 2]; // two Fp coordinates [a, a_]
+pub type Fp6 = [Fp2; 3]; // three Fp2 coefficients [c0, c1, c2]
+pub type Fp12 = [Fp6; 2]; // two Fp6 halves [f0, f1]
+
+pub type Curve = [Fp; 2]; // two Fp coordinates, as produced by gen_curve_point
+pub type TwistedCurve = [Fp2; 2]; // two Fp2 coordinates, as produced by gen_twisted_curve_point
+
+const ZERO: Fp = U256([0, 0, 0, 0]); // the zero element of Fp
+
+fn embed_fp2(x: Fp) -> Fp2 { // Lifts x into Fp2 as [x, 0].
+    [x, ZERO]
+}
+
+fn embed_fp2_fp6(a: Fp2) -> Fp6 { // Lifts a into Fp6 as [a, 0, 0].
+    [a, embed_fp2(ZERO), embed_fp2(ZERO)]
+}
+
+fn embed_fp6(x: Fp) -> Fp6 { // Lifts x from Fp into Fp6 by way of Fp2.
+    embed_fp2_fp6(embed_fp2(x))
+}
+
+fn embed_fp12(x: Fp) -> Fp12 { // Lifts x from Fp into Fp12: x in the first half, zero in the second.
+    [embed_fp6(x), embed_fp6(ZERO)]
+}
+
+fn gen_fp() -> Fp { // Random Fp: 256 random bits reduced modulo BN_BASE.
+    let mut rng = thread_rng();
+    let limbs = rng.gen::<[u64; 4]>(); // each limb is drawn independently
+    U256(limbs) % BN_BASE
+}
+
+fn gen_fp2() -> Fp2 { // Random Fp2: two independent gen_fp draws.
+    [gen_fp(), gen_fp()]
+}
+
+pub fn gen_curve_point() -> Curve { // Two random Fp coordinates; no curve equation is checked here.
+    gen_fp2()
+}
+
+pub fn gen_twisted_curve_point() -> TwistedCurve { // Two random Fp2 coordinates; nothing is checked.
+    [gen_fp2(), gen_fp2()]
+}
+
+fn gen_fp6() -> Fp6 { // Random Fp6: three independent gen_fp2 draws.
+    [gen_fp2(), gen_fp2(), gen_fp2()]
+}
+
+pub fn gen_fp12() -> Fp12 { // Random Fp12: two independent gen_fp6 draws.
+    [gen_fp6(), gen_fp6()]
+}
+
+pub fn gen_fp12_sparse() -> Fp12 { // Random Fp12 in the sparse shape built by sparse_embed.
+    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
+}
+
+fn add_fp(x: Fp, y: Fp) -> Fp { // (x + y) mod BN_BASE; assumes the raw U256 sum fits (confirm)
+    (x + y) % BN_BASE
+}
+
+fn add3_fp(x: Fp, y: Fp, z: Fp) -> Fp { // (x + y + z) mod BN_BASE with a single reduction.
+    (x + y + z) % BN_BASE
+}
+
+fn mul_fp(x: Fp, y: Fp) -> Fp { // (x * y) mod BN_BASE, reduced from the full-width product.
+    U256::try_from(x.full_mul(y) % BN_BASE).unwrap() // remainder < BN_BASE, so it fits
+}
+
+fn sub_fp(x: Fp, y: Fp) -> Fp { // (x - y) mod BN_BASE; BN_BASE is added first so y > x works.
+    (BN_BASE + x - y) % BN_BASE
+}
+
+fn neg_fp(x: Fp) -> Fp { // -x mod BN_BASE; the final reduction maps x = 0 to 0, not BN_BASE.
+    (BN_BASE - x) % BN_BASE
+}
+
+fn conj_fp2(a: Fp2) -> Fp2 {
+    // Conjugation keeps the first coordinate and negates the second.
+    [a[0], neg_fp(a[1])]
+}
+
+fn add_fp2(a: Fp2, b: Fp2) -> Fp2 {
+    // Coordinate-wise addition in Fp.
+    let first = add_fp(a[0], b[0]);
+    let second = add_fp(a[1], b[1]);
+    [first, second]
+}
+
+fn add3_fp2(a: Fp2, b: Fp2, c: Fp2) -> Fp2 {
+    // Coordinate-wise sum of three Fp2 elements, one reduction per coordinate.
+    let first = add3_fp(a[0], b[0], c[0]);
+    let second = add3_fp(a[1], b[1], c[1]);
+    [first, second]
+}
+
+fn sub_fp2(a: Fp2, b: Fp2) -> Fp2 {
+    // Coordinate-wise subtraction in Fp.
+    let first = sub_fp(a[0], b[0]);
+    let second = sub_fp(a[1], b[1]);
+    [first, second]
+}
+
+fn neg_fp2(a: Fp2) -> Fp2 { // Negation in Fp2, computed as 0 - a.
+    sub_fp2([ZERO, ZERO], a)
+}
+
+fn mul_fp2(a: Fp2, b: Fp2) -> Fp2 {
+    // Product of (a0 + a1*i) and (b0 + b1*i) under i^2 = -1:
+    // first coordinate a0*b0 - a1*b1, second coordinate a0*b1 + a1*b0.
+    let ([a0, a1], [b0, b1]) = (a, b);
+    let first = sub_fp(mul_fp(a0, b0), mul_fp(a1, b1));
+    let second = add_fp(mul_fp(a0, b1), mul_fp(a1, b0));
+    [first, second]
+}
+
+fn i9(a: Fp2) -> Fp2 {
+    // Multiplies by (9 + i): (9 + i)(x + y*i) = (9x - y) + (x + 9y)*i.
+    let times9 = |v: Fp| mul_fp(U256::from(9), v);
+    [sub_fp(times9(a[0]), a[1]), add_fp(a[0], times9(a[1]))]
+}
+
+fn add_fp6(c: Fp6, d: Fp6) -> Fp6 {
+    // Fp6 addition is coefficient-wise: each Fp2 coefficient of `c` is
+    // added to the matching coefficient of `d`.
+    [
+        add_fp2(c[0], d[0]),
+        add_fp2(c[1], d[1]),
+        add_fp2(c[2], d[2]),
+    ]
+}
+
+fn sub_fp6(c: Fp6, d: Fp6) -> Fp6 {
+    // Fp6 subtraction is coefficient-wise: each Fp2 coefficient of `d` is
+    // subtracted from the matching coefficient of `c`.
+    [
+        sub_fp2(c[0], d[0]),
+        sub_fp2(c[1], d[1]),
+        sub_fp2(c[2], d[2]),
+    ]
+}
+
+fn neg_fp6(a: Fp6) -> Fp6 { // Negation in Fp6, computed as 0 - a.
+    sub_fp6(embed_fp6(ZERO), a)
+}
+
+fn mul_fp6(c: Fp6, d: Fp6) -> Fp6 { // Schoolbook Fp6 product built from nine Fp2 products.
+    let [c0, c1, c2] = c;
+    let [d0, d1, d2] = d;
+
+    let c0d0 = mul_fp2(c0, d0);
+    let c0d1 = mul_fp2(c0, d1);
+    let c0d2 = mul_fp2(c0, d2);
+    let c1d0 = mul_fp2(c1, d0);
+    let c1d1 = mul_fp2(c1, d1);
+    let c1d2 = mul_fp2(c1, d2);
+    let c2d0 = mul_fp2(c2, d0);
+    let c2d1 = mul_fp2(c2, d1);
+    let c2d2 = mul_fp2(c2, d2);
+    let cd12 = add_fp2(c1d2, c2d1); // the two products whose indices sum to 3
+
+    [
+        add_fp2(c0d0, i9(cd12)), // index-sum 3 terms wrap to coefficient 0 through i9
+        add3_fp2(c0d1, c1d0, i9(c2d2)), // index-sum 4 term wraps to coefficient 1 through i9
+        add3_fp2(c0d2, c1d1, c2d0),
+    ]
+}
+
+fn sh(c: Fp6) -> Fp6 { // Shifts coefficients up one slot; the top one wraps to slot 0 through i9.
+    let [c0, c1, c2] = c;
+    [i9(c2), c0, c1]
+}
+
+fn sparse_embed(g0: Fp, g1: Fp2, g2: Fp2) -> Fp12 { // Fp12 with only three non-zero slots.
+    [
+        [embed_fp2(g0), g1, embed_fp2(ZERO)], // first half: g0 lifted to Fp2, then g1, then 0
+        [embed_fp2(ZERO), g2, embed_fp2(ZERO)], // second half: only the middle slot holds g2
+    ]
+}
+
+pub fn mul_fp12(f: Fp12, g: Fp12) -> Fp12 { // Fp12 product using three Fp6 multiplications.
+    let [f0, f1] = f;
+    let [g0, g1] = g;
+
+    let h0 = mul_fp6(f0, g0);
+    let h1 = mul_fp6(f1, g1);
+    let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1)); // (f0 + f1)(g0 + g1)
+    [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))] // second half equals f0*g1 + f1*g0
+}
+
+fn frob_fp6(n: usize, c: Fp6) -> Fp6 { // Presumably the n-th Frobenius power on Fp6 (confirm).
+    let [c0, c1, c2] = c;
+    let _c0 = conj_fp2(c0);
+    let _c1 = conj_fp2(c1);
+    let _c2 = conj_fp2(c2);
+
+    let n = n % 6; // the tables only have entries 0..=5
+    let frob_t1 = frob_t1(n); // constant that scales the c1 coefficient
+    let frob_t2 = frob_t2(n); // constant that scales the c2 coefficient
+
+    if n % 2 != 0 { // odd n: every Fp2 coefficient is conjugated before scaling
+        [_c0, mul_fp2(frob_t1, _c1), mul_fp2(frob_t2, _c2)]
+    } else {
+        [c0, mul_fp2(frob_t1, c1), mul_fp2(frob_t2, c2)]
+    }
+}
+
+pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 { // frob_fp6 on both halves; second half is scaled.
+    let [f0, f1] = f;
+    let scale = embed_fp2_fp6(frob_z(n % 12)); // frob_z has entries 0..=11; wrap like frob_fp6
+
+    [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
+}
+
+fn exp_fp(x: Fp, e: U256) -> Fp { // x^e mod BN_BASE by square-and-multiply over all 256 bits of e.
+    let mut square = x;
+    let mut acc = U256::one();
+
+    for bit in 0..256 {
+        if e.bit(bit) {
+            acc = mul_fp(acc, square);
+        }
+        square = mul_fp(square, square);
+    }
+    acc
+}
+
+fn inv_fp(x: Fp) -> Fp { // Inverse as x^(BN_BASE - 2); nothing special-cases x = 0.
+    exp_fp(x, BN_BASE - 2)
+}
+
+fn inv_fp2(a: Fp2) -> Fp2 { // Fp2 inverse: conj(a) scaled by 1 / (a0^2 + a1^2).
+    let [a0, a1] = a;
+    let norm_inv = inv_fp(add_fp(mul_fp(a0, a0), mul_fp(a1, a1)));
+    [mul_fp(norm_inv, a0), neg_fp(mul_fp(norm_inv, a1))]
+}
+
+fn inv_fp6(c: Fp6) -> Fp6 { // Fp6 inverse built from frob_fp6 images of c and one inv_fp call.
+    let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
+    let e = mul_fp6(b, frob_fp6(5, c))[0]; // first Fp2 coefficient only (rest presumably zero)
+    let n = mul_fp2(e, conj_fp2(e))[0]; // e * conj(e); only the first Fp coordinate is kept
+    let i = inv_fp(n);
+    let d = mul_fp2(embed_fp2(i), e); // e / (e * conj(e))
+    let [f0, f1, f2] = frob_fp6(1, b);
+    [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)] // frob_fp6(1, b) scaled by d
+}
+
+fn inv_fp12(f: Fp12) -> Fp12 { // Fp12 inverse built from Frobenius images and one inv_fp call.
+    let [f0, f1] = f;
+    let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0]; // first Fp6 half only
+    let b = mul_fp6(a, frob_fp6(2, a));
+    let c = mul_fp6(b, frob_fp6(4, a))[0]; // first Fp2 coefficient only (rest presumably zero)
+    let n = mul_fp2(c, conj_fp2(c))[0]; // c * conj(c); only the first Fp coordinate is kept
+    let i = inv_fp(n);
+    let d = mul_fp2(embed_fp2(i), c); // c / (c * conj(c))
+    let [g0, g1, g2] = frob_fp6(1, b);
+    let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)]; // frob_fp6(1, b) scaled by d
+    [mul_fp6(e, f0), neg_fp6(mul_fp6(e, f1))] // e * [f0, -f1]
+}
+
+pub fn power(f: Fp12) -> Fp12 { // Fixed exponentiation of f driven by the EXPS* bit tables.
+    let mut sq: Fp12 = f; // squared once per loop iteration across all three phases
+    let mut y0: Fp12 = embed_fp12(U256::one()); // the three accumulators start at 1
+    let mut y2: Fp12 = embed_fp12(U256::one());
+    let mut y4: Fp12 = embed_fp12(U256::one());
+
+    for (a, b, c) in EXPS4 { // phase 1: flags route sq into y4 / y2 / y0
+        if a {
+            y4 = mul_fp12(y4, sq);
+        }
+        if b {
+            y2 = mul_fp12(y2, sq);
+        }
+        if c {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y4 = mul_fp12(y4, sq); // y4 unconditionally takes the next square, then is finished
+
+    for (a, b) in EXPS2 { // phase 2: flags route sq into y2 / y0
+        if a {
+            y2 = mul_fp12(y2, sq);
+        }
+        if b {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y2 = mul_fp12(y2, sq); // y2 unconditionally takes the next square, then is finished
+
+    for a in EXPS0 { // phase 3: flag routes sq into y0
+        if a {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y0 = mul_fp12(y0, sq); // y0 unconditionally takes the final square
+
+    y0 = inv_fp12(y0); // from here on y0 is used inverted
+
+    y4 = mul_fp12(y4, y2); // y4 <- y4 * y2 * y2 * y0
+    y4 = mul_fp12(y4, y2);
+    y4 = mul_fp12(y4, y0);
+
+    y4 = frob_fp12(1, y4);
+    y2 = frob_fp12(2, y2);
+
+    mul_fp12(mul_fp12(y4, y2), y0)
+}
+
+fn frob_t1(n: usize) -> Fp2 { // Constant that frob_fp6 multiplies into the c1 coefficient.
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
+                .unwrap(),
+            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
+                .unwrap(),
+            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
+                .unwrap(),
+            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
+                .unwrap(),
+        ],
+        _ => panic!(), // only 0..=5 are defined; frob_fp6 reduces n modulo 6 before calling
+    }
+}
+
+fn frob_t2(n: usize) -> Fp2 { // Constant that frob_fp6 multiplies into the c2 coefficient.
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
+                .unwrap(),
+            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
+                .unwrap(),
+            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
+                .unwrap(),
+            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
+                .unwrap(),
+        ],
+        _ => panic!(), // only 0..=5 are defined; frob_fp6 reduces n modulo 6 before calling
+    }
+}
+
+fn frob_z(n: usize) -> Fp2 { // Constant that frob_fp12 uses to scale the second Fp6 half.
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
+                .unwrap(),
+            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
+                .unwrap(),
+            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
+                .unwrap(),
+            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
+                .unwrap(),
+        ],
+        6 => [
+            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        7 => [
+            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
+                .unwrap(),
+            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
+                .unwrap(),
+        ],
+        8 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        9 => [
+            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
+                .unwrap(),
+            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
+                .unwrap(),
+        ],
+        10 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        11 => [
+            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
+                .unwrap(),
+            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
+                .unwrap(),
+        ],
+        _ => panic!(), // only 0..=11 are defined; any other n aborts
+    }
+}
+
+const EXPS4: [(bool, bool, bool); 65] = [ // power() phase 1 flags: (y4, y2, y0)
+    (true, true, true),
+    (true, true, false),
+    (true, true, true),
+    (true, true, true),
+    (false, false, false),
+    (false, false, true),
+    (true, false, true),
+    (false, true, false),
+    (true, false, true),
+    (true, true, false),
+    (true, false, true),
+    (false, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (false, true, false),
+    (false, true, false),
+    (false, false, true),
+    (true, false, true),
+    (true, true, false),
+    (false, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (false, false, true),
+    (false, false, true),
+    (true, false, true),
+    (true, false, true),
+    (true, true, false),
+    (true, false, false),
+    (true, true, false),
+    (false, true, false),
+    (true, true, false),
+    (true, false, false),
+    (false, true, false),
+    (false, false, false),
+    (true, false, false),
+    (true, false, false),
+    (true, false, true),
+    (false, false, true),
+    (false, true, true),
+    (false, false, true),
+    (false, true, true),
+    (false, true, true),
+    (false, false, false),
+    (true, true, true),
+    (true, false, true),
+    (true, false, true),
+    (false, true, true),
+    (true, false, true),
+    (false, true, true),
+    (false, true, true),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, false, false),
+    (false, false, true),
+    (true, false, false),
+    (false, false, true),
+    (true, false, true),
+    (true, true, false),
+    (true, true, true),
+    (false, true, true),
+    (false, true, false),
+    (true, true, true),
+];
+
+const EXPS2: [(bool, bool); 62] = [ // power() phase 2 flags: (y2, y0)
+    (true, false),
+    (true, true),
+    (false, false),
+    (true, false),
+    (true, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (true, false),
+    (false, true),
+    (false, true),
+    (true, true),
+    (true, true),
+    (false, false),
+    (true, true),
+    (false, false),
+    (false, false),
+    (false, true),
+    (false, true),
+    (true, true),
+    (true, true),
+    (true, true),
+    (false, true),
+    (true, true),
+    (false, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (false, false),
+    (true, true),
+    (true, true),
+    (true, false),
+    (false, false),
+    (false, true),
+    (false, false),
+    (true, true),
+    (false, true),
+    (false, false),
+    (true, false),
+    (false, true),
+    (false, true),
+    (true, false),
+    (false, true),
+    (false, false),
+    (false, false),
+    (false, false),
+    (false, true),
+    (true, false),
+    (true, true),
+    (false, true),
+    (true, true),
+    (true, false),
+    (false, true),
+    (false, false),
+    (true, false),
+    (false, true),
+    (true, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (false, true),
+    (true, true),
+];
+
+const EXPS0: [bool; 65] = [ // power() phase 3 flags: whether y0 absorbs the current square
+    false, false, true, false, false, true, true, false, true, false, true, true, true, false,
+    true, false, false, false, true, false, false, true, false, true, false, true, true, false,
+    false, false, false, false, true, false, true, false, true, true, true, false, false, true,
+    true, true, true, false, true, false, true, true, false, false, true, false, false, false,
+    true, true, true, true, false, false, true, true, false,
+];
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index e53b4cae..580adeb7 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -7,6 +7,8 @@ use ethereum_types::{U256, U512};
 use keccak_hash::keccak;
 use plonky2::field::goldilocks_field::GoldilocksField;
 
+use crate::bn254::BN_BASE;
+
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::constants::context_metadata::ContextMetadata;
 use crate::cpu::kernel::constants::global_metadata::GlobalMetadata;
@@ -23,14 +25,6 @@ type F = GoldilocksField;
 /// Halt interpreter execution whenever a jump to this offset is done.
 const DEFAULT_HALT_OFFSET: usize = 0xdeadbeef;
 
-/// Order of the BN254 base field.
-pub const BN_BASE: U256 = U256([
-    4332616871279656263,
-    10917124144477883021,
-    13281191951274694749,
-    3486998266802970665,
-]);
-
 impl MemoryState {
     fn mload_general(&self, context: usize, segment: Segment, offset: usize) -> U256 {
         self.get(MemoryAddress::new(context, segment, offset))
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 16054cd7..0f402c25 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -2,590 +2,20 @@ use std::str::FromStr;
 
 use anyhow::Result;
 use ethereum_types::U256;
-use rand::{thread_rng, Rng};
 
+use crate::bn254::{
+    frob_fp12, gen_curve_point, gen_fp12, gen_fp12_sparse, gen_twisted_curve_point, mul_fp12,
+    power, Fp, Fp12, Fp2,
+};
 use crate::cpu::kernel::aggregator::KERNEL;
-use crate::cpu::kernel::interpreter::{run_interpreter, BN_BASE};
+use crate::cpu::kernel::interpreter::run_interpreter;
 
-type Fp = U256;
-type Fp2 = [U256; 2];
-type Fp6 = [Fp2; 3];
-type Fp12 = [Fp6; 2];
-
-const ZERO: Fp = U256([0, 0, 0, 0]);
-
-fn embed_fp2(x: Fp) -> Fp2 {
-    [x, ZERO]
+fn fp12_as_stack_input(f: Fp12) -> Vec<U256> {
+    f.into_iter().flatten().flatten().collect()
 }
 
-fn embed_fp2_fp6(a: Fp2) -> Fp6 {
-    [a, embed_fp2(ZERO), embed_fp2(ZERO)]
-}
-
-fn embed_fp6(x: Fp) -> Fp6 {
-    embed_fp2_fp6(embed_fp2(x))
-}
-
-fn embed_fp12(x: Fp) -> Fp12 {
-    [embed_fp6(x), embed_fp6(ZERO)]
-}
-
-fn gen_fp() -> Fp {
-    let mut rng = thread_rng();
-    let x64 = rng.gen::<u64>();
-    U256([x64, x64, x64, x64]) % BN_BASE
-}
-
-fn gen_fp6() -> Fp6 {
-    [
-        [gen_fp(), gen_fp()],
-        [gen_fp(), gen_fp()],
-        [gen_fp(), gen_fp()],
-    ]
-}
-
-fn gen_fp12() -> Fp12 {
-    [gen_fp6(), gen_fp6()]
-}
-
-fn gen_fp12_sparse() -> Fp12 {
-    sparse_embed(gen_fp(), [gen_fp(), gen_fp()], [gen_fp(), gen_fp()])
-}
-
-fn add_fp(x: Fp, y: Fp) -> Fp {
-    (x + y) % BN_BASE
-}
-
-fn add3_fp(x: Fp, y: Fp, z: Fp) -> Fp {
-    (x + y + z) % BN_BASE
-}
-
-fn mul_fp(x: Fp, y: Fp) -> Fp {
-    U256::try_from(x.full_mul(y) % BN_BASE).unwrap()
-}
-
-fn sub_fp(x: Fp, y: Fp) -> Fp {
-    (BN_BASE + x - y) % BN_BASE
-}
-
-fn neg_fp(x: Fp) -> Fp {
-    (BN_BASE - x) % BN_BASE
-}
-
-fn conj_fp2(a: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    [a, neg_fp(a_)]
-}
-
-fn add_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [add_fp(a, b), add_fp(a_, b_)]
-}
-
-fn add3_fp2(a: Fp2, b: Fp2, c: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    let [c, c_] = c;
-    [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
-}
-
-fn sub_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [sub_fp(a, b), sub_fp(a_, b_)]
-}
-
-fn neg_fp2(a: Fp2) -> Fp2 {
-    sub_fp2(embed_fp2(ZERO), a)
-}
-
-fn mul_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [
-        sub_fp(mul_fp(a, b), mul_fp(a_, b_)),
-        add_fp(mul_fp(a, b_), mul_fp(a_, b)),
-    ]
-}
-
-fn i9(a: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let nine = U256::from(9);
-    [sub_fp(mul_fp(nine, a), a_), add_fp(a, mul_fp(nine, a_))]
-}
-
-fn add_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
-
-    let e0 = add_fp2(c0, d0);
-    let e1 = add_fp2(c1, d1);
-    let e2 = add_fp2(c2, d2);
-    [e0, e1, e2]
-}
-
-fn sub_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
-
-    let e0 = sub_fp2(c0, d0);
-    let e1 = sub_fp2(c1, d1);
-    let e2 = sub_fp2(c2, d2);
-    [e0, e1, e2]
-}
-
-fn neg_fp6(a: Fp6) -> Fp6 {
-    sub_fp6(embed_fp6(ZERO), a)
-}
-
-fn mul_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
-
-    let c0d0 = mul_fp2(c0, d0);
-    let c0d1 = mul_fp2(c0, d1);
-    let c0d2 = mul_fp2(c0, d2);
-    let c1d0 = mul_fp2(c1, d0);
-    let c1d1 = mul_fp2(c1, d1);
-    let c1d2 = mul_fp2(c1, d2);
-    let c2d0 = mul_fp2(c2, d0);
-    let c2d1 = mul_fp2(c2, d1);
-    let c2d2 = mul_fp2(c2, d2);
-    let cd12 = add_fp2(c1d2, c2d1);
-
-    [
-        add_fp2(c0d0, i9(cd12)),
-        add3_fp2(c0d1, c1d0, i9(c2d2)),
-        add3_fp2(c0d2, c1d1, c2d0),
-    ]
-}
-
-fn sh(c: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    [i9(c2), c0, c1]
-}
-
-fn sparse_embed(g0: Fp, g1: Fp2, g2: Fp2) -> Fp12 {
-    [
-        [embed_fp2(g0), g1, embed_fp2(ZERO)],
-        [embed_fp2(ZERO), g2, embed_fp2(ZERO)],
-    ]
-}
-
-fn mul_fp12(f: Fp12, g: Fp12) -> Fp12 {
-    let [f0, f1] = f;
-    let [g0, g1] = g;
-
-    let h0 = mul_fp6(f0, g0);
-    let h1 = mul_fp6(f1, g1);
-    let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
-    [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
-}
-
-fn frob_t1(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
-                .unwrap(),
-            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
-                .unwrap(),
-            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
-                .unwrap(),
-            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
-fn frob_t2(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
-                .unwrap(),
-            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
-                .unwrap(),
-            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
-                .unwrap(),
-            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
-fn frob_z(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
-                .unwrap(),
-            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
-                .unwrap(),
-            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
-                .unwrap(),
-            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
-                .unwrap(),
-        ],
-        6 => [
-            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        7 => [
-            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
-                .unwrap(),
-            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
-                .unwrap(),
-        ],
-        8 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        9 => [
-            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
-                .unwrap(),
-            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
-                .unwrap(),
-        ],
-        10 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        11 => [
-            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
-                .unwrap(),
-            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
-fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let _c0 = conj_fp2(c0);
-    let _c1 = conj_fp2(c1);
-    let _c2 = conj_fp2(c2);
-
-    let n = n % 6;
-    let frob_t1 = frob_t1(n);
-    let frob_t2 = frob_t2(n);
-
-    if n % 2 != 0 {
-        [_c0, mul_fp2(frob_t1, _c1), mul_fp2(frob_t2, _c2)]
-    } else {
-        [c0, mul_fp2(frob_t1, c1), mul_fp2(frob_t2, c2)]
-    }
-}
-
-fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
-    let [f0, f1] = f;
-    let scale = embed_fp2_fp6(frob_z(n));
-
-    [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
-}
-
-fn exp_fp(x: Fp, e: U256) -> Fp {
-    let mut current = x;
-    let mut product = U256::one();
-
-    for j in 0..256 {
-        if e.bit(j) {
-            product = U256::try_from(product.full_mul(current) % BN_BASE).unwrap();
-        }
-        current = U256::try_from(current.full_mul(current) % BN_BASE).unwrap();
-    }
-    product
-}
-
-fn inv_fp(x: Fp) -> Fp {
-    exp_fp(x, BN_BASE - 2)
-}
-
-fn inv_fp2(a: Fp2) -> Fp2 {
-    let [a0, a1] = a;
-    let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
-    [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
-}
-
-fn inv_fp6(c: Fp6) -> Fp6 {
-    let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
-    let e = mul_fp6(b, frob_fp6(5, c))[0];
-    let n = mul_fp2(e, conj_fp2(e))[0];
-    let i = inv_fp(n);
-    let d = mul_fp2(embed_fp2(i), e);
-    let [f0, f1, f2] = frob_fp6(1, b);
-    [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
-}
-
-fn inv_fp12(f: Fp12) -> Fp12 {
-    let [f0, f1] = f;
-    let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
-    let b = mul_fp6(a, frob_fp6(2, a));
-    let c = mul_fp6(b, frob_fp6(4, a))[0];
-    let n = mul_fp2(c, conj_fp2(c))[0];
-    let i = inv_fp(n);
-    let d = mul_fp2(embed_fp2(i), c);
-    let [g0, g1, g2] = frob_fp6(1, b);
-    let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)];
-    [mul_fp6(e, f0), neg_fp6(mul_fp6(e, f1))]
-}
-
-const EXPS4: [(bool, bool, bool); 65] = [
-    (true, true, true),
-    (true, true, false),
-    (true, true, true),
-    (true, true, true),
-    (false, false, false),
-    (false, false, true),
-    (true, false, true),
-    (false, true, false),
-    (true, false, true),
-    (true, true, false),
-    (true, false, true),
-    (false, true, false),
-    (true, true, false),
-    (true, true, false),
-    (true, true, false),
-    (false, true, false),
-    (false, true, false),
-    (false, false, true),
-    (true, false, true),
-    (true, true, false),
-    (false, true, false),
-    (true, true, false),
-    (true, true, false),
-    (true, true, false),
-    (false, false, true),
-    (false, false, true),
-    (true, false, true),
-    (true, false, true),
-    (true, true, false),
-    (true, false, false),
-    (true, true, false),
-    (false, true, false),
-    (true, true, false),
-    (true, false, false),
-    (false, true, false),
-    (false, false, false),
-    (true, false, false),
-    (true, false, false),
-    (true, false, true),
-    (false, false, true),
-    (false, true, true),
-    (false, false, true),
-    (false, true, true),
-    (false, true, true),
-    (false, false, false),
-    (true, true, true),
-    (true, false, true),
-    (true, false, true),
-    (false, true, true),
-    (true, false, true),
-    (false, true, true),
-    (false, true, true),
-    (true, true, false),
-    (true, true, false),
-    (true, true, false),
-    (true, false, false),
-    (false, false, true),
-    (true, false, false),
-    (false, false, true),
-    (true, false, true),
-    (true, true, false),
-    (true, true, true),
-    (false, true, true),
-    (false, true, false),
-    (true, true, true),
-];
-
-const EXPS2: [(bool, bool); 62] = [
-    (true, false),
-    (true, true),
-    (false, false),
-    (true, false),
-    (true, false),
-    (true, true),
-    (true, false),
-    (true, true),
-    (true, false),
-    (false, true),
-    (false, true),
-    (true, true),
-    (true, true),
-    (false, false),
-    (true, true),
-    (false, false),
-    (false, false),
-    (false, true),
-    (false, true),
-    (true, true),
-    (true, true),
-    (true, true),
-    (false, true),
-    (true, true),
-    (false, false),
-    (true, true),
-    (true, false),
-    (true, true),
-    (false, false),
-    (true, true),
-    (true, true),
-    (true, false),
-    (false, false),
-    (false, true),
-    (false, false),
-    (true, true),
-    (false, true),
-    (false, false),
-    (true, false),
-    (false, true),
-    (false, true),
-    (true, false),
-    (false, true),
-    (false, false),
-    (false, false),
-    (false, false),
-    (false, true),
-    (true, false),
-    (true, true),
-    (false, true),
-    (true, true),
-    (true, false),
-    (false, true),
-    (false, false),
-    (true, false),
-    (false, true),
-    (true, false),
-    (true, true),
-    (true, false),
-    (true, true),
-    (false, true),
-    (true, true),
-];
-
-const EXPS0: [bool; 65] = [
-    false, false, true, false, false, true, true, false, true, false, true, true, true, false,
-    true, false, false, false, true, false, false, true, false, true, false, true, true, false,
-    false, false, false, false, true, false, true, false, true, true, true, false, false, true,
-    true, true, true, false, true, false, true, true, false, false, true, false, false, false,
-    true, true, true, true, false, false, true, true, false,
-];
-
-fn fast_exp(f: Fp12) -> Fp12 {
-    let mut sq: Fp12 = f;
-    let mut y0: Fp12 = embed_fp12(U256::one());
-    let mut y2: Fp12 = embed_fp12(U256::one());
-    let mut y4: Fp12 = embed_fp12(U256::one());
-
-    for (a, b, c) in EXPS4 {
-        if a {
-            y4 = mul_fp12(y4, sq);
-        }
-        if b {
-            y2 = mul_fp12(y2, sq);
-        }
-        if c {
-            y0 = mul_fp12(y0, sq);
-        }
-        sq = mul_fp12(sq, sq);
-    }
-    y4 = mul_fp12(y4, sq);
-
-    for (a, b) in EXPS2 {
-        if a {
-            y2 = mul_fp12(y2, sq);
-        }
-        if b {
-            y0 = mul_fp12(y0, sq);
-        }
-        sq = mul_fp12(sq, sq);
-    }
-    y2 = mul_fp12(y2, sq);
-
-    for a in EXPS0 {
-        if a {
-            y0 = mul_fp12(y0, sq);
-        }
-        sq = mul_fp12(sq, sq);
-    }
-    y0 = mul_fp12(y0, sq);
-
-    y0 = inv_fp12(y0);
-
-    y4 = mul_fp12(y4, y2);
-    y4 = mul_fp12(y4, y2);
-    y4 = mul_fp12(y4, y0);
-
-    y4 = frob_fp12(1, y4);
-    y2 = frob_fp12(2, y2);
-
-    mul_fp12(mul_fp12(y4, y2), y0)
+fn fp12_as_stack_output(f: Fp12) -> Vec<U256> {
+    f.into_iter().flatten().flatten().rev().collect()
 }
 
 fn make_mul_stack(
@@ -600,30 +30,18 @@ fn make_mul_stack(
     let in1 = U256::from(in1);
     let out = U256::from(out);
 
-    let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
-    let g: Vec<U256> = g.into_iter().flatten().flatten().collect();
-
     let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
     let mul_dest = U256::from(KERNEL.global_labels[mul_label]);
 
     let mut input = vec![in0];
-    input.extend(f);
+    input.extend(fp12_as_stack_input(f));
     input.extend(vec![in1]);
-    input.extend(g);
+    input.extend(fp12_as_stack_input(g));
     input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
     input.reverse();
     input
 }
 
-fn make_mul_expected(f: Fp12, g: Fp12) -> Vec<U256> {
-    mul_fp12(f, g)
-        .into_iter()
-        .flatten()
-        .flatten()
-        .rev()
-        .collect()
-}
-
 #[test]
 fn test_mul_fp12() -> Result<()> {
     let in0 = 64;
@@ -644,9 +62,9 @@ fn test_mul_fp12() -> Result<()> {
     let out_sparse: Vec<U256> = run_interpreter(test_mul, sparse)?.stack().to_vec();
     let out_square: Vec<U256> = run_interpreter(test_mul, square)?.stack().to_vec();
 
-    let exp_normal: Vec<U256> = make_mul_expected(f, g);
-    let exp_sparse: Vec<U256> = make_mul_expected(f, h);
-    let exp_square: Vec<U256> = make_mul_expected(f, f);
+    let exp_normal: Vec<U256> = fp12_as_stack_output(mul_fp12(f, g));
+    let exp_sparse: Vec<U256> = fp12_as_stack_output(mul_fp12(f, h));
+    let exp_square: Vec<U256> = fp12_as_stack_output(mul_fp12(f, f));
 
     assert_eq!(out_normal, exp_normal);
     assert_eq!(out_sparse, exp_sparse);
@@ -655,27 +73,9 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-fn make_frob_stack(f: Fp12) -> Vec<U256> {
-    let ptr = U256::from(100);
-    let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
-    let mut input = vec![ptr];
-    input.extend(f);
-    input.extend(vec![ptr]);
-    input.reverse();
-    input
-}
-
-fn make_frob_expected(n: usize, f: Fp12) -> Vec<U256> {
-    frob_fp12(n, f)
-        .into_iter()
-        .flatten()
-        .flatten()
-        .rev()
-        .collect()
-}
-
 #[test]
 fn test_frob_fp12() -> Result<()> {
+    let ptr = U256::from(100);
     let f: Fp12 = gen_fp12();
 
     let test_frob1 = KERNEL.global_labels["test_frob_fp12_1"];
@@ -683,17 +83,20 @@ fn test_frob_fp12() -> Result<()> {
     let test_frob3 = KERNEL.global_labels["test_frob_fp12_3"];
     let test_frob6 = KERNEL.global_labels["test_frob_fp12_6"];
 
-    let stack = make_frob_stack(f);
+    let mut stack = vec![ptr];
+    stack.extend(fp12_as_stack_input(f));
+    stack.extend(vec![ptr]);
+    stack.reverse();
 
     let out_frob1: Vec<U256> = run_interpreter(test_frob1, stack.clone())?.stack().to_vec();
     let out_frob2: Vec<U256> = run_interpreter(test_frob2, stack.clone())?.stack().to_vec();
     let out_frob3: Vec<U256> = run_interpreter(test_frob3, stack.clone())?.stack().to_vec();
     let out_frob6: Vec<U256> = run_interpreter(test_frob6, stack)?.stack().to_vec();
 
-    let exp_frob1: Vec<U256> = make_frob_expected(1, f);
-    let exp_frob2: Vec<U256> = make_frob_expected(2, f);
-    let exp_frob3: Vec<U256> = make_frob_expected(3, f);
-    let exp_frob6: Vec<U256> = make_frob_expected(6, f);
+    let exp_frob1: Vec<U256> = fp12_as_stack_output(frob_fp12(1, f));
+    let exp_frob2: Vec<U256> = fp12_as_stack_output(frob_fp12(2, f));
+    let exp_frob3: Vec<U256> = fp12_as_stack_output(frob_fp12(3, f));
+    let exp_frob6: Vec<U256> = fp12_as_stack_output(frob_fp12(6, f));
 
     assert_eq!(out_frob1, exp_frob1);
     assert_eq!(out_frob2, exp_frob2);
@@ -703,24 +106,19 @@ fn test_frob_fp12() -> Result<()> {
     Ok(())
 }
 
-fn make_inv_stack(f: Fp12) -> Vec<U256> {
-    let ptr = U256::from(200);
-    let inv = U256::from(300);
-    let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
-
-    let mut input = vec![ptr];
-    input.extend(f);
-    input.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
-    input.reverse();
-    input
-}
-
 #[test]
 fn test_inv_fp12() -> Result<()> {
+    let ptr = U256::from(200);
+    let inv = U256::from(300);
+
     let f: Fp12 = gen_fp12();
 
     let test_inv = KERNEL.global_labels["test_inv_fp12"];
-    let stack = make_inv_stack(f);
+
+    let mut stack = vec![ptr];
+    stack.extend(fp12_as_stack_input(f));
+    stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
+    stack.reverse();
 
     let output: Vec<U256> = run_interpreter(test_inv, stack)?.stack().to_vec();
 
@@ -729,209 +127,200 @@ fn test_inv_fp12() -> Result<()> {
     Ok(())
 }
 
-fn make_pow_stack(f: Fp12) -> Vec<U256> {
-    let ptr = U256::from(300);
-    let out = U256::from(400);
-    let f: Vec<U256> = f.into_iter().flatten().flatten().collect();
-    let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
-
-    let mut input = vec![ptr];
-    input.extend(f);
-    input.extend(vec![ptr, out, ret_stack, out]);
-    input.reverse();
-    input
-}
-
-fn make_pow_expected(f: Fp12) -> Vec<U256> {
-    fast_exp(f).into_iter().flatten().flatten().rev().collect()
-}
-
 #[test]
 fn test_pow_fp12() -> Result<()> {
-    let f: Fp12 = gen_fp12();
-
-    let test_pow = KERNEL.global_labels["test_pow"];
-    let stack = make_pow_stack(f);
-
-    let output: Vec<U256> = run_interpreter(test_pow, stack)?.stack().to_vec();
-    let expected: Vec<U256> = make_pow_expected(f);
-
-    assert_eq!(output, expected);
-
-    Ok(())
-}
-
-fn make_miller_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
     let ptr = U256::from(300);
     let out = U256::from(400);
 
-    let p: Vec<U256> = p.into_iter().collect();
-    let q: Vec<U256> = q.into_iter().flatten().collect();
+    let f: Fp12 = gen_fp12();
 
     let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
+    let test_pow = KERNEL.global_labels["test_pow"];
 
-    let mut input = vec![ptr];
-    input.extend(p);
-    input.extend(q);
-    input.extend(vec![ptr, out, ret_stack]);
-    input.reverse();
-    input
-}
+    let mut stack = vec![ptr];
+    stack.extend(fp12_as_stack_input(f));
+    stack.extend(vec![ptr, out, ret_stack, out]);
+    stack.reverse();
 
-fn store_tangent(p: [Fp; 2], q: [Fp2; 2]) -> Fp12 {
-    let [px, py] = p;
-    let [qx, qy] = q;
-
-    let cx = neg_fp(mul_fp(U256::from(3), mul_fp(px, px)));
-    let cy = mul_fp(U256::from(2), py);
-
-    sparse_embed(
-        sub_fp(mul_fp(py, py), U256::from(9)),
-        mul_fp2(embed_fp2(cx), qx),
-        mul_fp2(embed_fp2(cy), qy),
-    )
-}
-
-fn store_cord(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Fp12 {
-    let [p1x, p1y] = p1;
-    let [p2x, p2y] = p2;
-    let [qx, qy] = q;
-
-    let cx = sub_fp(p2y,p1y);
-    let cy = sub_fp(p1x,p2x);
-
-    sparse_embed(
-        sub_fp(mul_fp(p1y, p2x), mul_fp(p2y, p1x)),
-        mul_fp2(embed_fp2(cx), qx),
-        mul_fp2(embed_fp2(cy), qy),
-    )
-}
-
-fn make_tan_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-    let p: Vec<U256> = p.into_iter().collect();
-    let q: Vec<U256> = q.into_iter().flatten().collect();
-
-    let mut input = p;
-    input.extend(q);
-    input.reverse();
-    input
-}
-
-fn make_tan_expected(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-    store_tangent(p, q)
-        .into_iter()
-        .flatten()
-        .flatten()
-        .rev()
-        .collect()
-}
-
-#[test]
-fn test_store_tangent() -> Result<()> {
-    let p = [gen_fp(), gen_fp()];
-    let q = [[gen_fp(), gen_fp()], [gen_fp(), gen_fp()]];
-
-    let expected = make_tan_expected(p, q);
-
-    let stack = make_tan_stack(p, q);
-    let test_tan = KERNEL.global_labels["test_store_tangent"];
-
-    let output: Vec<U256> = run_interpreter(test_tan, stack)?.stack().to_vec();
+    let output: Vec<U256> = run_interpreter(test_pow, stack)?.stack().to_vec();
+    let expected: Vec<U256> = fp12_as_stack_output(power(f));
 
     assert_eq!(output, expected);
 
     Ok(())
 }
 
-fn make_cord_stack(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-    let p1: Vec<U256> = p1.into_iter().collect();
-    let p2: Vec<U256> = p2.into_iter().collect();
-    let q: Vec<U256> = q.into_iter().flatten().collect();
+// fn make_miller_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
+//     let ptr = U256::from(300);
+//     let out = U256::from(400);
 
-    let mut input = p1;
-    input.extend(p2);
-    input.extend(q);
-    input.reverse();
-    input
-}
+//     let p: Vec<U256> = p.into_iter().collect();
+//     let q: Vec<U256> = q.into_iter().flatten().collect();
 
-fn make_cord_expected(p1: [Fp; 2],p2: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-    store_cord(p1, p2, q)
-        .into_iter()
-        .flatten()
-        .flatten()
-        .rev()
-        .collect()
-}
+//     let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
 
-#[test]
-fn test_store_cord() -> Result<()> {
-    let p1 = [gen_fp(), gen_fp()];
-    let p2 = [gen_fp(), gen_fp()];
-    let q = [[gen_fp(), gen_fp()], [gen_fp(), gen_fp()]];
+//     let mut input = vec![ptr];
+//     input.extend(p);
+//     input.extend(q);
+//     input.extend(vec![ptr, out, ret_stack]);
+//     input.reverse();
+//     input
+// }
 
-    let expected = make_cord_expected(p1, p2, q);
+// fn store_tangent(p: [Fp; 2], q: [Fp2; 2]) -> Fp12 {
+//     let [px, py] = p;
+//     let [qx, qy] = q;
 
-    let stack = make_cord_stack(p1, p2, q);
-    let test_cord = KERNEL.global_labels["test_store_cord"];
+//     let cx = neg_fp(mul_fp(U256::from(3), mul_fp(px, px)));
+//     let cy = mul_fp(U256::from(2), py);
 
-    let output: Vec<U256> = run_interpreter(test_cord, stack)?.stack().to_vec();
+//     sparse_embed(
+//         sub_fp(mul_fp(py, py), U256::from(9)),
+//         mul_fp2(embed_fp2(cx), qx),
+//         mul_fp2(embed_fp2(cy), qy),
+//     )
+// }
 
-    assert_eq!(output, expected);
+// fn store_cord(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Fp12 {
+//     let [p1x, p1y] = p1;
+//     let [p2x, p2y] = p2;
+//     let [qx, qy] = q;
 
-    Ok(())
-}
+//     let cx = sub_fp(p2y, p1y);
+//     let cy = sub_fp(p1x, p2x);
 
-#[test]
-fn test_miller() -> Result<()> {
-    let p = [U256::from(1), U256::from(2)];
-    let q = [
-        [
-            U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
-                .unwrap(),
-            U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
-                .unwrap(),
-        ],
-        [
-            U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
-                .unwrap(),
-            U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
-                .unwrap(),
-        ],
-    ];
+//     sparse_embed(
+//         sub_fp(mul_fp(p1y, p2x), mul_fp(p2y, p1x)),
+//         mul_fp2(embed_fp2(cx), qx),
+//         mul_fp2(embed_fp2(cy), qy),
+//     )
+// }
 
-    let test_mill = KERNEL.global_labels["test_miller"];
-    let stack = make_miller_stack(p, q);
+// fn make_tan_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
+//     let p: Vec<U256> = p.into_iter().collect();
+//     let q: Vec<U256> = q.into_iter().flatten().collect();
 
-    let output: Vec<U256> = run_interpreter(test_mill, stack)?.stack().to_vec();
-    let mut expected: Vec<U256> = vec![
-        U256::from_str("0xbf4dbb7e41fb58122aa29dcced57731d7cbb49b1fe9a73cb13416e1002376da")
-            .unwrap(),
-        U256::from_str("0x110b019c149b43a7fbd6d42d7553debcbebd35c148f63aaecf72a5fbda451ac6")
-            .unwrap(),
-        U256::from_str("0x27225e97ee6c877964c8f32e0b54e61ead09c3e818174cd8b5beabe7cd7385e8")
-            .unwrap(),
-        U256::from_str("0x5762cb6648b4b4c5df8a8874a21d937adf185d91f34e8ccf58f5b39196db02").unwrap(),
-        U256::from_str("0x463002dc1a426b172f4a1e29486fc11eba01de99b559368139c8ef5271eb37f")
-            .unwrap(),
-        U256::from_str("0x753dcc72acdffcc45633803f1b555388969dd7c27d2a674a23a228f522480d9")
-            .unwrap(),
-        U256::from_str("0xd32a892d29151553101376a6638938135e30126f698a40a73f20c6ac64a4585")
-            .unwrap(),
-        U256::from_str("0x290afd3e28c223a624d9f5a737f9f9e4b4200b518333844d81acc445fa5910da")
-            .unwrap(),
-        U256::from_str("0x262e0ee72a8123b741dc113b8e2d207ee8bad011e0f6ae2015439960c789cf78")
-            .unwrap(),
-        U256::from_str("0x1588e0b23d868d7517e3021e620c69eb1521a49faa9bfcd4cf3a54127d4d14cb")
-            .unwrap(),
-        U256::from_str("0x1c23a135a7dfa96db62622c5fef4b9751d121523dd39ca1cefeacb3419835a53")
-            .unwrap(),
-        U256::from_str("0x2caeb873076ec8f37fa7af265d2966dd0024acbc63bd2b21f323084fc71f4a59")
-            .unwrap(),
-    ];
-    expected.reverse();
+//     let mut input = p;
+//     input.extend(q);
+//     input.reverse();
+//     input
+// }
 
-    assert_eq!(output, expected);
+// fn make_tan_expected(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
+//     store_tangent(p, q)
+//         .into_iter()
+//         .flatten()
+//         .flatten()
+//         .rev()
+//         .collect()
+// }
 
-    Ok(())
-}
+// #[test]
+// fn test_store_tangent() -> Result<()> {
+//     let p = [gen_fp(), gen_fp()];
+//     let q = [[gen_fp(), gen_fp()], [gen_fp(), gen_fp()]];
+
+//     let expected = make_tan_expected(p, q);
+
+//     let stack = make_tan_stack(p, q);
+//     let test_tan = KERNEL.global_labels["test_store_tangent"];
+
+//     let output: Vec<U256> = run_interpreter(test_tan, stack)?.stack().to_vec();
+
+//     assert_eq!(output, expected);
+
+//     Ok(())
+// }
+
+// fn make_cord_stack(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
+//     let p1: Vec<U256> = p1.into_iter().collect();
+//     let p2: Vec<U256> = p2.into_iter().collect();
+//     let q: Vec<U256> = q.into_iter().flatten().collect();
+
+//     let mut input = p1;
+//     input.extend(p2);
+//     input.extend(q);
+//     input.reverse();
+//     input
+// }
+
+// fn make_cord_expected(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
+//     store_cord(p1, p2, q)
+//         .into_iter()
+//         .flatten()
+//         .flatten()
+//         .rev()
+//         .collect()
+// }
+
+// #[test]
+// fn test_store_cord() -> Result<()> {
+//     let p1 = gen_curve_point();
+//     let p2 = gen_curve_point();
+//     let q = gen_twisted_curve_point();
+
+//     let expected = make_cord_expected(p1, p2, q);
+
+//     let stack = make_cord_stack(p1, p2, q);
+//     let test_cord = KERNEL.global_labels["test_store_cord"];
+
+//     let output: Vec<U256> = run_interpreter(test_cord, stack)?.stack().to_vec();
+
+//     assert_eq!(output, expected);
+
+//     Ok(())
+// }
+
+// #[test]
+// fn test_miller() -> Result<()> {
+//     let p = [U256::from(1), U256::from(2)];
+//     let q = [
+//         [
+//             U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
+//                 .unwrap(),
+//             U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
+//                 .unwrap(),
+//         ],
+//         [
+//             U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
+//                 .unwrap(),
+//             U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
+//                 .unwrap(),
+//         ],
+//     ];
+
+//     let test_mill = KERNEL.global_labels["test_miller"];
+//     let stack = make_miller_stack(p, q);
+
+//     let output: Vec<U256> = run_interpreter(test_mill, stack)?.stack().to_vec();
+//     let mut expected: Vec<U256> = vec![
+//         U256::from_str("0xbf4dbb7e41fb58122aa29dcced57731d7cbb49b1fe9a73cb13416e1002376da")
+//             .unwrap(),
+//         U256::from_str("0x110b019c149b43a7fbd6d42d7553debcbebd35c148f63aaecf72a5fbda451ac6")
+//             .unwrap(),
+//         U256::from_str("0x27225e97ee6c877964c8f32e0b54e61ead09c3e818174cd8b5beabe7cd7385e8")
+//             .unwrap(),
+//         U256::from_str("0x5762cb6648b4b4c5df8a8874a21d937adf185d91f34e8ccf58f5b39196db02").unwrap(),
+//         U256::from_str("0x463002dc1a426b172f4a1e29486fc11eba01de99b559368139c8ef5271eb37f")
+//             .unwrap(),
+//         U256::from_str("0x753dcc72acdffcc45633803f1b555388969dd7c27d2a674a23a228f522480d9")
+//             .unwrap(),
+//         U256::from_str("0xd32a892d29151553101376a6638938135e30126f698a40a73f20c6ac64a4585")
+//             .unwrap(),
+//         U256::from_str("0x290afd3e28c223a624d9f5a737f9f9e4b4200b518333844d81acc445fa5910da")
+//             .unwrap(),
+//         U256::from_str("0x262e0ee72a8123b741dc113b8e2d207ee8bad011e0f6ae2015439960c789cf78")
+//             .unwrap(),
+//         U256::from_str("0x1588e0b23d868d7517e3021e620c69eb1521a49faa9bfcd4cf3a54127d4d14cb")
+//             .unwrap(),
+//         U256::from_str("0x1c23a135a7dfa96db62622c5fef4b9751d121523dd39ca1cefeacb3419835a53")
+//             .unwrap(),
+//         U256::from_str("0x2caeb873076ec8f37fa7af265d2966dd0024acbc63bd2b21f323084fc71f4a59")
+//             .unwrap(),
+//     ];
+//     expected.reverse();
+
+//     assert_eq!(output, expected);
+
+//     Ok(())
+// }
diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index 4c368491..7ac3e1e7 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -8,6 +8,7 @@
 
 pub mod all_stark;
 pub mod arithmetic;
+pub mod bn254;
 pub mod config;
 pub mod constraint_consumer;
 pub mod cpu;

From 7af11f434078226cb3d880ddadf2212cfbdd3c78 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 21 Dec 2022 14:55:07 -0800
Subject: [PATCH 078/201] clean up prover code

---
 evm/src/bn254.rs                   |   2 +-
 evm/src/generation/prover_input.rs | 376 +----------------------------
 2 files changed, 3 insertions(+), 375 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 631065dd..a86394e6 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -254,7 +254,7 @@ fn inv_fp6(c: Fp6) -> Fp6 {
     [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
 }
 
-fn inv_fp12(f: Fp12) -> Fp12 {
+pub fn inv_fp12(f: Fp12) -> Fp12 {
     let [f0, f1] = f;
     let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
     let b = mul_fp6(a, frob_fp6(2, a));
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index d8b24432..3fa5a0ed 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -1,5 +1,7 @@
 use std::str::FromStr;
 
+use crate::bn254::inv_fp12;
+
 use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;
 
@@ -324,377 +326,3 @@ fn modexp(x: U256, e: U256, n: U256) -> U256 {
     }
     product
 }
-
-type Fp = U256;
-type Fp2 = [U256; 2];
-type Fp6 = [Fp2; 3];
-type Fp12 = [Fp6; 2];
-
-const ZERO: Fp = U256([0, 0, 0, 0]);
-
-const BN_BASE: U256 = U256([
-    4332616871279656263,
-    10917124144477883021,
-    13281191951274694749,
-    3486998266802970665,
-]);
-
-fn embed_fp2(x: Fp) -> Fp2 {
-    [x, ZERO]
-}
-
-fn embed_fp2_fp6(a: Fp2) -> Fp6 {
-    [a, embed_fp2(ZERO), embed_fp2(ZERO)]
-}
-
-fn embed_fp6(x: Fp) -> Fp6 {
-    embed_fp2_fp6(embed_fp2(x))
-}
-
-fn embed_fp12(x: Fp) -> Fp12 {
-    [embed_fp6(x), embed_fp6(ZERO)]
-}
-
-fn add_fp(x: Fp, y: Fp) -> Fp {
-    (x + y) % BN_BASE
-}
-
-fn add3_fp(x: Fp, y: Fp, z: Fp) -> Fp {
-    (x + y + z) % BN_BASE
-}
-
-fn mul_fp(x: Fp, y: Fp) -> Fp {
-    U256::try_from(x.full_mul(y) % BN_BASE).unwrap()
-}
-
-fn sub_fp(x: Fp, y: Fp) -> Fp {
-    (BN_BASE + x - y) % BN_BASE
-}
-
-fn neg_fp(x: Fp) -> Fp {
-    (BN_BASE - x) % BN_BASE
-}
-
-fn conj_fp2(a: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    [a, neg_fp(a_)]
-}
-
-fn add_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [add_fp(a, b), add_fp(a_, b_)]
-}
-
-fn add3_fp2(a: Fp2, b: Fp2, c: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    let [c, c_] = c;
-    [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
-}
-
-fn sub_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [sub_fp(a, b), sub_fp(a_, b_)]
-}
-
-fn neg_fp2(a: Fp2) -> Fp2 {
-    sub_fp2(embed_fp2(ZERO), a)
-}
-
-fn mul_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [
-        sub_fp(mul_fp(a, b), mul_fp(a_, b_)),
-        add_fp(mul_fp(a, b_), mul_fp(a_, b)),
-    ]
-}
-
-fn i9(a: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let nine = U256::from(9);
-    [sub_fp(mul_fp(nine, a), a_), add_fp(a, mul_fp(nine, a_))]
-}
-
-fn add_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
-
-    let e0 = add_fp2(c0, d0);
-    let e1 = add_fp2(c1, d1);
-    let e2 = add_fp2(c2, d2);
-    [e0, e1, e2]
-}
-
-fn sub_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
-
-    let e0 = sub_fp2(c0, d0);
-    let e1 = sub_fp2(c1, d1);
-    let e2 = sub_fp2(c2, d2);
-    [e0, e1, e2]
-}
-
-fn neg_fp6(a: Fp6) -> Fp6 {
-    sub_fp6(embed_fp6(ZERO), a)
-}
-
-fn mul_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
-
-    let c0d0 = mul_fp2(c0, d0);
-    let c0d1 = mul_fp2(c0, d1);
-    let c0d2 = mul_fp2(c0, d2);
-    let c1d0 = mul_fp2(c1, d0);
-    let c1d1 = mul_fp2(c1, d1);
-    let c1d2 = mul_fp2(c1, d2);
-    let c2d0 = mul_fp2(c2, d0);
-    let c2d1 = mul_fp2(c2, d1);
-    let c2d2 = mul_fp2(c2, d2);
-    let cd12 = add_fp2(c1d2, c2d1);
-
-    [
-        add_fp2(c0d0, i9(cd12)),
-        add3_fp2(c0d1, c1d0, i9(c2d2)),
-        add3_fp2(c0d2, c1d1, c2d0),
-    ]
-}
-
-fn sh(c: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    [i9(c2), c0, c1]
-}
-
-fn sparse_embed(x: [U256; 5]) -> Fp12 {
-    let [g0, g1, g1_, g2, g2_] = x;
-    [
-        [embed_fp2(g0), [g1, g1_], embed_fp2(ZERO)],
-        [embed_fp2(ZERO), [g2, g2_], embed_fp2(ZERO)],
-    ]
-}
-
-fn mul_fp12(f: Fp12, g: Fp12) -> Fp12 {
-    let [f0, f1] = f;
-    let [g0, g1] = g;
-
-    let h0 = mul_fp6(f0, g0);
-    let h1 = mul_fp6(f1, g1);
-    let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
-    [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
-}
-
-fn frob_t1(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
-                .unwrap(),
-            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
-                .unwrap(),
-            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
-                .unwrap(),
-            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
-fn frob_t2(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
-                .unwrap(),
-            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
-                .unwrap(),
-            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
-                .unwrap(),
-            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
-fn frob_z(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
-                .unwrap(),
-            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
-                .unwrap(),
-            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
-                .unwrap(),
-            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
-                .unwrap(),
-        ],
-        6 => [
-            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        7 => [
-            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
-                .unwrap(),
-            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
-                .unwrap(),
-        ],
-        8 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        9 => [
-            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
-                .unwrap(),
-            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
-                .unwrap(),
-        ],
-        10 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        11 => [
-            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
-                .unwrap(),
-            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
-fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let _c0 = conj_fp2(c0);
-    let _c1 = conj_fp2(c1);
-    let _c2 = conj_fp2(c2);
-
-    let n = n % 6;
-    let frob_t1 = frob_t1(n);
-    let frob_t2 = frob_t2(n);
-
-    if n % 2 != 0 {
-        [_c0, mul_fp2(frob_t1, _c1), mul_fp2(frob_t2, _c2)]
-    } else {
-        [c0, mul_fp2(frob_t1, c1), mul_fp2(frob_t2, c2)]
-    }
-}
-
-fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
-    let [f0, f1] = f;
-    let scale = embed_fp2_fp6(frob_z(n));
-
-    [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
-}
-
-fn exp_fp(x: Fp, e: U256) -> Fp {
-    let mut current = x;
-    let mut product = U256::one();
-
-    for j in 0..256 {
-        if e.bit(j) {
-            product = U256::try_from(product.full_mul(current) % BN_BASE).unwrap();
-        }
-        current = U256::try_from(current.full_mul(current) % BN_BASE).unwrap();
-    }
-    product
-}
-
-fn inv_fp(x: Fp) -> Fp {
-    exp_fp(x, BN_BASE - 2)
-}
-
-fn inv_fp2(a: Fp2) -> Fp2 {
-    let [a0, a1] = a;
-    let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
-    [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
-}
-
-fn inv_fp6(c: Fp6) -> Fp6 {
-    let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
-    let e = mul_fp6(b, frob_fp6(5, c))[0];
-    let n = mul_fp2(e, conj_fp2(e))[0];
-    let i = inv_fp(n);
-    let d = mul_fp2(embed_fp2(i), e);
-    let [f0, f1, f2] = frob_fp6(1, b);
-    [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
-}
-
-fn inv_fp12(f: Fp12) -> Fp12 {
-    let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
-    let b = mul_fp6(a, frob_fp6(2, a));
-    let c = mul_fp6(b, frob_fp6(4, a))[0];
-    let n = mul_fp2(c, conj_fp2(c))[0];
-    let i = inv_fp(n);
-    let d = mul_fp2(embed_fp2(i), c);
-    let [g0, g1, g2] = frob_fp6(1, b);
-    let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)];
-    [mul_fp6(e, f[0]), neg_fp6(mul_fp6(e, f[1]))]
-}

From 89093b4df5ee516e9b1151865c468675dabe13db Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 22 Dec 2022 15:10:29 -0800
Subject: [PATCH 079/201] clean up

---
 evm/src/bn254.rs                   |  58 +++++++++
 evm/src/cpu/kernel/interpreter.rs  |   1 -
 evm/src/cpu/kernel/tests/bn254.rs  | 189 ++++++++++-------------------
 evm/src/generation/prover_input.rs |  29 +----
 4 files changed, 126 insertions(+), 151 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index a86394e6..5c2e34fa 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -15,6 +15,35 @@ pub type Fp2 = [U256; 2];
 pub type Fp6 = [Fp2; 3];
 pub type Fp12 = [Fp6; 2];
 
+pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
+    f.into_iter().flatten().flatten().collect()
+}
+
+pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
+    let [[[f0, f1], [f2, f3], [f4, f5]], [[f6, f7], [f8, f9], [f10, f11]]] = f;
+    [f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11]
+}
+
+pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
+    let f0 = xs.clone().into_iter().nth(0).unwrap();
+    let f1 = xs.clone().into_iter().nth(1).unwrap();
+    let f2 = xs.clone().into_iter().nth(2).unwrap();
+    let f3 = xs.clone().into_iter().nth(3).unwrap();
+    let f4 = xs.clone().into_iter().nth(4).unwrap();
+    let f5 = xs.clone().into_iter().nth(5).unwrap();
+    let f6 = xs.clone().into_iter().nth(6).unwrap();
+    let f7 = xs.clone().into_iter().nth(7).unwrap();
+    let f8 = xs.clone().into_iter().nth(8).unwrap();
+    let f9 = xs.clone().into_iter().nth(9).unwrap();
+    let f10 = xs.clone().into_iter().nth(10).unwrap();
+    let f11 = xs.clone().into_iter().nth(11).unwrap();
+
+    [
+        [[f0, f1], [f2, f3], [f4, f5]],
+        [[f6, f7], [f8, f9], [f10, f11]],
+    ]
+}
+
 pub type Curve = [Fp; 2];
 pub type TwistedCurve = [Fp2; 2];
 
@@ -601,3 +630,32 @@ const EXPS0: [bool; 65] = [
     true, true, true, false, true, false, true, true, false, false, true, false, false, false,
     true, true, true, true, false, false, true, true, false,
 ];
+
+pub fn store_tangent(p: Curve, q: TwistedCurve) -> Fp12 {
+    let [px, py] = p;
+    let [qx, qy] = q;
+
+    let cx = neg_fp(mul_fp(U256::from(3), mul_fp(px, px)));
+    let cy = mul_fp(U256::from(2), py);
+
+    sparse_embed(
+        sub_fp(mul_fp(py, py), U256::from(9)),
+        mul_fp2(embed_fp2(cx), qx),
+        mul_fp2(embed_fp2(cy), qy),
+    )
+}
+
+pub fn store_cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
+    let [p1x, p1y] = p1;
+    let [p2x, p2y] = p2;
+    let [qx, qy] = q;
+
+    let cx = sub_fp(p2y, p1y);
+    let cy = sub_fp(p1x, p2x);
+
+    sparse_embed(
+        sub_fp(mul_fp(p1y, p2x), mul_fp(p2y, p1x)),
+        mul_fp2(embed_fp2(cx), qx),
+        mul_fp2(embed_fp2(cy), qy),
+    )
+}
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 580adeb7..40fd7dbc 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -8,7 +8,6 @@ use keccak_hash::keccak;
 use plonky2::field::goldilocks_field::GoldilocksField;
 
 use crate::bn254::BN_BASE;
-
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::constants::context_metadata::ContextMetadata;
 use crate::cpu::kernel::constants::global_metadata::GlobalMetadata;
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 0f402c25..945925a7 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,17 +4,13 @@ use anyhow::Result;
 use ethereum_types::U256;
 
 use crate::bn254::{
-    frob_fp12, gen_curve_point, gen_fp12, gen_fp12_sparse, gen_twisted_curve_point, mul_fp12,
-    power, Fp, Fp12, Fp2,
+    fp12_to_vec, frob_fp12, gen_curve_point, gen_fp12, gen_fp12_sparse, gen_twisted_curve_point,
+    mul_fp12, power, store_cord, store_tangent, Curve, Fp12, TwistedCurve,
 };
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
 
-fn fp12_as_stack_input(f: Fp12) -> Vec<U256> {
-    f.into_iter().flatten().flatten().collect()
-}
-
-fn fp12_as_stack_output(f: Fp12) -> Vec<U256> {
+fn fp12_as_stack(f: Fp12) -> Vec<U256> {
     f.into_iter().flatten().flatten().rev().collect()
 }
 
@@ -34,9 +30,9 @@ fn make_mul_stack(
     let mul_dest = U256::from(KERNEL.global_labels[mul_label]);
 
     let mut input = vec![in0];
-    input.extend(fp12_as_stack_input(f));
+    input.extend(fp12_to_vec(f));
     input.extend(vec![in1]);
-    input.extend(fp12_as_stack_input(g));
+    input.extend(fp12_to_vec(g));
     input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
     input.reverse();
     input
@@ -62,9 +58,9 @@ fn test_mul_fp12() -> Result<()> {
     let out_sparse: Vec<U256> = run_interpreter(test_mul, sparse)?.stack().to_vec();
     let out_square: Vec<U256> = run_interpreter(test_mul, square)?.stack().to_vec();
 
-    let exp_normal: Vec<U256> = fp12_as_stack_output(mul_fp12(f, g));
-    let exp_sparse: Vec<U256> = fp12_as_stack_output(mul_fp12(f, h));
-    let exp_square: Vec<U256> = fp12_as_stack_output(mul_fp12(f, f));
+    let exp_normal: Vec<U256> = fp12_as_stack(mul_fp12(f, g));
+    let exp_sparse: Vec<U256> = fp12_as_stack(mul_fp12(f, h));
+    let exp_square: Vec<U256> = fp12_as_stack(mul_fp12(f, f));
 
     assert_eq!(out_normal, exp_normal);
     assert_eq!(out_sparse, exp_sparse);
@@ -84,7 +80,7 @@ fn test_frob_fp12() -> Result<()> {
     let test_frob6 = KERNEL.global_labels["test_frob_fp12_6"];
 
     let mut stack = vec![ptr];
-    stack.extend(fp12_as_stack_input(f));
+    stack.extend(fp12_to_vec(f));
     stack.extend(vec![ptr]);
     stack.reverse();
 
@@ -93,10 +89,10 @@ fn test_frob_fp12() -> Result<()> {
     let out_frob3: Vec<U256> = run_interpreter(test_frob3, stack.clone())?.stack().to_vec();
     let out_frob6: Vec<U256> = run_interpreter(test_frob6, stack)?.stack().to_vec();
 
-    let exp_frob1: Vec<U256> = fp12_as_stack_output(frob_fp12(1, f));
-    let exp_frob2: Vec<U256> = fp12_as_stack_output(frob_fp12(2, f));
-    let exp_frob3: Vec<U256> = fp12_as_stack_output(frob_fp12(3, f));
-    let exp_frob6: Vec<U256> = fp12_as_stack_output(frob_fp12(6, f));
+    let exp_frob1: Vec<U256> = fp12_as_stack(frob_fp12(1, f));
+    let exp_frob2: Vec<U256> = fp12_as_stack(frob_fp12(2, f));
+    let exp_frob3: Vec<U256> = fp12_as_stack(frob_fp12(3, f));
+    let exp_frob6: Vec<U256> = fp12_as_stack(frob_fp12(6, f));
 
     assert_eq!(out_frob1, exp_frob1);
     assert_eq!(out_frob2, exp_frob2);
@@ -116,7 +112,7 @@ fn test_inv_fp12() -> Result<()> {
     let test_inv = KERNEL.global_labels["test_inv_fp12"];
 
     let mut stack = vec![ptr];
-    stack.extend(fp12_as_stack_input(f));
+    stack.extend(fp12_to_vec(f));
     stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
     stack.reverse();
 
@@ -138,12 +134,61 @@ fn test_pow_fp12() -> Result<()> {
     let test_pow = KERNEL.global_labels["test_pow"];
 
     let mut stack = vec![ptr];
-    stack.extend(fp12_as_stack_input(f));
+    stack.extend(fp12_to_vec(f));
     stack.extend(vec![ptr, out, ret_stack, out]);
     stack.reverse();
 
     let output: Vec<U256> = run_interpreter(test_pow, stack)?.stack().to_vec();
-    let expected: Vec<U256> = fp12_as_stack_output(power(f));
+    let expected: Vec<U256> = fp12_as_stack(power(f));
+
+    assert_eq!(output, expected);
+
+    Ok(())
+}
+
+#[test]
+fn test_store_tangent() -> Result<()> {
+    let p: Curve = gen_curve_point();
+    let q: TwistedCurve = gen_twisted_curve_point();
+
+    let p_: Vec<U256> = p.into_iter().collect();
+    let q_: Vec<U256> = q.into_iter().flatten().collect();
+
+    let test_tan = KERNEL.global_labels["test_store_tangent"];
+
+    let mut stack = p_;
+    stack.extend(q_);
+    stack.reverse();
+
+    let output: Vec<U256> = run_interpreter(test_tan, stack)?.stack().to_vec();
+
+    let expected = fp12_as_stack(store_tangent(p, q));
+
+    assert_eq!(output, expected);
+
+    Ok(())
+}
+
+#[test]
+fn test_store_cord() -> Result<()> {
+    let p1: Curve = gen_curve_point();
+    let p2: Curve = gen_curve_point();
+    let q: TwistedCurve = gen_twisted_curve_point();
+
+    let p1_: Vec<U256> = p1.into_iter().collect();
+    let p2_: Vec<U256> = p2.into_iter().collect();
+    let q_: Vec<U256> = q.into_iter().flatten().collect();
+
+    let mut stack = p1_;
+    stack.extend(p2_);
+    stack.extend(q_);
+    stack.reverse();
+
+    let test_cord = KERNEL.global_labels["test_store_cord"];
+
+    let output: Vec<U256> = run_interpreter(test_cord, stack)?.stack().to_vec();
+
+    let expected = fp12_as_stack(store_cord(p1, p2, q));
 
     assert_eq!(output, expected);
 
@@ -167,110 +212,6 @@ fn test_pow_fp12() -> Result<()> {
 //     input
 // }
 
-// fn store_tangent(p: [Fp; 2], q: [Fp2; 2]) -> Fp12 {
-//     let [px, py] = p;
-//     let [qx, qy] = q;
-
-//     let cx = neg_fp(mul_fp(U256::from(3), mul_fp(px, px)));
-//     let cy = mul_fp(U256::from(2), py);
-
-//     sparse_embed(
-//         sub_fp(mul_fp(py, py), U256::from(9)),
-//         mul_fp2(embed_fp2(cx), qx),
-//         mul_fp2(embed_fp2(cy), qy),
-//     )
-// }
-
-// fn store_cord(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Fp12 {
-//     let [p1x, p1y] = p1;
-//     let [p2x, p2y] = p2;
-//     let [qx, qy] = q;
-
-//     let cx = sub_fp(p2y, p1y);
-//     let cy = sub_fp(p1x, p2x);
-
-//     sparse_embed(
-//         sub_fp(mul_fp(p1y, p2x), mul_fp(p2y, p1x)),
-//         mul_fp2(embed_fp2(cx), qx),
-//         mul_fp2(embed_fp2(cy), qy),
-//     )
-// }
-
-// fn make_tan_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-//     let p: Vec<U256> = p.into_iter().collect();
-//     let q: Vec<U256> = q.into_iter().flatten().collect();
-
-//     let mut input = p;
-//     input.extend(q);
-//     input.reverse();
-//     input
-// }
-
-// fn make_tan_expected(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-//     store_tangent(p, q)
-//         .into_iter()
-//         .flatten()
-//         .flatten()
-//         .rev()
-//         .collect()
-// }
-
-// #[test]
-// fn test_store_tangent() -> Result<()> {
-//     let p = [gen_fp(), gen_fp()];
-//     let q = [[gen_fp(), gen_fp()], [gen_fp(), gen_fp()]];
-
-//     let expected = make_tan_expected(p, q);
-
-//     let stack = make_tan_stack(p, q);
-//     let test_tan = KERNEL.global_labels["test_store_tangent"];
-
-//     let output: Vec<U256> = run_interpreter(test_tan, stack)?.stack().to_vec();
-
-//     assert_eq!(output, expected);
-
-//     Ok(())
-// }
-
-// fn make_cord_stack(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-//     let p1: Vec<U256> = p1.into_iter().collect();
-//     let p2: Vec<U256> = p2.into_iter().collect();
-//     let q: Vec<U256> = q.into_iter().flatten().collect();
-
-//     let mut input = p1;
-//     input.extend(p2);
-//     input.extend(q);
-//     input.reverse();
-//     input
-// }
-
-// fn make_cord_expected(p1: [Fp; 2], p2: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-//     store_cord(p1, p2, q)
-//         .into_iter()
-//         .flatten()
-//         .flatten()
-//         .rev()
-//         .collect()
-// }
-
-// #[test]
-// fn test_store_cord() -> Result<()> {
-//     let p1 = gen_curve_point();
-//     let p2 = gen_curve_point();
-//     let q = gen_twisted_curve_point();
-
-//     let expected = make_cord_expected(p1, p2, q);
-
-//     let stack = make_cord_stack(p1, p2, q);
-//     let test_cord = KERNEL.global_labels["test_store_cord"];
-
-//     let output: Vec<U256> = run_interpreter(test_cord, stack)?.stack().to_vec();
-
-//     assert_eq!(output, expected);
-
-//     Ok(())
-// }
-
 // #[test]
 // fn test_miller() -> Result<()> {
 //     let p = [U256::from(1), U256::from(2)];
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 3fa5a0ed..fdce266d 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -1,10 +1,9 @@
 use std::str::FromStr;
 
-use crate::bn254::inv_fp12;
-
 use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;
 
+use crate::bn254::{fp12_to_array, inv_fp12, vec_to_fp12};
 use crate::generation::prover_input::EvmField::{
     Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
@@ -239,30 +238,8 @@ impl EvmField {
     }
 
     fn ext_inv(&self, xs: Vec<U256>, offset: usize) -> [U256; 12] {
-        let f0 = xs.clone().into_iter().nth(offset).unwrap();
-        let f1 = xs.clone().into_iter().nth(offset + 1).unwrap();
-        let f2 = xs.clone().into_iter().nth(offset + 2).unwrap();
-        let f3 = xs.clone().into_iter().nth(offset + 3).unwrap();
-        let f4 = xs.clone().into_iter().nth(offset + 4).unwrap();
-        let f5 = xs.clone().into_iter().nth(offset + 5).unwrap();
-        let f6 = xs.clone().into_iter().nth(offset + 6).unwrap();
-        let f7 = xs.clone().into_iter().nth(offset + 7).unwrap();
-        let f8 = xs.clone().into_iter().nth(offset + 8).unwrap();
-        let f9 = xs.clone().into_iter().nth(offset + 9).unwrap();
-        let f10 = xs.clone().into_iter().nth(offset + 10).unwrap();
-        let f11 = xs.clone().into_iter().nth(offset + 11).unwrap();
-
-        let f = [
-            [[f0, f1], [f2, f3], [f4, f5]],
-            [[f6, f7], [f8, f9], [f10, f11]],
-        ];
-
-        let g = inv_fp12(f);
-
-        [
-            g[0][0][0], g[0][0][1], g[0][1][0], g[0][1][1], g[0][2][0], g[0][2][1], g[1][0][0],
-            g[1][0][1], g[1][1][0], g[1][1][1], g[1][2][0], g[1][2][1],
-        ]
+        let vec: Vec<U256> = xs[offset..].to_vec();
+        fp12_to_array(inv_fp12(vec_to_fp12(vec)))
     }
 
     fn ext_inv0(&self, xs: Vec<U256>) -> U256 {

From b1f31cafeca7caaf304396225b2bc6cf0655f5f7 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 22 Dec 2022 17:07:24 -0800
Subject: [PATCH 080/201] more cleaning

---
 evm/src/bn254.rs                              |   4 +-
 .../bn254/curve_arithmetic/miller_loop.asm    |  20 ++--
 evm/src/cpu/kernel/tests/bn254.rs             | 109 +++++++-----------
 evm/src/generation/prover_input.rs            |  72 +++---------
 4 files changed, 66 insertions(+), 139 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 5c2e34fa..80a5c032 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -631,7 +631,7 @@ const EXPS0: [bool; 65] = [
     true, true, true, true, false, false, true, true, false,
 ];
 
-pub fn store_tangent(p: Curve, q: TwistedCurve) -> Fp12 {
+pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     let [px, py] = p;
     let [qx, qy] = q;
 
@@ -645,7 +645,7 @@ pub fn store_tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     )
 }
 
-pub fn store_cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
+pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     let [p1x, p1y] = p1;
     let [p2x, p2y] = p2;
     let [qx, qy] = q;
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 37d17be8..e2b39c7c 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -139,7 +139,7 @@ mul_tangent_1:
     // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     DUP11  DUP11
     // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    %store_tangent
+    %tangent
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
     PUSH 100  DUP2
     // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
@@ -173,7 +173,7 @@ mul_cord:
     // stack:          O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
     DUP13  DUP13
     // stack:       P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    %store_cord 
+    %cord 
     // stack:                mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     DUP12
     // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
@@ -196,20 +196,20 @@ after_add:
     %jump(miller_one)
 
 
-global test_store_cord:
+global test_cord:
     // stack: p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    %store_cord
+    %cord
     // stack:
     %check(100)
 
-/// def store_cord(p1x, p1y, p2x, p2y, qx, qy):
+/// def cord(p1x, p1y, p2x, p2y, qx, qy):
 ///     return sparse_store(
 ///         p1y*p2x - p2y*p1x, 
 ///         (p2y - p1y) * qx, 
 ///         (p1x - p2x) * qy,
 ///     )
 
-%macro store_cord
+%macro cord
     // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     DUP1  DUP5  MULFP254
     // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
@@ -249,20 +249,20 @@ global test_store_cord:
 %endmacro
 
 
-global test_store_tangent:
+global test_tangent:
     // stack: px, py, qx, qx_, qy, qy_
-    %store_tangent
+    %tangent
     // stack:
     %check(100)
 
-/// def store_tangent(px, py, qx, qy):
+/// def tangent(px, py, qx, qy):
 ///     return sparse_store(
 ///         py**2 - 9, 
 ///         (-3px**2) * qx, 
 ///         (2py)     * qy,
 ///     )
 
-%macro store_tangent
+%macro tangent
     // stack:                px, py, qx, qx_,  qy, qy_
     PUSH 9
     // stack:             9, px, py, qx, qx_,  qy, qy_
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 945925a7..49c0586c 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,14 +4,19 @@ use anyhow::Result;
 use ethereum_types::U256;
 
 use crate::bn254::{
-    fp12_to_vec, frob_fp12, gen_curve_point, gen_fp12, gen_fp12_sparse, gen_twisted_curve_point,
-    mul_fp12, power, store_cord, store_tangent, Curve, Fp12, TwistedCurve,
+    cord, fp12_to_vec, frob_fp12, gen_curve_point, gen_fp12, gen_fp12_sparse,
+    gen_twisted_curve_point, mul_fp12, power, tangent, Curve, Fp12, TwistedCurve,
 };
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
 
-fn fp12_as_stack(f: Fp12) -> Vec<U256> {
-    f.into_iter().flatten().flatten().rev().collect()
+fn get_output(lbl: &str, stack: Vec<U256>) -> Vec<U256> {
+    let label = KERNEL.global_labels[lbl];
+    let mut input = stack;
+    input.reverse();
+    let mut output = run_interpreter(label, input).unwrap().stack().to_vec();
+    output.reverse();
+    output
 }
 
 fn make_mul_stack(
@@ -34,7 +39,6 @@ fn make_mul_stack(
     input.extend(vec![in1]);
     input.extend(fp12_to_vec(g));
     input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
-    input.reverse();
     input
 }
 
@@ -48,19 +52,17 @@ fn test_mul_fp12() -> Result<()> {
     let g: Fp12 = gen_fp12();
     let h: Fp12 = gen_fp12_sparse();
 
-    let test_mul = KERNEL.global_labels["test_mul_fp12"];
-
     let normal: Vec<U256> = make_mul_stack(in0, in1, out, f, g, "mul_fp12");
     let sparse: Vec<U256> = make_mul_stack(in0, in1, out, f, h, "mul_fp12_sparse");
     let square: Vec<U256> = make_mul_stack(in0, in1, out, f, f, "square_fp12_test");
 
-    let out_normal: Vec<U256> = run_interpreter(test_mul, normal)?.stack().to_vec();
-    let out_sparse: Vec<U256> = run_interpreter(test_mul, sparse)?.stack().to_vec();
-    let out_square: Vec<U256> = run_interpreter(test_mul, square)?.stack().to_vec();
+    let out_normal: Vec<U256> = get_output("test_mul_fp12", normal);
+    let out_sparse: Vec<U256> = get_output("test_mul_fp12", sparse);
+    let out_square: Vec<U256> = get_output("test_mul_fp12", square);
 
-    let exp_normal: Vec<U256> = fp12_as_stack(mul_fp12(f, g));
-    let exp_sparse: Vec<U256> = fp12_as_stack(mul_fp12(f, h));
-    let exp_square: Vec<U256> = fp12_as_stack(mul_fp12(f, f));
+    let exp_normal: Vec<U256> = fp12_to_vec(mul_fp12(f, g));
+    let exp_sparse: Vec<U256> = fp12_to_vec(mul_fp12(f, h));
+    let exp_square: Vec<U256> = fp12_to_vec(mul_fp12(f, f));
 
     assert_eq!(out_normal, exp_normal);
     assert_eq!(out_sparse, exp_sparse);
@@ -74,25 +76,19 @@ fn test_frob_fp12() -> Result<()> {
     let ptr = U256::from(100);
     let f: Fp12 = gen_fp12();
 
-    let test_frob1 = KERNEL.global_labels["test_frob_fp12_1"];
-    let test_frob2 = KERNEL.global_labels["test_frob_fp12_2"];
-    let test_frob3 = KERNEL.global_labels["test_frob_fp12_3"];
-    let test_frob6 = KERNEL.global_labels["test_frob_fp12_6"];
-
     let mut stack = vec![ptr];
     stack.extend(fp12_to_vec(f));
     stack.extend(vec![ptr]);
-    stack.reverse();
 
-    let out_frob1: Vec<U256> = run_interpreter(test_frob1, stack.clone())?.stack().to_vec();
-    let out_frob2: Vec<U256> = run_interpreter(test_frob2, stack.clone())?.stack().to_vec();
-    let out_frob3: Vec<U256> = run_interpreter(test_frob3, stack.clone())?.stack().to_vec();
-    let out_frob6: Vec<U256> = run_interpreter(test_frob6, stack)?.stack().to_vec();
+    let out_frob1: Vec<U256> = get_output("test_frob_fp12_1", stack.clone());
+    let out_frob2: Vec<U256> = get_output("test_frob_fp12_2", stack.clone());
+    let out_frob3: Vec<U256> = get_output("test_frob_fp12_3", stack.clone());
+    let out_frob6: Vec<U256> = get_output("test_frob_fp12_6", stack);
 
-    let exp_frob1: Vec<U256> = fp12_as_stack(frob_fp12(1, f));
-    let exp_frob2: Vec<U256> = fp12_as_stack(frob_fp12(2, f));
-    let exp_frob3: Vec<U256> = fp12_as_stack(frob_fp12(3, f));
-    let exp_frob6: Vec<U256> = fp12_as_stack(frob_fp12(6, f));
+    let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
+    let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
+    let exp_frob3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
+    let exp_frob6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
 
     assert_eq!(out_frob1, exp_frob1);
     assert_eq!(out_frob2, exp_frob2);
@@ -109,14 +105,11 @@ fn test_inv_fp12() -> Result<()> {
 
     let f: Fp12 = gen_fp12();
 
-    let test_inv = KERNEL.global_labels["test_inv_fp12"];
-
     let mut stack = vec![ptr];
     stack.extend(fp12_to_vec(f));
     stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
-    stack.reverse();
 
-    let output: Vec<U256> = run_interpreter(test_inv, stack)?.stack().to_vec();
+    let output: Vec<U256> = get_output("test_inv_fp12", stack);
 
     assert_eq!(output, vec![]);
 
@@ -131,15 +124,13 @@ fn test_pow_fp12() -> Result<()> {
     let f: Fp12 = gen_fp12();
 
     let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
-    let test_pow = KERNEL.global_labels["test_pow"];
 
     let mut stack = vec![ptr];
     stack.extend(fp12_to_vec(f));
     stack.extend(vec![ptr, out, ret_stack, out]);
-    stack.reverse();
 
-    let output: Vec<U256> = run_interpreter(test_pow, stack)?.stack().to_vec();
-    let expected: Vec<U256> = fp12_as_stack(power(f));
+    let output: Vec<U256> = get_output("test_pow", stack);
+    let expected: Vec<U256> = fp12_to_vec(power(f));
 
     assert_eq!(output, expected);
 
@@ -147,50 +138,30 @@ fn test_pow_fp12() -> Result<()> {
 }
 
 #[test]
-fn test_store_tangent() -> Result<()> {
-    let p: Curve = gen_curve_point();
-    let q: TwistedCurve = gen_twisted_curve_point();
-
-    let p_: Vec<U256> = p.into_iter().collect();
-    let q_: Vec<U256> = q.into_iter().flatten().collect();
-
-    let test_tan = KERNEL.global_labels["test_store_tangent"];
-
-    let mut stack = p_;
-    stack.extend(q_);
-    stack.reverse();
-
-    let output: Vec<U256> = run_interpreter(test_tan, stack)?.stack().to_vec();
-
-    let expected = fp12_as_stack(store_tangent(p, q));
-
-    assert_eq!(output, expected);
-
-    Ok(())
-}
-
-#[test]
-fn test_store_cord() -> Result<()> {
+fn test_line() -> Result<()> {
     let p1: Curve = gen_curve_point();
     let p2: Curve = gen_curve_point();
     let q: TwistedCurve = gen_twisted_curve_point();
 
-    let p1_: Vec<U256> = p1.into_iter().collect();
-    let p2_: Vec<U256> = p2.into_iter().collect();
+    let p1_: Vec<U256> = p1.to_vec();
+    let p2_: Vec<U256> = p2.to_vec();
     let q_: Vec<U256> = q.into_iter().flatten().collect();
 
-    let mut stack = p1_;
-    stack.extend(p2_);
-    stack.extend(q_);
-    stack.reverse();
+    let mut tan_stack = p1_.clone();
+    tan_stack.extend(q_.clone());
 
-    let test_cord = KERNEL.global_labels["test_store_cord"];
+    let mut cord_stack = p1_;
+    cord_stack.extend(p2_);
+    cord_stack.extend(q_);
 
-    let output: Vec<U256> = run_interpreter(test_cord, stack)?.stack().to_vec();
+    let output_tan: Vec<U256> = get_output("test_tangent", tan_stack);
+    let output_cord: Vec<U256> = get_output("test_cord", cord_stack);
 
-    let expected = fp12_as_stack(store_cord(p1, p2, q));
+    let expected_tan = fp12_to_vec(tangent(p1, q));
+    let expected_cord = fp12_to_vec(cord(p1, p2, q));
 
-    assert_eq!(output, expected);
+    assert_eq!(output_tan, expected_tan);
+    assert_eq!(output_cord, expected_cord);
 
     Ok(())
 }
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index fdce266d..2e403ca6 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -205,18 +205,18 @@ impl EvmField {
 
     fn extop(&self, op: FieldExtOp, xs: Vec<U256>) -> U256 {
         match op {
-            FieldExtOp::ExtInv0 => self.ext_inv0(xs),
-            FieldExtOp::ExtInv1 => self.ext_inv1(xs),
-            FieldExtOp::ExtInv2 => self.ext_inv2(xs),
-            FieldExtOp::ExtInv3 => self.ext_inv3(xs),
-            FieldExtOp::ExtInv4 => self.ext_inv4(xs),
-            FieldExtOp::ExtInv5 => self.ext_inv5(xs),
-            FieldExtOp::ExtInv6 => self.ext_inv6(xs),
-            FieldExtOp::ExtInv7 => self.ext_inv7(xs),
-            FieldExtOp::ExtInv8 => self.ext_inv8(xs),
-            FieldExtOp::ExtInv9 => self.ext_inv9(xs),
-            FieldExtOp::ExtInv10 => self.ext_inv10(xs),
-            FieldExtOp::ExtInv11 => self.ext_inv11(xs),
+            FieldExtOp::ExtInv0 => self.ext_inv_component(0, xs),
+            FieldExtOp::ExtInv1 => self.ext_inv_component(1, xs),
+            FieldExtOp::ExtInv2 => self.ext_inv_component(2, xs),
+            FieldExtOp::ExtInv3 => self.ext_inv_component(3, xs),
+            FieldExtOp::ExtInv4 => self.ext_inv_component(4, xs),
+            FieldExtOp::ExtInv5 => self.ext_inv_component(5, xs),
+            FieldExtOp::ExtInv6 => self.ext_inv_component(6, xs),
+            FieldExtOp::ExtInv7 => self.ext_inv_component(7, xs),
+            FieldExtOp::ExtInv8 => self.ext_inv_component(8, xs),
+            FieldExtOp::ExtInv9 => self.ext_inv_component(9, xs),
+            FieldExtOp::ExtInv10 => self.ext_inv_component(10, xs),
+            FieldExtOp::ExtInv11 => self.ext_inv_component(11, xs),
         }
     }
 
@@ -242,52 +242,8 @@ impl EvmField {
         fp12_to_array(inv_fp12(vec_to_fp12(vec)))
     }
 
-    fn ext_inv0(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 12)[0]
-    }
-
-    fn ext_inv1(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 11)[1]
-    }
-
-    fn ext_inv2(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 10)[2]
-    }
-
-    fn ext_inv3(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 9)[3]
-    }
-
-    fn ext_inv4(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 8)[4]
-    }
-
-    fn ext_inv5(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 7)[5]
-    }
-
-    fn ext_inv6(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 6)[6]
-    }
-
-    fn ext_inv7(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 5)[7]
-    }
-
-    fn ext_inv8(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 4)[8]
-    }
-
-    fn ext_inv9(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 3)[9]
-    }
-
-    fn ext_inv10(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 2)[10]
-    }
-
-    fn ext_inv11(&self, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 1)[11]
+    fn ext_inv_component(&self, n: usize, xs: Vec<U256>) -> U256 {
+        Self::ext_inv(&self, xs, 12 - n)[n]
     }
 }
 

From 31ee8987700a481e6b291dffc228c0c4a2027764 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 22 Dec 2022 17:15:21 -0800
Subject: [PATCH 081/201] clippy

---
 evm/src/bn254.rs                   | 36 +++++++++++++-----------------
 evm/src/generation/prover_input.rs |  2 +-
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 80a5c032..6c993d8c 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -25,7 +25,7 @@ pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
 }
 
 pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
-    let f0 = xs.clone().into_iter().nth(0).unwrap();
+    let f0 = xs.clone().into_iter().next().unwrap();
     let f1 = xs.clone().into_iter().nth(1).unwrap();
     let f2 = xs.clone().into_iter().nth(2).unwrap();
     let f3 = xs.clone().into_iter().nth(3).unwrap();
@@ -36,7 +36,7 @@ pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
     let f8 = xs.clone().into_iter().nth(8).unwrap();
     let f9 = xs.clone().into_iter().nth(9).unwrap();
     let f10 = xs.clone().into_iter().nth(10).unwrap();
-    let f11 = xs.clone().into_iter().nth(11).unwrap();
+    let f11 = xs.into_iter().nth(11).unwrap();
 
     [
         [[f0, f1], [f2, f3], [f4, f5]],
@@ -139,10 +139,6 @@ fn sub_fp2(a: Fp2, b: Fp2) -> Fp2 {
     [sub_fp(a, b), sub_fp(a_, b_)]
 }
 
-fn neg_fp2(a: Fp2) -> Fp2 {
-    sub_fp2(embed_fp2(ZERO), a)
-}
-
 fn mul_fp2(a: Fp2, b: Fp2) -> Fp2 {
     let [a, a_] = a;
     let [b, b_] = b;
@@ -267,21 +263,21 @@ fn inv_fp(x: Fp) -> Fp {
     exp_fp(x, BN_BASE - 2)
 }
 
-fn inv_fp2(a: Fp2) -> Fp2 {
-    let [a0, a1] = a;
-    let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
-    [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
-}
+// fn inv_fp2(a: Fp2) -> Fp2 {
+//     let [a0, a1] = a;
+//     let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
+//     [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
+// }
 
-fn inv_fp6(c: Fp6) -> Fp6 {
-    let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
-    let e = mul_fp6(b, frob_fp6(5, c))[0];
-    let n = mul_fp2(e, conj_fp2(e))[0];
-    let i = inv_fp(n);
-    let d = mul_fp2(embed_fp2(i), e);
-    let [f0, f1, f2] = frob_fp6(1, b);
-    [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
-}
+// fn inv_fp6(c: Fp6) -> Fp6 {
+//     let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
+//     let e = mul_fp6(b, frob_fp6(5, c))[0];
+//     let n = mul_fp2(e, conj_fp2(e))[0];
+//     let i = inv_fp(n);
+//     let d = mul_fp2(embed_fp2(i), e);
+//     let [f0, f1, f2] = frob_fp6(1, b);
+//     [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
+// }
 
 pub fn inv_fp12(f: Fp12) -> Fp12 {
     let [f0, f1] = f;
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 2e403ca6..7735412b 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -243,7 +243,7 @@ impl EvmField {
     }
 
     fn ext_inv_component(&self, n: usize, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(&self, xs, 12 - n)[n]
+        Self::ext_inv(self, xs, 12 - n)[n]
     }
 }
 

From f2787a06a0598c49a385fa8bac45f6dc1784261d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 22 Dec 2022 17:39:18 -0800
Subject: [PATCH 082/201] more clean

---
 evm/src/cpu/kernel/tests/bn254.rs | 83 ++++++++++++++++---------------
 1 file changed, 42 insertions(+), 41 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 49c0586c..b5b30b87 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -10,6 +10,18 @@ use crate::bn254::{
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
 
+fn make_label(lbl: &str) -> U256 {
+    U256::from(KERNEL.global_labels[lbl])
+}
+
+fn make_stack(vecs: Vec<Vec<U256>>) -> Vec<U256> {
+    let mut stack = vec![];
+    for vec in vecs {
+        stack.extend(vec)
+    }
+    stack
+}
+
 fn get_output(lbl: &str, stack: Vec<U256>) -> Vec<U256> {
     let label = KERNEL.global_labels[lbl];
     let mut input = stack;
@@ -19,42 +31,36 @@ fn get_output(lbl: &str, stack: Vec<U256>) -> Vec<U256> {
     output
 }
 
-fn make_mul_stack(
-    in0: usize,
-    in1: usize,
-    out: usize,
-    f: Fp12,
-    g: Fp12,
-    mul_label: &str,
-) -> Vec<U256> {
-    let in0 = U256::from(in0);
-    let in1 = U256::from(in1);
-    let out = U256::from(out);
+fn make_mul_stack(f: Fp12, g: Fp12, mul_label: &str) -> Vec<U256> {
+    let in0 = U256::from(64);
+    let in1 = U256::from(76);
+    let out = U256::from(88);
 
-    let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
-    let mul_dest = U256::from(KERNEL.global_labels[mul_label]);
-
-    let mut input = vec![in0];
-    input.extend(fp12_to_vec(f));
-    input.extend(vec![in1]);
-    input.extend(fp12_to_vec(g));
-    input.extend(vec![mul_dest, in0, in1, out, ret_stack, out]);
-    input
+    make_stack(vec![
+        vec![in0],
+        fp12_to_vec(f),
+        vec![in1],
+        fp12_to_vec(g),
+        vec![
+            make_label(mul_label),
+            in0,
+            in1,
+            out,
+            make_label("ret_stack"),
+            out,
+        ],
+    ])
 }
 
 #[test]
 fn test_mul_fp12() -> Result<()> {
-    let in0 = 64;
-    let in1 = 76;
-    let out = 88;
-
     let f: Fp12 = gen_fp12();
     let g: Fp12 = gen_fp12();
     let h: Fp12 = gen_fp12_sparse();
 
-    let normal: Vec<U256> = make_mul_stack(in0, in1, out, f, g, "mul_fp12");
-    let sparse: Vec<U256> = make_mul_stack(in0, in1, out, f, h, "mul_fp12_sparse");
-    let square: Vec<U256> = make_mul_stack(in0, in1, out, f, f, "square_fp12_test");
+    let normal: Vec<U256> = make_mul_stack(f, g, "mul_fp12");
+    let sparse: Vec<U256> = make_mul_stack(f, h, "mul_fp12_sparse");
+    let square: Vec<U256> = make_mul_stack(f, f, "square_fp12_test");
 
     let out_normal: Vec<U256> = get_output("test_mul_fp12", normal);
     let out_sparse: Vec<U256> = get_output("test_mul_fp12", sparse);
@@ -74,11 +80,10 @@ fn test_mul_fp12() -> Result<()> {
 #[test]
 fn test_frob_fp12() -> Result<()> {
     let ptr = U256::from(100);
+
     let f: Fp12 = gen_fp12();
 
-    let mut stack = vec![ptr];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![ptr]);
+    let stack = make_stack(vec![vec![ptr], fp12_to_vec(f), vec![ptr]]);
 
     let out_frob1: Vec<U256> = get_output("test_frob_fp12_1", stack.clone());
     let out_frob2: Vec<U256> = get_output("test_frob_fp12_2", stack.clone());
@@ -123,11 +128,11 @@ fn test_pow_fp12() -> Result<()> {
 
     let f: Fp12 = gen_fp12();
 
-    let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
-
-    let mut stack = vec![ptr];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![ptr, out, ret_stack, out]);
+    let stack = make_stack(vec![
+        vec![ptr],
+        fp12_to_vec(f),
+        vec![ptr, out, make_label("ret_stack"), out],
+    ]);
 
     let output: Vec<U256> = get_output("test_pow", stack);
     let expected: Vec<U256> = fp12_to_vec(power(f));
@@ -147,12 +152,8 @@ fn test_line() -> Result<()> {
     let p2_: Vec<U256> = p2.to_vec();
     let q_: Vec<U256> = q.into_iter().flatten().collect();
 
-    let mut tan_stack = p1_.clone();
-    tan_stack.extend(q_.clone());
-
-    let mut cord_stack = p1_;
-    cord_stack.extend(p2_);
-    cord_stack.extend(q_);
+    let tan_stack = make_stack(vec![p1_.clone(), q_.clone()]);
+    let cord_stack = make_stack(vec![p1_, p2_, q_]);
 
     let output_tan: Vec<U256> = get_output("test_tangent", tan_stack);
     let output_cord: Vec<U256> = get_output("test_cord", cord_stack);

From ef824110fd7030d6c7652f6b8f7249d678380a95 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 14:16:22 -0800
Subject: [PATCH 083/201] miller in rust

---
 evm/src/bn254.rs                              | 104 +++++++++++++++---
 .../bn254/curve_arithmetic/miller_loop.asm    |  30 +++--
 2 files changed, 105 insertions(+), 29 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 6c993d8c..8f29e12d 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -115,6 +115,27 @@ fn neg_fp(x: Fp) -> Fp {
     (BN_BASE - x) % BN_BASE
 }
 
+fn exp_fp(x: Fp, e: U256) -> Fp {
+    let mut current = x;
+    let mut product = U256::one();
+
+    for j in 0..256 {
+        if e.bit(j) {
+            product = U256::try_from(product.full_mul(current) % BN_BASE).unwrap();
+        }
+        current = U256::try_from(current.full_mul(current) % BN_BASE).unwrap();
+    }
+    product
+}
+
+fn inv_fp(x: Fp) -> Fp {
+    exp_fp(x, BN_BASE - 2)
+}
+
+fn div_fp(x: Fp, y: Fp) -> Fp {
+    mul_fp(x, inv_fp(y))
+}
+
 fn conj_fp2(a: Fp2) -> Fp2 {
     let [a, a_] = a;
     [a, neg_fp(a_)]
@@ -246,23 +267,6 @@ pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
 }
 
-fn exp_fp(x: Fp, e: U256) -> Fp {
-    let mut current = x;
-    let mut product = U256::one();
-
-    for j in 0..256 {
-        if e.bit(j) {
-            product = U256::try_from(product.full_mul(current) % BN_BASE).unwrap();
-        }
-        current = U256::try_from(current.full_mul(current) % BN_BASE).unwrap();
-    }
-    product
-}
-
-fn inv_fp(x: Fp) -> Fp {
-    exp_fp(x, BN_BASE - 2)
-}
-
 // fn inv_fp2(a: Fp2) -> Fp2 {
 //     let [a0, a1] = a;
 //     let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
@@ -655,3 +659,69 @@ pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
         mul_fp2(embed_fp2(cy), qy),
     )
 }
+
+fn tangent_slope(p: Curve) -> Fp {
+    let [px, py] = p;
+    let num = mul_fp(mul_fp(px, px), U256::from(3));
+    let denom = mul_fp(py, U256::from(2));
+    div_fp(num, denom)
+}
+
+fn cord_slope(p: Curve, q: Curve) -> Fp {
+    let [px, py] = p;
+    let [qx, qy] = q;
+    let num = sub_fp(qy, py);
+    let denom = sub_fp(qx, px);
+    div_fp(num, denom)
+}
+
+fn third_point(m: Fp, p: Curve, q: Curve) -> Curve {
+    let [px, py] = p;
+    let [qx, _] = q;
+    let ox = sub_fp(mul_fp(m, m), add_fp(px, qx));
+    let oy = sub_fp(mul_fp(m, sub_fp(px, ox)), py);
+    [ox, oy]
+}
+
+fn curve_add(p: Curve, q: Curve) -> Curve {
+    if p == q {
+        curve_double(p)
+    }
+    else { 
+        third_point(cord_slope(p, q), p, q)
+    }
+}
+
+fn curve_double(p: Curve) -> Curve {
+    third_point(tangent_slope(p), p, p)
+}
+
+const EXP: [usize; 253] = [
+    1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
+    0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
+    1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
+    0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
+    1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
+    0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
+pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
+    let mut o = p;
+    let mut acc = embed_fp12(U256::one());
+    let mut line;
+
+    for i in EXP {
+        acc = mul_fp12(acc, acc);
+        line = tangent(o, q);
+        acc = mul_fp12(line, acc);
+        o = curve_double(o);
+        if i != 0 {
+            line = cord(p, o, q);
+            acc = mul_fp12(line, acc);
+            o = curve_add(p, o);
+        }
+    }
+    acc
+}
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index e2b39c7c..0f860047 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -126,6 +126,12 @@ miller_zero_final:
 ///     out = mul_fp12_sparse(out, line)
 ///     O += O
 
+global test_mul_tan:
+    // stack: out, f, retdest, 0xnm, times, O, P, Q, out
+    %store_fp12
+    // stack:         retdest, 0xnm, times, O, P, Q, out
+    %jump(mul_tangent)
+
 mul_tangent:
     // stack:                                              retdest, 0xnm, times, O, P, Q, out
     PUSH mul_tangent_2  DUP13  PUSH mul_tangent_1
@@ -154,7 +160,13 @@ mul_tangent_2:
 after_double:
     // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
     SWAP5  POP  SWAP5  POP
+
     // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
+
+    %pop3  %pop2  %pop2  %pop4
+    %load_fp12
+    %jump(0xdeadbeef)
+
     JUMP
 
 
@@ -163,6 +175,12 @@ after_double:
 ///     out = mul_fp12_sparse(out, line)
 ///     O += P
 
+global test_mul_cord:
+    // stack: out, f, 0xnm, times, O, P, Q, out
+    %store_fp12
+    // stack:         0xnm, times, O, P, Q, out
+    %jump(mul_cord)
+
 mul_cord:
     // stack:                            0xnm, times, O, P, Q, out
     PUSH mul_cord_1
@@ -196,12 +214,6 @@ after_add:
     %jump(miller_one)
 
 
-global test_cord:
-    // stack: p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    %cord
-    // stack:
-    %check(100)
-
 /// def cord(p1x, p1y, p2x, p2y, qx, qy):
 ///     return sparse_store(
 ///         p1y*p2x - p2y*p1x, 
@@ -249,12 +261,6 @@ global test_cord:
 %endmacro
 
 
-global test_tangent:
-    // stack: px, py, qx, qx_, qy, qy_
-    %tangent
-    // stack:
-    %check(100)
-
 /// def tangent(px, py, qx, qy):
 ///     return sparse_store(
 ///         py**2 - 9, 

From e35644e9e0e6330cd778e494304be1369814dc65 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 14:55:47 -0800
Subject: [PATCH 084/201] miller test passes

---
 evm/src/bn254.rs                              |  3 +-
 .../bn254/curve_arithmetic/miller_loop.asm    |  6 --
 evm/src/cpu/kernel/tests/bn254.rs             | 93 ++++++-------------
 3 files changed, 30 insertions(+), 72 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 8f29e12d..26accbfd 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -686,8 +686,7 @@ fn third_point(m: Fp, p: Curve, q: Curve) -> Curve {
 fn curve_add(p: Curve, q: Curve) -> Curve {
     if p == q {
         curve_double(p)
-    }
-    else { 
+    } else {
         third_point(cord_slope(p, q), p, q)
     }
 }
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 0f860047..972bffb9 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -160,13 +160,7 @@ mul_tangent_2:
 after_double:
     // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
     SWAP5  POP  SWAP5  POP
-
     // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
-
-    %pop3  %pop2  %pop2  %pop4
-    %load_fp12
-    %jump(0xdeadbeef)
-
     JUMP
 
 
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index b5b30b87..71c4c3da 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -5,7 +5,7 @@ use ethereum_types::U256;
 
 use crate::bn254::{
     cord, fp12_to_vec, frob_fp12, gen_curve_point, gen_fp12, gen_fp12_sparse,
-    gen_twisted_curve_point, mul_fp12, power, tangent, Curve, Fp12, TwistedCurve,
+    gen_twisted_curve_point, miller_loop, mul_fp12, power, tangent, Curve, Fp12, TwistedCurve,
 };
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
@@ -167,73 +167,38 @@ fn test_line() -> Result<()> {
     Ok(())
 }
 
-// fn make_miller_stack(p: [Fp; 2], q: [Fp2; 2]) -> Vec<U256> {
-//     let ptr = U256::from(300);
-//     let out = U256::from(400);
+#[test]
+fn test_miller() -> Result<()> {
+    let ptr = U256::from(300);
+    let out = U256::from(400);
 
-//     let p: Vec<U256> = p.into_iter().collect();
-//     let q: Vec<U256> = q.into_iter().flatten().collect();
+    let p: Curve = [U256::one(), U256::from(2)];
+    let q: TwistedCurve = [
+        [
+            U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
+                .unwrap(),
+            U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
+                .unwrap(),
+        ],
+        [
+            U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
+                .unwrap(),
+            U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
+                .unwrap(),
+        ],
+    ];
 
-//     let ret_stack = U256::from(KERNEL.global_labels["ret_stack"]);
+    let p_: Vec<U256> = p.into_iter().collect();
+    let q_: Vec<U256> = q.into_iter().flatten().collect();
 
-//     let mut input = vec![ptr];
-//     input.extend(p);
-//     input.extend(q);
-//     input.extend(vec![ptr, out, ret_stack]);
-//     input.reverse();
-//     input
-// }
+    let ret_stack = make_label("ret_stack");
 
-// #[test]
-// fn test_miller() -> Result<()> {
-//     let p = [U256::from(1), U256::from(2)];
-//     let q = [
-//         [
-//             U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
-//                 .unwrap(),
-//             U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
-//                 .unwrap(),
-//         ],
-//         [
-//             U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
-//                 .unwrap(),
-//             U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
-//                 .unwrap(),
-//         ],
-//     ];
+    let initial_stack = make_stack(vec![vec![ptr], p_, q_, vec![ptr, out, ret_stack]]);
 
-//     let test_mill = KERNEL.global_labels["test_miller"];
-//     let stack = make_miller_stack(p, q);
+    let output = get_output("test_miller", initial_stack);
+    let expected = fp12_to_vec(miller_loop(p, q));
 
-//     let output: Vec<U256> = run_interpreter(test_mill, stack)?.stack().to_vec();
-//     let mut expected: Vec<U256> = vec![
-//         U256::from_str("0xbf4dbb7e41fb58122aa29dcced57731d7cbb49b1fe9a73cb13416e1002376da")
-//             .unwrap(),
-//         U256::from_str("0x110b019c149b43a7fbd6d42d7553debcbebd35c148f63aaecf72a5fbda451ac6")
-//             .unwrap(),
-//         U256::from_str("0x27225e97ee6c877964c8f32e0b54e61ead09c3e818174cd8b5beabe7cd7385e8")
-//             .unwrap(),
-//         U256::from_str("0x5762cb6648b4b4c5df8a8874a21d937adf185d91f34e8ccf58f5b39196db02").unwrap(),
-//         U256::from_str("0x463002dc1a426b172f4a1e29486fc11eba01de99b559368139c8ef5271eb37f")
-//             .unwrap(),
-//         U256::from_str("0x753dcc72acdffcc45633803f1b555388969dd7c27d2a674a23a228f522480d9")
-//             .unwrap(),
-//         U256::from_str("0xd32a892d29151553101376a6638938135e30126f698a40a73f20c6ac64a4585")
-//             .unwrap(),
-//         U256::from_str("0x290afd3e28c223a624d9f5a737f9f9e4b4200b518333844d81acc445fa5910da")
-//             .unwrap(),
-//         U256::from_str("0x262e0ee72a8123b741dc113b8e2d207ee8bad011e0f6ae2015439960c789cf78")
-//             .unwrap(),
-//         U256::from_str("0x1588e0b23d868d7517e3021e620c69eb1521a49faa9bfcd4cf3a54127d4d14cb")
-//             .unwrap(),
-//         U256::from_str("0x1c23a135a7dfa96db62622c5fef4b9751d121523dd39ca1cefeacb3419835a53")
-//             .unwrap(),
-//         U256::from_str("0x2caeb873076ec8f37fa7af265d2966dd0024acbc63bd2b21f323084fc71f4a59")
-//             .unwrap(),
-//     ];
-//     expected.reverse();
+    assert_eq!(output, expected);
 
-//     assert_eq!(output, expected);
-
-//     Ok(())
-// }
+    Ok(())
+}

From f1d5c6bfb9a939738173b2aed967682d95680b96 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 15:10:59 -0800
Subject: [PATCH 085/201] tuck const

---
 evm/src/bn254.rs | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 26accbfd..1e0e973f 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -695,18 +695,18 @@ fn curve_double(p: Curve) -> Curve {
     third_point(tangent_slope(p), p, p)
 }
 
-const EXP: [usize; 253] = [
-    1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
-    0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
-    1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
-    0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
-    1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
-    0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
-    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-];
-
 pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
+    const EXP: [usize; 253] = [
+        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
+        0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
+        1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
+        0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
+        0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
+        1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
+        0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
+        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    ];
+
     let mut o = p;
     let mut acc = embed_fp12(U256::one());
     let mut line;

From 826702a756ccb80080615c92be2a3540658018b3 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 16:00:16 -0800
Subject: [PATCH 086/201] clean

---
 evm/src/bn254.rs                              | 309 +++++++++---------
 .../bn254/curve_arithmetic/miller_loop.asm    |   2 +-
 evm/src/cpu/kernel/tests/bn254.rs             |  27 +-
 3 files changed, 156 insertions(+), 182 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 1e0e973f..6b3026ef 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -297,19 +297,158 @@ pub fn inv_fp12(f: Fp12) -> Fp12 {
 }
 
 pub fn power(f: Fp12) -> Fp12 {
+    const EXPS4: [(usize, usize, usize); 65] = [
+        (1, 1, 1),
+        (1, 1, 0),
+        (1, 1, 1),
+        (1, 1, 1),
+        (0, 0, 0),
+        (0, 0, 1),
+        (1, 0, 1),
+        (0, 1, 0),
+        (1, 0, 1),
+        (1, 1, 0),
+        (1, 0, 1),
+        (0, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (0, 1, 0),
+        (0, 1, 0),
+        (0, 0, 1),
+        (1, 0, 1),
+        (1, 1, 0),
+        (0, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (0, 0, 1),
+        (0, 0, 1),
+        (1, 0, 1),
+        (1, 0, 1),
+        (1, 1, 0),
+        (1, 0, 0),
+        (1, 1, 0),
+        (0, 1, 0),
+        (1, 1, 0),
+        (1, 0, 0),
+        (0, 1, 0),
+        (0, 0, 0),
+        (1, 0, 0),
+        (1, 0, 0),
+        (1, 0, 1),
+        (0, 0, 1),
+        (0, 1, 1),
+        (0, 0, 1),
+        (0, 1, 1),
+        (0, 1, 1),
+        (0, 0, 0),
+        (1, 1, 1),
+        (1, 0, 1),
+        (1, 0, 1),
+        (0, 1, 1),
+        (1, 0, 1),
+        (0, 1, 1),
+        (0, 1, 1),
+        (1, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (1, 0, 0),
+        (0, 0, 1),
+        (1, 0, 0),
+        (0, 0, 1),
+        (1, 0, 1),
+        (1, 1, 0),
+        (1, 1, 1),
+        (0, 1, 1),
+        (0, 1, 0),
+        (1, 1, 1),
+    ];
+
+    const EXPS2: [(usize, usize); 62] = [
+        (1, 0),
+        (1, 1),
+        (0, 0),
+        (1, 0),
+        (1, 0),
+        (1, 1),
+        (1, 0),
+        (1, 1),
+        (1, 0),
+        (0, 1),
+        (0, 1),
+        (1, 1),
+        (1, 1),
+        (0, 0),
+        (1, 1),
+        (0, 0),
+        (0, 0),
+        (0, 1),
+        (0, 1),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        (0, 1),
+        (1, 1),
+        (0, 0),
+        (1, 1),
+        (1, 0),
+        (1, 1),
+        (0, 0),
+        (1, 1),
+        (1, 1),
+        (1, 0),
+        (0, 0),
+        (0, 1),
+        (0, 0),
+        (1, 1),
+        (0, 1),
+        (0, 0),
+        (1, 0),
+        (0, 1),
+        (0, 1),
+        (1, 0),
+        (0, 1),
+        (0, 0),
+        (0, 0),
+        (0, 0),
+        (0, 1),
+        (1, 0),
+        (1, 1),
+        (0, 1),
+        (1, 1),
+        (1, 0),
+        (0, 1),
+        (0, 0),
+        (1, 0),
+        (0, 1),
+        (1, 0),
+        (1, 1),
+        (1, 0),
+        (1, 1),
+        (0, 1),
+        (1, 1),
+    ];
+
+    const EXPS0: [usize; 65] = [
+        0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,
+        0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1,
+        0, 0, 1, 1, 0,
+    ];
+
     let mut sq: Fp12 = f;
     let mut y0: Fp12 = embed_fp12(U256::one());
     let mut y2: Fp12 = embed_fp12(U256::one());
     let mut y4: Fp12 = embed_fp12(U256::one());
 
     for (a, b, c) in EXPS4 {
-        if a {
+        if a != 0 {
             y4 = mul_fp12(y4, sq);
         }
-        if b {
+        if b != 0 {
             y2 = mul_fp12(y2, sq);
         }
-        if c {
+        if c != 0 {
             y0 = mul_fp12(y0, sq);
         }
         sq = mul_fp12(sq, sq);
@@ -317,10 +456,10 @@ pub fn power(f: Fp12) -> Fp12 {
     y4 = mul_fp12(y4, sq);
 
     for (a, b) in EXPS2 {
-        if a {
+        if a != 0 {
             y2 = mul_fp12(y2, sq);
         }
-        if b {
+        if b != 0 {
             y0 = mul_fp12(y0, sq);
         }
         sq = mul_fp12(sq, sq);
@@ -328,7 +467,7 @@ pub fn power(f: Fp12) -> Fp12 {
     y2 = mul_fp12(y2, sq);
 
     for a in EXPS0 {
-        if a {
+        if a != 0 {
             y0 = mul_fp12(y0, sq);
         }
         sq = mul_fp12(sq, sq);
@@ -490,147 +629,6 @@ fn frob_z(n: usize) -> Fp2 {
     }
 }
 
-const EXPS4: [(bool, bool, bool); 65] = [
-    (true, true, true),
-    (true, true, false),
-    (true, true, true),
-    (true, true, true),
-    (false, false, false),
-    (false, false, true),
-    (true, false, true),
-    (false, true, false),
-    (true, false, true),
-    (true, true, false),
-    (true, false, true),
-    (false, true, false),
-    (true, true, false),
-    (true, true, false),
-    (true, true, false),
-    (false, true, false),
-    (false, true, false),
-    (false, false, true),
-    (true, false, true),
-    (true, true, false),
-    (false, true, false),
-    (true, true, false),
-    (true, true, false),
-    (true, true, false),
-    (false, false, true),
-    (false, false, true),
-    (true, false, true),
-    (true, false, true),
-    (true, true, false),
-    (true, false, false),
-    (true, true, false),
-    (false, true, false),
-    (true, true, false),
-    (true, false, false),
-    (false, true, false),
-    (false, false, false),
-    (true, false, false),
-    (true, false, false),
-    (true, false, true),
-    (false, false, true),
-    (false, true, true),
-    (false, false, true),
-    (false, true, true),
-    (false, true, true),
-    (false, false, false),
-    (true, true, true),
-    (true, false, true),
-    (true, false, true),
-    (false, true, true),
-    (true, false, true),
-    (false, true, true),
-    (false, true, true),
-    (true, true, false),
-    (true, true, false),
-    (true, true, false),
-    (true, false, false),
-    (false, false, true),
-    (true, false, false),
-    (false, false, true),
-    (true, false, true),
-    (true, true, false),
-    (true, true, true),
-    (false, true, true),
-    (false, true, false),
-    (true, true, true),
-];
-
-const EXPS2: [(bool, bool); 62] = [
-    (true, false),
-    (true, true),
-    (false, false),
-    (true, false),
-    (true, false),
-    (true, true),
-    (true, false),
-    (true, true),
-    (true, false),
-    (false, true),
-    (false, true),
-    (true, true),
-    (true, true),
-    (false, false),
-    (true, true),
-    (false, false),
-    (false, false),
-    (false, true),
-    (false, true),
-    (true, true),
-    (true, true),
-    (true, true),
-    (false, true),
-    (true, true),
-    (false, false),
-    (true, true),
-    (true, false),
-    (true, true),
-    (false, false),
-    (true, true),
-    (true, true),
-    (true, false),
-    (false, false),
-    (false, true),
-    (false, false),
-    (true, true),
-    (false, true),
-    (false, false),
-    (true, false),
-    (false, true),
-    (false, true),
-    (true, false),
-    (false, true),
-    (false, false),
-    (false, false),
-    (false, false),
-    (false, true),
-    (true, false),
-    (true, true),
-    (false, true),
-    (true, true),
-    (true, false),
-    (false, true),
-    (false, false),
-    (true, false),
-    (false, true),
-    (true, false),
-    (true, true),
-    (true, false),
-    (true, true),
-    (false, true),
-    (true, true),
-];
-
-const EXPS0: [bool; 65] = [
-    false, false, true, false, false, true, true, false, true, false, true, true, true, false,
-    true, false, false, false, true, false, false, true, false, true, false, true, true, false,
-    false, false, false, false, true, false, true, false, true, true, true, false, false, true,
-    true, true, true, false, true, false, true, true, false, false, true, false, false, false,
-    true, true, true, true, false, false, true, true, false,
-];
-
 pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     let [px, py] = p;
     let [qx, qy] = q;
@@ -697,14 +695,15 @@ fn curve_double(p: Curve) -> Curve {
 
 pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
     const EXP: [usize; 253] = [
-        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
-        0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
-        1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
-        0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
-        0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
-        1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
-        0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
-        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
+        1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
+        1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
+        1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
+        1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
+        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
+        0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
+        1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     ];
 
     let mut o = p;
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 972bffb9..ba73fed1 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -250,7 +250,7 @@ after_add:
     %mstore_kernel_general(103)
     // stack:                                            p1x - p2x, qy_
     MULFP254
-    // stack:                                            (p1x - p2x)qy_
+    // stack:                                           (p1x - p2x)*qy_
     %mstore_kernel_general(109)
 %endmacro
 
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 71c4c3da..1831e910 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -122,7 +122,7 @@ fn test_inv_fp12() -> Result<()> {
 }
 
 #[test]
-fn test_pow_fp12() -> Result<()> {
+fn test_power() -> Result<()> {
     let ptr = U256::from(300);
     let out = U256::from(400);
 
@@ -142,31 +142,6 @@ fn test_pow_fp12() -> Result<()> {
     Ok(())
 }
 
-#[test]
-fn test_line() -> Result<()> {
-    let p1: Curve = gen_curve_point();
-    let p2: Curve = gen_curve_point();
-    let q: TwistedCurve = gen_twisted_curve_point();
-
-    let p1_: Vec<U256> = p1.to_vec();
-    let p2_: Vec<U256> = p2.to_vec();
-    let q_: Vec<U256> = q.into_iter().flatten().collect();
-
-    let tan_stack = make_stack(vec![p1_.clone(), q_.clone()]);
-    let cord_stack = make_stack(vec![p1_, p2_, q_]);
-
-    let output_tan: Vec<U256> = get_output("test_tangent", tan_stack);
-    let output_cord: Vec<U256> = get_output("test_cord", cord_stack);
-
-    let expected_tan = fp12_to_vec(tangent(p1, q));
-    let expected_cord = fp12_to_vec(cord(p1, p2, q));
-
-    assert_eq!(output_tan, expected_tan);
-    assert_eq!(output_cord, expected_cord);
-
-    Ok(())
-}
-
 #[test]
 fn test_miller() -> Result<()> {
     let ptr = U256::from(300);

From d4d80f35288067810c789dc5b7c14bc86f25e7c7 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 16:02:55 -0800
Subject: [PATCH 087/201] rearrange

---
 evm/src/bn254.rs | 286 +++++++++++++++++++++++------------------------
 1 file changed, 143 insertions(+), 143 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 6b3026ef..33a5eb75 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -267,6 +267,149 @@ pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
 }
 
+fn frob_t1(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
+                .unwrap(),
+            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
+                .unwrap(),
+            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
+                .unwrap(),
+            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
+fn frob_t2(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
+                .unwrap(),
+            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
+                .unwrap(),
+            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
+                .unwrap(),
+            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
+fn frob_z(n: usize) -> Fp2 {
+    match n {
+        0 => [
+            U256::from_str("0x1").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        1 => [
+            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
+                .unwrap(),
+            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        3 => [
+            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
+                .unwrap(),
+            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        5 => [
+            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
+                .unwrap(),
+            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
+                .unwrap(),
+        ],
+        6 => [
+            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
+                .unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        7 => [
+            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
+                .unwrap(),
+            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
+                .unwrap(),
+        ],
+        8 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        9 => [
+            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
+                .unwrap(),
+            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
+                .unwrap(),
+        ],
+        10 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
+            U256::from_str("0x0").unwrap(),
+        ],
+        11 => [
+            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
+                .unwrap(),
+            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    }
+}
+
 // fn inv_fp2(a: Fp2) -> Fp2 {
 //     let [a0, a1] = a;
 //     let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
@@ -486,149 +629,6 @@ pub fn power(f: Fp12) -> Fp12 {
     mul_fp12(mul_fp12(y4, y2), y0)
 }
 
-fn frob_t1(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
-                .unwrap(),
-            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
-                .unwrap(),
-            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
-                .unwrap(),
-            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
-fn frob_t2(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
-                .unwrap(),
-            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
-                .unwrap(),
-            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
-                .unwrap(),
-            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
-fn frob_z(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
-                .unwrap(),
-            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
-                .unwrap(),
-            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
-                .unwrap(),
-            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
-                .unwrap(),
-        ],
-        6 => [
-            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        7 => [
-            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
-                .unwrap(),
-            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
-                .unwrap(),
-        ],
-        8 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        9 => [
-            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
-                .unwrap(),
-            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
-                .unwrap(),
-        ],
-        10 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        11 => [
-            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
-                .unwrap(),
-            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
-}
-
 pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     let [px, py] = p;
     let [qx, qy] = q;

From 84fab8d6be0fa89f4bec44ad0721e32ae690ef61 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 16:19:08 -0800
Subject: [PATCH 088/201] clean

---
 evm/src/bn254.rs                  | 36 ++++++++++++-------------------
 evm/src/cpu/kernel/tests/bn254.rs |  4 ++--
 2 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 33a5eb75..4081d7ab 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -75,14 +75,6 @@ fn gen_fp2() -> Fp2 {
     [gen_fp(), gen_fp()]
 }
 
-pub fn gen_curve_point() -> Curve {
-    gen_fp2()
-}
-
-pub fn gen_twisted_curve_point() -> TwistedCurve {
-    [gen_fp2(), gen_fp2()]
-}
-
 fn gen_fp6() -> Fp6 {
     [gen_fp2(), gen_fp2(), gen_fp2()]
 }
@@ -410,21 +402,21 @@ fn frob_z(n: usize) -> Fp2 {
     }
 }
 
-// fn inv_fp2(a: Fp2) -> Fp2 {
-//     let [a0, a1] = a;
-//     let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
-//     [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
-// }
+pub fn inv_fp2(a: Fp2) -> Fp2 {
+    let [a0, a1] = a;
+    let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
+    [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
+}
 
-// fn inv_fp6(c: Fp6) -> Fp6 {
-//     let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
-//     let e = mul_fp6(b, frob_fp6(5, c))[0];
-//     let n = mul_fp2(e, conj_fp2(e))[0];
-//     let i = inv_fp(n);
-//     let d = mul_fp2(embed_fp2(i), e);
-//     let [f0, f1, f2] = frob_fp6(1, b);
-//     [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
-// }
+pub fn inv_fp6(c: Fp6) -> Fp6 {
+    let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
+    let e = mul_fp6(b, frob_fp6(5, c))[0];
+    let n = mul_fp2(e, conj_fp2(e))[0];
+    let i = inv_fp(n);
+    let d = mul_fp2(embed_fp2(i), e);
+    let [f0, f1, f2] = frob_fp6(1, b);
+    [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
+}
 
 pub fn inv_fp12(f: Fp12) -> Fp12 {
     let [f0, f1] = f;
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 1831e910..a4710f08 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,8 +4,8 @@ use anyhow::Result;
 use ethereum_types::U256;
 
 use crate::bn254::{
-    cord, fp12_to_vec, frob_fp12, gen_curve_point, gen_fp12, gen_fp12_sparse,
-    gen_twisted_curve_point, miller_loop, mul_fp12, power, tangent, Curve, Fp12, TwistedCurve,
+    fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, miller_loop, mul_fp12, power, Curve, Fp12,
+    TwistedCurve,
 };
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;

From 9beca707e52aa757aab34ec1b83bdbd0eaa68b97 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 16:21:56 -0800
Subject: [PATCH 089/201] clean

---
 evm/src/generation/prover_input.rs | 34 ++++++++++++++----------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 7735412b..2b9860ca 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -205,18 +205,18 @@ impl EvmField {
 
     fn extop(&self, op: FieldExtOp, xs: Vec<U256>) -> U256 {
         match op {
-            FieldExtOp::ExtInv0 => self.ext_inv_component(0, xs),
-            FieldExtOp::ExtInv1 => self.ext_inv_component(1, xs),
-            FieldExtOp::ExtInv2 => self.ext_inv_component(2, xs),
-            FieldExtOp::ExtInv3 => self.ext_inv_component(3, xs),
-            FieldExtOp::ExtInv4 => self.ext_inv_component(4, xs),
-            FieldExtOp::ExtInv5 => self.ext_inv_component(5, xs),
-            FieldExtOp::ExtInv6 => self.ext_inv_component(6, xs),
-            FieldExtOp::ExtInv7 => self.ext_inv_component(7, xs),
-            FieldExtOp::ExtInv8 => self.ext_inv_component(8, xs),
-            FieldExtOp::ExtInv9 => self.ext_inv_component(9, xs),
-            FieldExtOp::ExtInv10 => self.ext_inv_component(10, xs),
-            FieldExtOp::ExtInv11 => self.ext_inv_component(11, xs),
+            FieldExtOp::ExtInv0 => self.ext_inv(0, xs),
+            FieldExtOp::ExtInv1 => self.ext_inv(1, xs),
+            FieldExtOp::ExtInv2 => self.ext_inv(2, xs),
+            FieldExtOp::ExtInv3 => self.ext_inv(3, xs),
+            FieldExtOp::ExtInv4 => self.ext_inv(4, xs),
+            FieldExtOp::ExtInv5 => self.ext_inv(5, xs),
+            FieldExtOp::ExtInv6 => self.ext_inv(6, xs),
+            FieldExtOp::ExtInv7 => self.ext_inv(7, xs),
+            FieldExtOp::ExtInv8 => self.ext_inv(8, xs),
+            FieldExtOp::ExtInv9 => self.ext_inv(9, xs),
+            FieldExtOp::ExtInv10 => self.ext_inv(10, xs),
+            FieldExtOp::ExtInv11 => self.ext_inv(11, xs),
         }
     }
 
@@ -237,13 +237,11 @@ impl EvmField {
         modexp(x, q, n)
     }
 
-    fn ext_inv(&self, xs: Vec<U256>, offset: usize) -> [U256; 12] {
+    fn ext_inv(&self, n: usize, xs: Vec<U256>) -> U256 {
+        let offset = 12 - n;
         let vec: Vec<U256> = xs[offset..].to_vec();
-        fp12_to_array(inv_fp12(vec_to_fp12(vec)))
-    }
-
-    fn ext_inv_component(&self, n: usize, xs: Vec<U256>) -> U256 {
-        Self::ext_inv(self, xs, 12 - n)[n]
+        let f = fp12_to_array(inv_fp12(vec_to_fp12(vec)));
+        f[n]
     }
 }
 

From 1f176734575ce31791ec2ffdab12c3b8979a23eb Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 16:28:20 -0800
Subject: [PATCH 090/201] better vec to fp12

---
 evm/src/bn254.rs | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 4081d7ab..1ffe5211 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -1,5 +1,6 @@
 use std::str::FromStr;
 
+use itertools::Itertools;
 use ethereum_types::U256;
 use rand::{thread_rng, Rng};
 
@@ -25,23 +26,13 @@ pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
 }
 
 pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
-    let f0 = xs.clone().into_iter().next().unwrap();
-    let f1 = xs.clone().into_iter().nth(1).unwrap();
-    let f2 = xs.clone().into_iter().nth(2).unwrap();
-    let f3 = xs.clone().into_iter().nth(3).unwrap();
-    let f4 = xs.clone().into_iter().nth(4).unwrap();
-    let f5 = xs.clone().into_iter().nth(5).unwrap();
-    let f6 = xs.clone().into_iter().nth(6).unwrap();
-    let f7 = xs.clone().into_iter().nth(7).unwrap();
-    let f8 = xs.clone().into_iter().nth(8).unwrap();
-    let f9 = xs.clone().into_iter().nth(9).unwrap();
-    let f10 = xs.clone().into_iter().nth(10).unwrap();
-    let f11 = xs.into_iter().nth(11).unwrap();
-
-    [
-        [[f0, f1], [f2, f3], [f4, f5]],
-        [[f6, f7], [f8, f9], [f10, f11]],
-    ]
+    xs.into_iter()
+        .tuples::<(U256, U256)>()
+        .map(|(v1, v2)| [v1, v2])
+        .tuples()
+        .map(|(a1, a2, a3, a4, a5, a6)| [[a1, a2, a3], [a4, a5, a6]])
+        .next()
+        .unwrap()
 }
 
 pub type Curve = [Fp; 2];
@@ -715,3 +706,7 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
     }
     acc
 }
+
+// pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
+
+// }

From c0744d76b945526227035eafcc08ebcfa34b2964 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 18:38:20 -0800
Subject: [PATCH 091/201] TATE TEST PASSES

---
 evm/src/bn254.rs                              | 39 +++++++-
 .../bn254/curve_arithmetic/miller_loop.asm    | 16 +--
 .../bn254/curve_arithmetic/tate_pairing.asm   | 44 ++++++---
 .../curve/bn254/field_arithmetic/inverse.asm  |  8 +-
 .../curve/bn254/field_arithmetic/power.asm    |  5 -
 .../curve/bn254/field_arithmetic/utils.asm    | 99 +++++++++++++++++++
 evm/src/cpu/kernel/tests/bn254.rs             | 56 ++++++-----
 7 files changed, 202 insertions(+), 65 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 1ffe5211..d07ab02e 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -1,7 +1,7 @@
 use std::str::FromStr;
 
-use itertools::Itertools;
 use ethereum_types::U256;
+use itertools::Itertools;
 use rand::{thread_rng, Rng};
 
 pub const BN_BASE: U256 = U256([
@@ -38,6 +38,27 @@ pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
 pub type Curve = [Fp; 2];
 pub type TwistedCurve = [Fp2; 2];
 
+pub fn curve_generator() -> Curve {
+    [U256::one(), U256::from(2)]
+}
+
+pub fn twisted_curve_generator() -> TwistedCurve {
+    [
+        [
+            U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
+                .unwrap(),
+            U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
+                .unwrap(),
+        ],
+        [
+            U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
+                .unwrap(),
+            U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
+                .unwrap(),
+        ],
+    ]
+}
+
 const ZERO: Fp = U256([0, 0, 0, 0]);
 
 fn embed_fp2(x: Fp) -> Fp2 {
@@ -707,6 +728,18 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
     acc
 }
 
-// pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
+pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
+    let mut out = miller_loop(p, q);
 
-// }
+    let inv = inv_fp12(out);
+    out = frob_fp12(6, out);
+    out = mul_fp12(out, inv);
+
+    let acc = frob_fp12(2, out);
+    out = mul_fp12(out, acc);
+
+    let pow = power(out);
+    out = frob_fp12(3, out);
+
+    mul_fp12(out, pow)
+}
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index ba73fed1..3ef6ddb7 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -78,9 +78,9 @@ miller_final:
     %jump(miller_zero_final)
 miller_end:
     // stack: m, times, O, P, Q, out, retdest
-    %pop2  %pop2  %pop2  %pop4
+    %pop2  %pop2  %pop2  %pop4  POP
     // stack:                    out, retdest
-    SWAP1  JUMP 
+    JUMP 
 
 
 miller_one:
@@ -126,12 +126,6 @@ miller_zero_final:
 ///     out = mul_fp12_sparse(out, line)
 ///     O += O
 
-global test_mul_tan:
-    // stack: out, f, retdest, 0xnm, times, O, P, Q, out
-    %store_fp12
-    // stack:         retdest, 0xnm, times, O, P, Q, out
-    %jump(mul_tangent)
-
 mul_tangent:
     // stack:                                              retdest, 0xnm, times, O, P, Q, out
     PUSH mul_tangent_2  DUP13  PUSH mul_tangent_1
@@ -169,12 +163,6 @@ after_double:
 ///     out = mul_fp12_sparse(out, line)
 ///     O += P
 
-global test_mul_cord:
-    // stack: out, f, 0xnm, times, O, P, Q, out
-    %store_fp12
-    // stack:         0xnm, times, O, P, Q, out
-    %jump(mul_cord)
-
 mul_cord:
     // stack:                            0xnm, times, O, P, Q, out
     PUSH mul_cord_1
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 27185084..7e0f6d77 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -1,25 +1,35 @@
-/// def tate(P : [Fp; 2], Q: [Fp2; 2]) -> Fp12:
+/// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
 ///     out = miller_loop(P, Q)
 ///
 ///     inv = inv_fp12(out)
-///     out = frob_fp12_6(out)
+///     out = frob_fp12(6, out)
 ///     out = mul_fp12(out, inv)
 ///
-///     acc = frob_fp12_2_(out)
+///     acc = frob_fp12(2, out)
 ///     out = mul_fp12(out, acc)
 ///
-///     pow = fast_exp(out)
-///     out = frob_fp12_3(out) 
+///     pow = power(out)
+///     out = frob_fp12(3, out) 
 ///     out = mul_fp12(out, pow)
 ///
 ///     return out
 
+global test_tate:
+    // stack: ptr, P, Q, ptr, out, retdest
+    %store_fp6
+    // stack:            ptr, out, retdest
+    %jump(tate)
+
 global tate:
-    // stack:           ptr, out,            retdest
-    PUSH post_mllr   SWAP2   SWAP1
-    // stack:           ptr, out, post_mllr, retdest
+    // stack:                      ptr, out, retdest
+    DUP2
+    // stack:                 out, ptr, out, retdest
+    PUSH post_mllr
+    // stack:      post_mllr, out, ptr, out, retdest
+    SWAP2
+    // stack:      ptr, out, post_mllr, out, retdest
     %jump(miller_init)
-global post_mllr:
+global post_mllr:    
     // stack:                           out, retdest
     PUSH tate_inv
     // stack:                 tate_inv, out, retdest
@@ -29,6 +39,8 @@ global post_mllr:
     // stack:       out, 100, tate_inv, out, retdest
     %jump(inv_fp12)
 tate_inv:
+    // stack:                           out, retdest  {100: inv}
+    %frob_fp12_6
     // stack:                           out, retdest  {100: inv}
     PUSH tate_mul1
     // stack:                tate_mul1, out, retdest  {100: inv}
@@ -38,8 +50,6 @@ tate_inv:
     // stack:      100, out, tate_mul1, out, retdest  {100: inv}
     DUP2
     // stack: out, 100, out, tate_mul1, out, retdest  {100: inv}
-    %frob_fp12_6
-    // stack: out, 100, out, tate_mul1, out, retdest  {100: inv}
     %jump(mul_fp12)
 tate_mul1:
     // stack:                           out, retdest  {100: inv}
@@ -52,7 +62,9 @@ tate_mul1:
     DUP2
     // stack: out, 100, out, tate_mul2, out, retdest  {100: inv}
     %frob_fp12_2_
-    // stack: out, 100, out, tate_mul2, out, retdest  {100: inv} 
+    // stack:      100, out, tate_mul2, out, retdest  {100: acc} 
+    DUP2
+    // stack: out, 100, out, tate_mul2, out, retdest  {100: acc}
     %jump(mul_fp12)
 tate_mul2: 
     // stack:                           out, retdest  {100: acc}
@@ -60,8 +72,12 @@ tate_mul2:
     // stack:                 post_pow, out, retdest  {100: acc}
     PUSH 100
     // stack:            100, post_pow, out, retdest  {100: acc}
-    DUP3
-    // stack:       out, 100, post_pow, out, retdest  {100: acc}
+    PUSH 300
+    // stack:       300, 100, post_pow, out, retdest  {100: acc}
+    DUP4
+    // stack:  out, 300, 100, post_pow, out, retdest  {100: acc}
+    %move_fp12
+    // stack:       300, 100, post_pow, out, retdest  {100: acc, 300: out}
     %jump(power)
 post_pow: 
     // stack:                           out, retdest  {100: pow}
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index e8815fc3..e2fbb17b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -40,14 +40,14 @@ global inv_fp12:
     // stack:         inv, f, ptr, inv, retdest
     POP  %pop4  %pop4  %pop4
     // stack:                 ptr, inv, retdest
-    PUSH 100  PUSH check_inv
-    // stack: check_inv, 100, ptr, inv, retdest 
+    PUSH 50  PUSH check_inv
+    // stack: check_inv, 50, ptr, inv, retdest 
     SWAP3  SWAP1  SWAP2
-    // stack: ptr, inv, 100, check_inv, retdest 
+    // stack: ptr, inv, 50, check_inv, retdest 
     %jump(mul_fp12)
 global check_inv:
     // stack:        retdest
-    PUSH 100  %load_fp12
+    PUSH 50  %load_fp12
     // stack: unit?, retdest
     %assert_eq_unit_fp12
     // stack:        retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index e83732a5..af0eb48b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -227,8 +227,3 @@ power_loop_0_end:
     PUSH 200  PUSH power_return  SWAP2  DUP2 
     // stack: 200, ptr, 200, power_return
     %jump(mul_fp12)
-
-%macro check(lbl)
-    PUSH $lbl
-    %jump(ret_stack)
-%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
index 7a664a94..f134c1aa 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
@@ -972,6 +972,105 @@
     // stack:                                                            
 %endmacro
 
+%macro move_fp12
+    // stack:              ptr, ptr'
+    DUP1  
+    // stack:       ind00, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x00, ptr, ptr'
+    DUP3
+    // stack: ind00', x00, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(1)
+    // stack:       ind01, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x01, ptr, ptr'
+    DUP3  %add_const(1)
+    // stack: ind01', x01, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(2)
+    // stack:       ind02, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x02, ptr, ptr'
+    DUP3  %add_const(2)
+    // stack: ind02', x02, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(3)
+    // stack:       ind03, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x03, ptr, ptr'
+    DUP3  %add_const(3)
+    // stack: ind03', x03, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(4)
+    // stack:       ind04, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x04, ptr, ptr'
+    DUP3  %add_const(4)
+    // stack: ind04', x04, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(5)
+    // stack:       ind05, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x05, ptr, ptr'
+    DUP3  %add_const(5)
+    // stack: ind05', x05, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(6)
+    // stack:       ind06, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x06, ptr, ptr'
+    DUP3  %add_const(6)
+    // stack: ind06', x06, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(7)
+    // stack:       ind07, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x07, ptr, ptr'
+    DUP3  %add_const(7)
+    // stack: ind07', x07, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(8)
+    // stack:       ind08, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x08, ptr, ptr'
+    DUP3  %add_const(8)
+    // stack: ind08', x08, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(9)
+    // stack:       ind09, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x09, ptr, ptr'
+    DUP3  %add_const(9)
+    // stack: ind09', x09, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    DUP1  %add_const(10)
+    // stack:       ind10, ptr, ptr'
+    %mload_kernel_general
+    // stack:         x10, ptr, ptr'
+    DUP3  %add_const(10)
+    // stack: ind10', x10, ptr, ptr'
+    %mstore_kernel_general
+    // stack:              ptr, ptr'
+    %add_const(11)
+    // stack:            ind11, ptr'
+    %mload_kernel_general
+    // stack:              x11, ptr'
+    DUP2  %add_const(11)
+    // stack:      ind11', x11, ptr'
+    %mstore_kernel_general
+%endmacro
+
 global ret_stack:
     // stack: out
     %load_fp12
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index a4710f08..cc29dc43 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,8 +4,8 @@ use anyhow::Result;
 use ethereum_types::U256;
 
 use crate::bn254::{
-    fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, miller_loop, mul_fp12, power, Curve, Fp12,
-    TwistedCurve,
+    curve_generator, fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, miller_loop, mul_fp12,
+    power, tate, twisted_curve_generator, Curve, Fp12, TwistedCurve,
 };
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
@@ -110,9 +110,11 @@ fn test_inv_fp12() -> Result<()> {
 
     let f: Fp12 = gen_fp12();
 
-    let mut stack = vec![ptr];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
+    let stack = make_stack(vec![
+        vec![ptr],
+        fp12_to_vec(f),
+        vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()],
+    ]);
 
     let output: Vec<U256> = get_output("test_inv_fp12", stack);
 
@@ -142,38 +144,42 @@ fn test_power() -> Result<()> {
     Ok(())
 }
 
-#[test]
-fn test_miller() -> Result<()> {
+fn make_tate_stack(p: Curve, q: TwistedCurve) -> Vec<U256> {
     let ptr = U256::from(300);
     let out = U256::from(400);
 
-    let p: Curve = [U256::one(), U256::from(2)];
-    let q: TwistedCurve = [
-        [
-            U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
-                .unwrap(),
-            U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
-                .unwrap(),
-        ],
-        [
-            U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
-                .unwrap(),
-            U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
-                .unwrap(),
-        ],
-    ];
-
     let p_: Vec<U256> = p.into_iter().collect();
     let q_: Vec<U256> = q.into_iter().flatten().collect();
 
     let ret_stack = make_label("ret_stack");
 
-    let initial_stack = make_stack(vec![vec![ptr], p_, q_, vec![ptr, out, ret_stack]]);
+    make_stack(vec![vec![ptr], p_, q_, vec![ptr, out, ret_stack, out]])
+}
 
-    let output = get_output("test_miller", initial_stack);
+#[test]
+fn test_miller() -> Result<()> {
+    let p: Curve = curve_generator();
+    let q: TwistedCurve = twisted_curve_generator();
+
+    let stack = make_tate_stack(p, q);
+    let output = get_output("test_miller", stack);
     let expected = fp12_to_vec(miller_loop(p, q));
 
     assert_eq!(output, expected);
 
     Ok(())
 }
+
+#[test]
+fn test_tate() -> Result<()> {
+    let p: Curve = curve_generator();
+    let q: TwistedCurve = twisted_curve_generator();
+
+    let stack = make_tate_stack(p, q);
+    let output = get_output("test_tate", stack);
+    let expected = fp12_to_vec(tate(p, q));
+
+    assert_eq!(output, expected);
+
+    Ok(())
+}

From cca75c7713fa2e883b7981ffa296a5f0a00e1d54 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 27 Dec 2022 18:42:34 -0800
Subject: [PATCH 092/201] remove redundant definition

---
 evm/src/arithmetic/mod.rs | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/evm/src/arithmetic/mod.rs b/evm/src/arithmetic/mod.rs
index 1493b292..bb58edf3 100644
--- a/evm/src/arithmetic/mod.rs
+++ b/evm/src/arithmetic/mod.rs
@@ -1,7 +1,6 @@
-use std::str::FromStr;
-
 use ethereum_types::U256;
 
+use crate::bn254::BN_BASE;
 use crate::util::{addmod, mulmod, submod};
 
 mod add;
@@ -78,9 +77,9 @@ impl BinaryOperator {
                     input1 >> input0
                 }
             }
-            BinaryOperator::AddFp254 => addmod(input0, input1, bn_base_order()),
-            BinaryOperator::MulFp254 => mulmod(input0, input1, bn_base_order()),
-            BinaryOperator::SubFp254 => submod(input0, input1, bn_base_order()),
+            BinaryOperator::AddFp254 => addmod(input0, input1, BN_BASE),
+            BinaryOperator::MulFp254 => mulmod(input0, input1, BN_BASE),
+            BinaryOperator::SubFp254 => submod(input0, input1, BN_BASE),
         }
     }
 }
@@ -152,7 +151,3 @@ impl Operation {
         }
     }
 }
-
-fn bn_base_order() -> U256 {
-    U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47").unwrap()
-}

From de8637ce8cba53b0d7a036bc8264ab70d0bec460 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 29 Dec 2022 14:03:52 -0800
Subject: [PATCH 093/201] name

---
 evm/src/bn254.rs                                    |  7 +++++++
 .../asm/curve/bn254/field_arithmetic/fp12_mul.asm   |  6 +++---
 .../asm/curve/bn254/field_arithmetic/power.asm      |  4 ++--
 .../asm/curve/bn254/field_arithmetic/utils.asm      |  2 +-
 evm/src/cpu/kernel/tests/bn254.rs                   | 13 ++++++++-----
 5 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index d07ab02e..39332da4 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -731,6 +731,8 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
 pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
     let mut out = miller_loop(p, q);
 
+    println!("POSTMILLER: {:#?}", out);
+
     let inv = inv_fp12(out);
     out = frob_fp12(6, out);
     out = mul_fp12(out, inv);
@@ -738,7 +740,12 @@ pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
     let acc = frob_fp12(2, out);
     out = mul_fp12(out, acc);
 
+    println!("PREPOWER: {:#?}", out);
+
     let pow = power(out);
+
+    println!("POWER: {:#?}", pow);
+
     out = frob_fp12(3, out);
 
     mul_fp12(out, pow)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index 5c2471d9..a6ec278b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -1,11 +1,11 @@
 /// Note: uncomment this to test
 
 global test_mul_fp12:
-    // stack: inA, f, f', inB, g, g', mul_dest, inA, inB, out, ret_stack, out
+    // stack: inA, f, f', inB, g, g', mul_dest, inA, inB, out, return_fp12_on_stack, out
     %store_fp12
-    // stack:             inB, g, g', mul_dest, inA, inB, out, ret_stack, out
+    // stack:             inB, g, g', mul_dest, inA, inB, out, return_fp12_on_stack, out
     %store_fp12
-    // stack:                         mul_dest, inA, inB, out, ret_stack, out
+    // stack:                         mul_dest, inA, inB, out, return_fp12_on_stack, out
     JUMP
 
 ///////////////////////////////////////
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index af0eb48b..8fc28e9e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -1,7 +1,7 @@
 global test_pow:
-    // stack: ptr, f, ptr, out, ret_stack, out
+    // stack: ptr, f, ptr, out, return_fp12_on_stack, out
     %store_fp12
-    // stack:         ptr, out, ret_stack, out
+    // stack:         ptr, out, return_fp12_on_stack, out
     %jump(power)
 
 /// def power(acc):
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
index f134c1aa..ab02f897 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
@@ -1071,7 +1071,7 @@
     %mstore_kernel_general
 %endmacro
 
-global ret_stack:
+global return_fp12_on_stack:
     // stack: out
     %load_fp12
     %jump(0xdeadbeef)
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index cc29dc43..3a4ddc1e 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -46,7 +46,7 @@ fn make_mul_stack(f: Fp12, g: Fp12, mul_label: &str) -> Vec<U256> {
             in0,
             in1,
             out,
-            make_label("ret_stack"),
+            make_label("return_fp12_on_stack"),
             out,
         ],
     ])
@@ -133,7 +133,7 @@ fn test_power() -> Result<()> {
     let stack = make_stack(vec![
         vec![ptr],
         fp12_to_vec(f),
-        vec![ptr, out, make_label("ret_stack"), out],
+        vec![ptr, out, make_label("return_fp12_on_stack"), out],
     ]);
 
     let output: Vec<U256> = get_output("test_pow", stack);
@@ -151,9 +151,12 @@ fn make_tate_stack(p: Curve, q: TwistedCurve) -> Vec<U256> {
     let p_: Vec<U256> = p.into_iter().collect();
     let q_: Vec<U256> = q.into_iter().flatten().collect();
 
-    let ret_stack = make_label("ret_stack");
-
-    make_stack(vec![vec![ptr], p_, q_, vec![ptr, out, ret_stack, out]])
+    make_stack(vec![
+        vec![ptr],
+        p_,
+        q_,
+        vec![ptr, out, make_label("return_fp12_on_stack"), out],
+    ])
 }
 
 #[test]

From 77798f889e6eddbcc73e3a08bc9ca326c9cd5df9 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 29 Dec 2022 14:12:03 -0800
Subject: [PATCH 094/201] remove loop endpoint

---
 evm/src/bn254.rs                                            | 3 +--
 .../kernel/asm/curve/bn254/curve_arithmetic/constants.asm   | 2 +-
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm   | 6 +++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 39332da4..3e11e02f 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -444,8 +444,7 @@ pub fn inv_fp12(f: Fp12) -> Fp12 {
 }
 
 pub fn power(f: Fp12) -> Fp12 {
-    const EXPS4: [(usize, usize, usize); 65] = [
-        (1, 1, 1),
+    const EXPS4: [(usize, usize, usize); 64] = [
         (1, 1, 0),
         (1, 1, 1),
         (1, 1, 1),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index 6c3e0bad..06853df5 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -14,7 +14,7 @@ global power_data_4:
     BYTES 010, 100, 110, 010, 110, 100, 110, 101, 101, 001
     BYTES 001, 110, 110, 110, 010, 110, 101, 001, 010, 010
     BYTES 110, 110, 110, 010, 101, 110, 101, 010, 101, 001
-    BYTES 000, 111, 111, 110, 111
+    BYTES 000, 111, 111, 110
 
 global power_data_2:
     BYTES 11, 01, 11, 10, 11, 10, 01, 10, 00, 01
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index 8fc28e9e..af7072d2 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -27,8 +27,8 @@ global power:
     // stack:    1, 1, 1, ptr, out, retdest
     %mstore_kernel_general(200)  %mstore_kernel_general(224)  %mstore_kernel_general(212)
     // stack:             ptr, out, retdest  {200: y0, 212: y2, 224: y4}
-    PUSH 65  PUSH 62  PUSH 65
-    // stack: 65, 62, 65, ptr, out, retdest  {200: y0, 212: y2, 224: y4}
+    PUSH 65  PUSH 62  PUSH 64
+    // stack: 64, 62, 65, ptr, out, retdest  {200: y0, 212: y2, 224: y4}
     %jump(power_loop_4)
 
 power_return:
@@ -75,7 +75,7 @@ power_return_5:
     %jump(mul_fp12)
 
 /// def power_loop_4():
-///     for i in range(65):
+///     for i in range(64):
 ///         abc = load(i, power_data_4)
 ///         if a:
 ///             y4 *= acc

From de494dcf3b4dad24407d6002d996109083741e56 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 29 Dec 2022 14:12:36 -0800
Subject: [PATCH 095/201] remove prints

---
 evm/src/bn254.rs | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 3e11e02f..701ced16 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -730,8 +730,6 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
 pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
     let mut out = miller_loop(p, q);
 
-    println!("POSTMILLER: {:#?}", out);
-
     let inv = inv_fp12(out);
     out = frob_fp12(6, out);
     out = mul_fp12(out, inv);
@@ -739,13 +737,7 @@ pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
     let acc = frob_fp12(2, out);
     out = mul_fp12(out, acc);
 
-    println!("PREPOWER: {:#?}", out);
-
     let pow = power(out);
-
-    println!("POWER: {:#?}", pow);
-
     out = frob_fp12(3, out);
-
     mul_fp12(out, pow)
 }

From 2b91a1a664f99de158be6ef57ba734d72d6e2602 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 30 Dec 2022 20:08:22 -0800
Subject: [PATCH 096/201] simplify miller loop

---
 .../bn254/curve_arithmetic/constants.asm      |  14 +-
 .../bn254/curve_arithmetic/miller_loop.asm    | 138 ++++++++----------
 2 files changed, 65 insertions(+), 87 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index 06853df5..7aa437a8 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -1,11 +1,11 @@
 global miller_data:
-    BYTES 0x60, 0x12, 0x22, 0x11, 0x51, 0x54, 0x14, 0x11
-    BYTES 0x13, 0x12, 0x34, 0x11, 0x32, 0x21, 0x42, 0x14
-    BYTES 0x12, 0x14, 0x51, 0x22, 0x15, 0x11, 0x12, 0x31
-    BYTES 0x11, 0x24, 0x11, 0x11, 0x26, 0x16, 0x21, 0x21
-    BYTES 0x21, 0x11, 0x13, 0x15, 0x11, 0x34, 0x21, 0x12
-    BYTES 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11, 0x32
-    BYTES 0x32, 0x12, 0x13, 0x22, 0x15
+    BYTES 0xdc, 0x22, 0x42, 0x21, 0xa1, 0xa4, 0x24, 0x21
+    BYTES 0x23, 0x22, 0x64, 0x21, 0x62, 0x41, 0x82, 0x24
+    BYTES 0x22, 0x24, 0xa1, 0x42, 0x25, 0x21, 0x22, 0x61
+    BYTES 0x21, 0x44, 0x21, 0x21, 0x46, 0x26, 0x41, 0x41
+    BYTES 0x41, 0x21, 0x23, 0x25, 0x21, 0x64, 0x41, 0x22
+    BYTES 0x21, 0x27, 0x41, 0x43, 0x22, 0x64, 0x21, 0x62
+    BYTES 0x62, 0x22, 0x23, 0x42, 0x25
 
 global power_data_4:
     BYTES 111, 010, 011, 111, 110, 101, 001, 100, 001, 100
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 3ef6ddb7..925f9f2b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -10,14 +10,14 @@
 /// def miller_loop():
 ///     while times:
 ///         0xnm = load(miller_data)
-///         while 0xnm > 0x10:
+///         while 0xnm >= 0x20:
 ///             miller_one()
 ///         while 0xnm:
 ///             miller_zero()
 ///         times -= 1
 ///
 /// def miller_one():
-///     0xnm -= 0x10
+///     0xnm -= 0x20
 ///     mul_tangent()
 ///     mul_cord()
 ///
@@ -32,7 +32,7 @@
 /// (3) counting the lengths of runs of 1s then 0s in exp, e.g.
 ///     exp = 1100010011110 => EXP = [(2,3), (1,2), (4,1)]
 /// (4) encoding each pair (n,m) as 0xnm:
-///     miller_data = [(0x10)n + m for (n,m) in EXP]
+///     miller_data = [(0x20)n + m for (n,m) in EXP]
 
 global test_miller:
     // stack: ptr, P, Q, ptr, out, retdest
@@ -54,11 +54,13 @@ global miller_init:
     // stack:     O, P, Q, out, retdest
     PUSH 53
     // stack: 53, O, P, Q, out, retdest
+    PUSH 0 // this placeholder lets miller_loop start with POP
 global miller_loop:
+    POP
     // stack:          times  , O, P, Q, out, retdest
     DUP1  ISZERO
     // stack:  break?, times  , O, P, Q, out, retdest
-    %jumpi(miller_final)
+    %jumpi(miller_end)
     // stack:          times  , O, P, Q, out, retdest
     %sub_const(1)
     // stack:          times-1, O, P, Q, out, retdest
@@ -67,29 +69,19 @@ global miller_loop:
     %mload_kernel_code(miller_data)
     // stack:    0xnm, times-1, O, P, Q, out, retdest
     %jump(miller_one)
-
-miller_loop_pop:
-    POP  %jump(miller_loop)
-
-miller_final:
-    // stack:     0, O, P, Q, out, retdest
-    PUSH 28
-    // stack: 28, 0, O, P, Q, out, retdest
-    %jump(miller_zero_final)
 miller_end:
-    // stack: m, times, O, P, Q, out, retdest
-    %pop2  %pop2  %pop2  %pop4  POP
-    // stack:                    out, retdest
+    // stack: times, O, P, Q, out, retdest
+    POP  %pop2  %pop2  %pop4  POP
+    // stack:                      retdest
     JUMP 
 
-
 miller_one:
     // stack:               0xnm, times, O, P, Q, out, retdest
-    DUP1  %lt_const(0x10) 
+    DUP1  %lt_const(0x20) 
     // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
     %jumpi(miller_zero)
     // stack:               0xnm, times, O, P, Q, out, retdest
-    %sub_const(0x10)
+    %sub_const(0x20)
     // stack:           0x{n-1}m, times, O, P, Q, out, retdest
     PUSH mul_cord
     // stack: mul_cord, 0x{n-1}m, times, O, P, Q, out, retdest
@@ -99,7 +91,7 @@ miller_zero:
     // stack:              m  , times, O, P, Q, out, retdest
     DUP1  ISZERO
     // stack:       skip?, m  , times, O, P, Q, out, retdest
-    %jumpi(miller_loop_pop)
+    %jumpi(miller_loop)
     // stack:              m  , times, O, P, Q, out, retdest
     %sub_const(1)
     // stack:              m-1, times, O, P, Q, out, retdest
@@ -107,18 +99,6 @@ miller_zero:
     // stack: miller_zero, m-1, times, O, P, Q, out, retdest
     %jump(mul_tangent)
 
-miller_zero_final:
-    // stack:                    m  , times, O, P, Q, out, retdest
-    DUP1  ISZERO
-    // stack:             skip?, m  , times, O, P, Q, out, retdest
-    %jumpi(miller_end)
-    // stack:                    m  , times, O, P, Q, out, retdest
-    %sub_const(1)
-    // stack:                    m-1, times, O, P, Q, out, retdest
-    PUSH miller_zero_final
-    // stack: miller_zero_final, m-1, times, O, P, Q, out, retdest
-    %jump(mul_tangent)
-
 
 /// def mul_tangent()
 ///     out = square_fp12(out)
@@ -157,7 +137,6 @@ after_double:
     // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
     JUMP
 
-
 /// def mul_cord()
 ///     line = cord(P, O, Q)
 ///     out = mul_fp12_sparse(out, line)
@@ -196,53 +175,6 @@ after_add:
     %jump(miller_one)
 
 
-/// def cord(p1x, p1y, p2x, p2y, qx, qy):
-///     return sparse_store(
-///         p1y*p2x - p2y*p1x, 
-///         (p2y - p1y) * qx, 
-///         (p1x - p2x) * qy,
-///     )
-
-%macro cord
-    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    DUP1  DUP5  MULFP254
-    // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    DUP3  DUP5  MULFP254
-    // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    SUBFP254
-    // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    %mstore_kernel_general(100)
-    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    SWAP3
-    // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
-    SUBFP254
-    // stack:                    p2y - p1y, p2x , p1x, qx, qx_, qy, qy_
-    SWAP2
-    // stack:                    p1x , p2x, p2y - p1y, qx, qx_, qy, qy_
-    SUBFP254
-    // stack:                    p1x - p2x, p2y - p1y, qx, qx_, qy, qy_
-    SWAP4
-    // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    DUP5  MULFP254
-    // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    %mstore_kernel_general(108)
-    // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
-    SWAP1
-    // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
-    DUP2  MULFP254
-    // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
-    %mstore_kernel_general(102)
-    // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
-    MULFP254
-    // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
-    %mstore_kernel_general(103)
-    // stack:                                            p1x - p2x, qy_
-    MULFP254
-    // stack:                                           (p1x - p2x)*qy_
-    %mstore_kernel_general(109)
-%endmacro
-
-
 /// def tangent(px, py, qx, qy):
 ///     return sparse_store(
 ///         py**2 - 9, 
@@ -290,3 +222,49 @@ after_add:
     // stack:                                (2py)*qy_ 
     %mstore_kernel_general(109)
 %endmacro
+
+/// def cord(p1x, p1y, p2x, p2y, qx, qy):
+///     return sparse_store(
+///         p1y*p2x - p2y*p1x, 
+///         (p2y - p1y) * qx, 
+///         (p1x - p2x) * qy,
+///     )
+
+%macro cord
+    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    DUP1  DUP5  MULFP254
+    // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    DUP3  DUP5  MULFP254
+    // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    SUBFP254
+    // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    %mstore_kernel_general(100)
+    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    SWAP3
+    // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:                    p2y - p1y, p2x , p1x, qx, qx_, qy, qy_
+    SWAP2
+    // stack:                    p1x , p2x, p2y - p1y, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:                    p1x - p2x, p2y - p1y, qx, qx_, qy, qy_
+    SWAP4
+    // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
+    DUP5  MULFP254
+    // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
+    %mstore_kernel_general(108)
+    // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
+    SWAP1
+    // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
+    DUP2  MULFP254
+    // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
+    %mstore_kernel_general(102)
+    // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
+    MULFP254
+    // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
+    %mstore_kernel_general(103)
+    // stack:                                            p1x - p2x, qy_
+    MULFP254
+    // stack:                                           (p1x - p2x)*qy_
+    %mstore_kernel_general(109)
+%endmacro

From 5cd86b6652f3f0a67fe2b0f99fff14fe7bded04f Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 30 Dec 2022 20:58:04 -0800
Subject: [PATCH 097/201] names and format

---
 .../bn254/curve_arithmetic/constants.asm      |  87 +++++++----
 .../bn254/curve_arithmetic/miller_loop.asm    |   4 +-
 .../bn254/curve_arithmetic/tate_pairing.asm   |  84 +++++------
 .../curve/bn254/field_arithmetic/fp12_mul.asm | 138 ++++++------------
 .../curve/bn254/field_arithmetic/power.asm    |   2 +-
 5 files changed, 153 insertions(+), 162 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index 7aa437a8..014f4a9a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -1,35 +1,70 @@
 global miller_data:
-    BYTES 0xdc, 0x22, 0x42, 0x21, 0xa1, 0xa4, 0x24, 0x21
-    BYTES 0x23, 0x22, 0x64, 0x21, 0x62, 0x41, 0x82, 0x24
-    BYTES 0x22, 0x24, 0xa1, 0x42, 0x25, 0x21, 0x22, 0x61
-    BYTES 0x21, 0x44, 0x21, 0x21, 0x46, 0x26, 0x41, 0x41
-    BYTES 0x41, 0x21, 0x23, 0x25, 0x21, 0x64, 0x41, 0x22
-    BYTES 0x21, 0x27, 0x41, 0x43, 0x22, 0x64, 0x21, 0x62
-    BYTES 0x62, 0x22, 0x23, 0x42, 0x25
+    BYTES 0xdc, 0x22, 0x42, 0x21
+    BYTES 0xa1, 0xa4, 0x24, 0x21
+    BYTES 0x23, 0x22, 0x64, 0x21
+    BYTES 0x62, 0x41, 0x82, 0x24
+    BYTES 0x22, 0x24, 0xa1, 0x42
+    BYTES 0x25, 0x21, 0x22, 0x61
+    BYTES 0x21, 0x44, 0x21, 0x21
+    BYTES 0x46, 0x26, 0x41, 0x41
+    BYTES 0x41, 0x21, 0x23, 0x25
+    BYTES 0x21, 0x64, 0x41, 0x22
+    BYTES 0x21, 0x27, 0x41, 0x43
+    BYTES 0x22, 0x64, 0x21, 0x62
+    BYTES 0x62, 0x22, 0x23, 0x42
+    BYTES 0x25
 
 global power_data_4:
-    BYTES 111, 010, 011, 111, 110, 101, 001, 100, 001, 100
-    BYTES 110, 110, 110, 011, 011, 101, 011, 101, 101, 111
-    BYTES 000, 011, 011, 001, 011, 001, 101, 100, 100, 000
-    BYTES 010, 100, 110, 010, 110, 100, 110, 101, 101, 001
-    BYTES 001, 110, 110, 110, 010, 110, 101, 001, 010, 010
-    BYTES 110, 110, 110, 010, 101, 110, 101, 010, 101, 001
+    BYTES 111, 010, 011, 111
+    BYTES 110, 101, 001, 100
+    BYTES 001, 100, 110, 110
+    BYTES 110, 011, 011, 101
+    BYTES 011, 101, 101, 111
+    BYTES 000, 011, 011, 001
+    BYTES 011, 001, 101, 100
+    BYTES 100, 000, 010, 100
+    BYTES 110, 010, 110, 100
+    BYTES 110, 101, 101, 001
+    BYTES 001, 110, 110, 110
+    BYTES 010, 110, 101, 001
+    BYTES 010, 010, 110, 110
+    BYTES 110, 010, 101, 110
+    BYTES 101, 010, 101, 001
     BYTES 000, 111, 111, 110
 
 global power_data_2:
-    BYTES 11, 01, 11, 10, 11, 10, 01, 10, 00, 01
-    BYTES 10, 11, 01, 11, 10, 01, 00, 00, 00, 01
-    BYTES 10, 01, 01, 10, 00, 01, 11, 00, 01, 00
-    BYTES 10, 11, 11, 00, 11, 10, 11, 00, 11, 01
-    BYTES 11, 11, 11, 01, 01, 00, 00, 11, 00, 11
-    BYTES 11, 01, 01, 10, 11, 10, 11, 10, 10, 00
+    BYTES 11, 01, 11, 10
+    BYTES 11, 10, 01, 10
+    BYTES 00, 01, 10, 11
+    BYTES 01, 11, 10, 01
+    BYTES 00, 00, 00, 01
+    BYTES 10, 01, 01, 10
+    BYTES 00, 01, 11, 00
+    BYTES 01, 00, 10, 11
+    BYTES 11, 00, 11, 10
+    BYTES 11, 00, 11, 01
+    BYTES 11, 11, 11, 01
+    BYTES 01, 00, 00, 11
+    BYTES 00, 11, 11, 01
+    BYTES 01, 10, 11, 10
+    BYTES 11, 10, 10, 00
     BYTES 11, 10
 
 global power_data_0:
-    BYTES 0, 1, 1, 0, 0, 1, 1, 1, 1, 0
-    BYTES 0, 0, 1, 0, 0, 1, 1, 0, 1, 0
-    BYTES 1, 1, 1, 1, 0, 0, 1, 1, 1, 0
-    BYTES 1, 0, 1, 0, 0, 0, 0, 0, 1, 1
-    BYTES 0, 1, 0, 1, 0, 0, 1, 0, 0, 0
-    BYTES 1, 0, 1, 1, 1, 0, 1, 0, 1, 1
-    BYTES 0, 0, 1, 0, 0
+    BYTES 0, 1, 1, 0
+    BYTES 0, 1, 1, 1
+    BYTES 1, 0, 0, 0
+    BYTES 1, 0, 0, 1
+    BYTES 1, 0, 1, 0
+    BYTES 1, 1, 1, 1
+    BYTES 0, 0, 1, 1
+    BYTES 1, 0, 1, 0
+    BYTES 1, 0, 0, 0
+    BYTES 0, 0, 1, 1
+    BYTES 0, 1, 0, 1
+    BYTES 0, 0, 1, 0
+    BYTES 0, 0, 1, 0
+    BYTES 1, 1, 1, 0
+    BYTES 1, 0, 1, 1
+    BYTES 0, 0, 1, 0
+    BYTES 0
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 925f9f2b..844b38a5 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -60,7 +60,7 @@ global miller_loop:
     // stack:          times  , O, P, Q, out, retdest
     DUP1  ISZERO
     // stack:  break?, times  , O, P, Q, out, retdest
-    %jumpi(miller_end)
+    %jumpi(miller_return)
     // stack:          times  , O, P, Q, out, retdest
     %sub_const(1)
     // stack:          times-1, O, P, Q, out, retdest
@@ -69,7 +69,7 @@ global miller_loop:
     %mload_kernel_code(miller_data)
     // stack:    0xnm, times-1, O, P, Q, out, retdest
     %jump(miller_one)
-miller_end:
+miller_return:
     // stack: times, O, P, Q, out, retdest
     POP  %pop2  %pop2  %pop4  POP
     // stack:                      retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 7e0f6d77..5957d02a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -21,70 +21,70 @@ global test_tate:
     %jump(tate)
 
 global tate:
-    // stack:                      ptr, out, retdest
+    // stack:                        ptr, out, retdest
     DUP2
-    // stack:                 out, ptr, out, retdest
-    PUSH post_mllr
-    // stack:      post_mllr, out, ptr, out, retdest
+    // stack:                   out, ptr, out, retdest
+    PUSH post_miller
+    // stack:      post_miller, out, ptr, out, retdest
     SWAP2
-    // stack:      ptr, out, post_mllr, out, retdest
+    // stack:      ptr, out, post_miller, out, retdest
     %jump(miller_init)
-global post_mllr:    
-    // stack:                           out, retdest
-    PUSH tate_inv
-    // stack:                 tate_inv, out, retdest
+global post_miller:    
+    // stack:                             out, retdest
+    PUSH tate_mul_1
+    // stack:                 tate_mul_1, out, retdest
     PUSH 100 
-    // stack:            100, tate_inv, out, retdest
+    // stack:            100, tate_mul_1, out, retdest
     DUP3 
-    // stack:       out, 100, tate_inv, out, retdest
+    // stack:       out, 100, tate_mul_1, out, retdest
     %jump(inv_fp12)
-tate_inv:
-    // stack:                           out, retdest  {100: inv}
+tate_mul_1:
+    // stack:                             out, retdest  {100: inv}
     %frob_fp12_6
-    // stack:                           out, retdest  {100: inv}
-    PUSH tate_mul1
-    // stack:                tate_mul1, out, retdest  {100: inv}
+    // stack:                             out, retdest  {100: inv}
+    PUSH tate_mul_2
+    // stack:                 tate_mul_2, out, retdest  {100: inv}
     DUP2
-    // stack:           out, tate_mul1, out, retdest  {100: inv}
+    // stack:            out, tate_mul_2, out, retdest  {100: inv}
     PUSH 100 
-    // stack:      100, out, tate_mul1, out, retdest  {100: inv}
+    // stack:       100, out, tate_mul_2, out, retdest  {100: inv}
     DUP2
-    // stack: out, 100, out, tate_mul1, out, retdest  {100: inv}
+    // stack:  out, 100, out, tate_mul_2, out, retdest  {100: inv}
     %jump(mul_fp12)
-tate_mul1:
-    // stack:                           out, retdest  {100: inv}
-    PUSH tate_mul2
-    // stack:                tate_mul2, out, retdest  {100: inv}
+tate_mul_2:
+    // stack:                             out, retdest  {100: inv}
+    PUSH tate_power
+    // stack:                 tate_power, out, retdest  {100: inv}
     DUP2
-    // stack:           out, tate_mul2, out, retdest  {100: inv}
+    // stack:            out, tate_power, out, retdest  {100: inv}
     PUSH 100
-    // stack:      100, out, tate_mul2, out, retdest  {100: inv}       
-    DUP2
-    // stack: out, 100, out, tate_mul2, out, retdest  {100: inv}
+    // stack:       100, out, tate_power, out, retdest  {100: inv}       
+    DUP2 
+    // stack:  out, 100, out, tate_power, out, retdest  {100: inv}
     %frob_fp12_2_
-    // stack:      100, out, tate_mul2, out, retdest  {100: acc} 
+    // stack:       100, out, tate_power, out, retdest  {100: acc} 
     DUP2
-    // stack: out, 100, out, tate_mul2, out, retdest  {100: acc}
+    // stack:  out, 100, out, tate_power, out, retdest  {100: acc}
     %jump(mul_fp12)
-tate_mul2: 
-    // stack:                           out, retdest  {100: acc}
-    PUSH post_pow
-    // stack:                 post_pow, out, retdest  {100: acc}
+tate_power: 
+    // stack:                             out, retdest  {100: acc}
+    PUSH tate_return
+    // stack:                tate_return, out, retdest  {100: acc}
     PUSH 100
-    // stack:            100, post_pow, out, retdest  {100: acc}
+    // stack:           100, tate_return, out, retdest  {100: acc}
     PUSH 300
-    // stack:       300, 100, post_pow, out, retdest  {100: acc}
+    // stack:      300, 100, tate_return, out, retdest  {100: acc}
     DUP4
-    // stack:  out, 300, 100, post_pow, out, retdest  {100: acc}
+    // stack: out, 300, 100, tate_return, out, retdest  {100: acc}
     %move_fp12
-    // stack:       300, 100, post_pow, out, retdest  {100: acc, 300: out}
+    // stack:      300, 100, tate_return, out, retdest  {100: acc, 300: out}
     %jump(power)
-post_pow: 
-    // stack:                           out, retdest  {100: pow}
+tate_return: 
+    // stack:                             out, retdest  {100: pow}
     PUSH 100
-    // stack:                      100, out, retdest  {100: pow}
+    // stack:                        100, out, retdest  {100: pow}
     DUP2
-    // stack:                 out, 100, out, retdest  {100: pow}
+    // stack:                   out, 100, out, retdest  {100: pow}
     %frob_fp12_3
-    // stack:                 out, 100, out, retdest  {100: pow}
+    // stack:                   out, 100, out, retdest  {100: pow}
     %jump(mul_fp12)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index a6ec278b..11e68887 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -63,14 +63,14 @@ global mul_fp12:
     // stack:                      inB', f', inA, inB, out 
     %load_fp6
     // stack:                        g', f', inA, inB, out 
-    PUSH ret_1
-    // stack:                 ret_1, g', f', inA, inB, out 
+    PUSH mul_fp12_1
+    // stack:            mul_fp12_1, g', f', inA, inB, out 
     %dup_fp6_7
-    // stack:             f', ret_1, g', f', inA, inB, out 
+    // stack:        f', mul_fp12_1, g', f', inA, inB, out 
     %dup_fp6_7
-    // stack:         g', f', ret_1, g', f', inA, inB, out 
+    // stack:    g', f', mul_fp12_1, g', f', inA, inB, out 
     %jump(mul_fp6)
-ret_1:
+mul_fp12_1:
     // stack:                f'g', g'  , f', inA, inB, out 
     %dup_fp6_0
     // stack:          f'g', f'g', g'  , f', inA, inB, out 
@@ -92,29 +92,29 @@ ret_1:
     // stack:           g+g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %swap_fp6_hole
     // stack:              g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    PUSH ret_2
-    // stack:       ret_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    PUSH mul_fp12_2
+    // stack:  mul_fp12_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     SWAP7
-    // stack:       inA, g, ret_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:  inA, g, mul_fp12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %load_fp6
-    // stack:         f, g, ret_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:    f, g, mul_fp12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %jump(mul_fp6)
-ret_2:    
+mul_fp12_2:    
     // stack:                  fg, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %store_fp6(12)
     // stack:                      g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %swap_fp6
     // stack:                      f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    PUSH ret_3
-    // stack:               ret_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    PUSH mul_fp12_3
+    // stack:          mul_fp12_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     SWAP13
-    // stack:               inA, f', g+g', ret_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:          inA, f', g+g', mul_fp12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp6
-    // stack:                  f,f', g+g', ret_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:             f,f', g+g', mul_fp12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp6
-    // stack:                  f+f', g+g', ret_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:             f+f', g+g', mul_fp12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %jump(mul_fp6)
-ret_3:
+mul_fp12_3:
     // stack:                       (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp6(12)
     // stack:                   fg, (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
@@ -272,50 +272,6 @@ global mul_fp12_sparse:
     // stack:                                                                    inA, inB, out
     %pop3  JUMP
 
-/// global mul_fp12_sparse_fast:
-///    // stack:                                                            inA, inB, out
-///    DUP2
-///    // stack:                                                       inB, inA, inB, out
-///    %load_fp12_sparse
-///    // stack:                                               g0, G1, G1', inA, inB, out
-///    DUP6  %offset_fp6
-///    // stack:                                         inA', g0, G1, G1', inA, inB, out
-///    %load_fp6
-///    // stack:                                           f', g0, G1, G1', inA, inB, out
-///    DUP12
-///    // stack:                                      inA, f', g0, G1, G1', inA, inB, out
-///    %load_fp6
-///    // stack:                                        f, f', g0, G1, G1', inA, inB, out
-///    %clone_mul_fp_fp6
-///    // stack:                                 (g0)f, f, f', g0, G1, G1', inA, inB, out
-///    %clone_mul_fp2_fp6_sh
-///    // stack:                     (G1)sh(f) , (g0)f, f, f', g0, G1, G1', inA, inB, out
-///    %add_fp6
-///    // stack:                     (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out
-///    %clone_mul_fp2_fp6_sh2
-///    // stack:      (G1')sh2(f') , (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
-///    %add_fp6
-///    // stack:      (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
-///    DUP26
-///    // stack: out, (G1')sh2(f') + (G1)sh(f) + (g0)f, f, f', g0, G1, G1', inA, inB, out 
-///    %store_fp6
-///    // stack:                                        f, f', g0, G1, G1', inA, inB, out 
-///    %semiclone_mul_fp2_fp6_sh
-///    // stack:                               (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %clone_mul_fp2_fp6_sh
-///    // stack:                  (G1)sh(f') , (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %add_fp6
-///    // stack:                  (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %clone_mul_fp_fp6
-///    // stack:         (g0)f' , (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %add_fp6
-///    // stack:         (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    DUP20  offset_fp6
-///    // stack:   out', (g0)f' + (G1)sh(f') + (G1')sh(f), f', g0, G1, G1', inA, inB, out 
-///    %store_fp6
-///    // stack:                                           f', g0, G1, G1', inA, inB, out 
-///    %pop14
-
 
 /////////////////////////
 ///// FP12 SQUARING /////
@@ -366,56 +322,56 @@ global square_fp12_test:
     %jump(square_fp12)
 
 global square_fp12:
-    // stack:                                                    inp, out
+    // stack:                                                                   inp, out
     DUP1
-    // stack:                                               inp, inp, out
+    // stack:                                                              inp, inp, out
     %load_fp6 
-    // stack:                                                 f, inp, out
-    PUSH post_sq2
-    // stack:                                       post_sq2, f, inp, out
+    // stack:                                                                f, inp, out
+    PUSH square_fp12_3
+    // stack:                                                 square_fp12_3, f, inp, out
     SWAP7
-    // stack:                                       inp, f, post_sq2, out
-    PUSH post_sq1
-    // stack:                             post_sq1, inp, f, post_sq2, out 
+    // stack:                                                 inp, f, square_fp12_3, out
+    PUSH square_fp12_2
+    // stack:                                  square_fp12_2, inp, f, square_fp12_3, out 
     %dup_fp6_2
-    // stack:                         f , post_sq1, inp, f, post_sq2, out
+    // stack:                              f , square_fp12_2, inp, f, square_fp12_3, out
     DUP16  %offset_fp6
-    // stack:                   out', f , post_sq1, inp, f, post_sq2, out
-    PUSH post_mul
-    // stack:         post_mul, out', f , post_sq1, inp, f, post_sq2, out
+    // stack:                        out', f , square_fp12_2, inp, f, square_fp12_3, out
+    PUSH square_fp12_1
+    // stack:         square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
     DUP10  %offset_fp6
-    // stack:   inp', post_mul, out', f , post_sq1, inp, f, post_sq2, out
+    // stack:   inp', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
     %load_fp6
-    // stack:     f', post_mul, out', f , post_sq1, inp, f, post_sq2, out
+    // stack:     f', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
     %swap_fp6_hole_2
-    // stack:     f , post_mul, out', f', post_sq1, inp, f, post_sq2, out
+    // stack:     f , square_fp12_1, out', f', square_fp12_2, inp, f, square_fp12_3, out
     %dup_fp6_8
-    // stack: f', f , post_mul, out', f', post_sq1, inp, f, post_sq2, out
+    // stack: f', f , square_fp12_1, out', f', square_fp12_2, inp, f, square_fp12_3, out
     %jump(mul_fp6)
-post_mul:
-    // stack:              f'f, out', f', post_sq1, inp, f, post_sq2, out
+square_fp12_1:
+    // stack:                   f'f, out', f', square_fp12_2, inp, f, square_fp12_3, out
     DUP7
-    // stack:        out', f'f, out', f', post_sq1, inp, f, post_sq2, out
+    // stack:             out', f'f, out', f', square_fp12_2, inp, f, square_fp12_3, out
     %store_fp6_double
-    // stack:                   out', f', post_sq1, inp, f, post_sq2, out
+    // stack:                        out', f', square_fp12_2, inp, f, square_fp12_3, out
     POP
-    // stack:                         f', post_sq1, inp, f, post_sq2, out
+    // stack:                              f', square_fp12_2, inp, f, square_fp12_3, out
     %jump(square_fp6)
-post_sq1:
-    // stack:                                 f'f', inp, f, post_sq2, out
+square_fp12_2:
+    // stack:                                           f'f', inp, f, square_fp12_3, out
     %sh
-    // stack:                             sh(f'f'), inp, f, post_sq2, out
+    // stack:                                       sh(f'f'), inp, f, square_fp12_3, out
     %swap_fp6_hole
-    // stack:                             f, inp, sh(f'f'), post_sq2, out
+    // stack:                                       f, inp, sh(f'f'), square_fp12_3, out
     SWAP6  SWAP13  SWAP6
-    // stack:                             f, post_sq2, sh(f'f'), inp, out
+    // stack:                                       f, square_fp12_3, sh(f'f'), inp, out
     %jump(square_fp6)
-post_sq2:
-    // stack:                                     ff , sh(f'f'), inp, out
+square_fp12_3:
+    // stack:                                                    ff , sh(f'f'), inp, out
     %add_fp6
-    // stack:                                     ff + sh(f'f'), inp, out
+    // stack:                                                    ff + sh(f'f'), inp, out
     DUP8
-    // stack:                                out, ff + sh(f'f'), inp, out
+    // stack:                                               out, ff + sh(f'f'), inp, out
     %store_fp6
-    // stack:                                                    inp, out
+    // stack:                                                                   inp, out
     %pop2  JUMP
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index af7072d2..a0d38a04 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -74,6 +74,7 @@ power_return_5:
     // stack:                      out, 212, out, retdest  {236: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 
+
 /// def power_loop_4():
 ///     for i in range(64):
 ///         abc = load(i, power_data_4)
@@ -197,7 +198,6 @@ power_loop_2_end:
     // stack: ptr, 212, 212, power_loop_0, k, ptr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 
-
 power_loop_0:
     // stack:                                 k  , ptr
     DUP1  ISZERO

From 698ab6e749f3bc5503fb5bf010f365ec075a3e53 Mon Sep 17 00:00:00 2001
From: Dima V <50062893+typ3c4t@users.noreply.github.com>
Date: Wed, 11 Jan 2023 05:43:56 -0800
Subject: [PATCH 098/201] Update evm/src/bn254.rs

Co-authored-by: Jacqueline Nabaglo <jakub@mirprotocol.org>
---
 evm/src/bn254.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 701ced16..01151ca7 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -5,10 +5,10 @@ use itertools::Itertools;
 use rand::{thread_rng, Rng};
 
 pub const BN_BASE: U256 = U256([
-    4332616871279656263,
-    10917124144477883021,
-    13281191951274694749,
-    3486998266802970665,
+    0x3c208c16d87cfd47,
+    0x97816a916871ca8d,
+    0xb85045b68181585d,
+    0x30644e72e131a029,
 ]);
 
 pub type Fp = U256;

From 068f74854a7eea1163593bf57ba519c0aca2faa8 Mon Sep 17 00:00:00 2001
From: Dima V <50062893+typ3c4t@users.noreply.github.com>
Date: Wed, 11 Jan 2023 05:45:08 -0800
Subject: [PATCH 099/201] Update evm/src/witness/util.rs

Co-authored-by: Jacqueline Nabaglo <jakub@mirprotocol.org>
---
 evm/src/witness/util.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs
index 697ebb80..eb8988a0 100644
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@@ -39,7 +39,7 @@ pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Opti
     )))
 }
 
-/// Peak at the entire stack.
+/// Peek at the entire stack.
 pub(crate) fn stack_peeks<F: Field>(state: &GenerationState<F>) -> Option<Vec<U256>> {
     let n = state.registers.stack_len;
     let mut stack: Vec<U256> = vec![];

From 82ce8153ccf40f9570f80f30cb71429e465939af Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 11 Jan 2023 17:54:43 +0400
Subject: [PATCH 100/201] \n

---
 evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm | 1 -
 1 file changed, 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
index ab02f897..ea713056 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
@@ -1075,4 +1075,3 @@ global return_fp12_on_stack:
     // stack: out
     %load_fp12
     %jump(0xdeadbeef)
-    
\ No newline at end of file

From 42f98a09ae42ce048d1b85d0e4cac1ad0a90da85 Mon Sep 17 00:00:00 2001
From: Dima V <50062893+typ3c4t@users.noreply.github.com>
Date: Wed, 11 Jan 2023 05:58:26 -0800
Subject: [PATCH 101/201] Update evm/src/bn254.rs

Co-authored-by: Jacqueline Nabaglo <jakub@mirprotocol.org>
---
 evm/src/bn254.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 01151ca7..28bc27fb 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -59,7 +59,7 @@ pub fn twisted_curve_generator() -> TwistedCurve {
     ]
 }
 
-const ZERO: Fp = U256([0, 0, 0, 0]);
+const ZERO: Fp = U256::zero();
 
 fn embed_fp2(x: Fp) -> Fp2 {
     [x, ZERO]

From 32f24819dd332e7d33d8ef12d643ca93f8a55fcb Mon Sep 17 00:00:00 2001
From: Dima V <50062893+typ3c4t@users.noreply.github.com>
Date: Thu, 12 Jan 2023 20:36:47 -0800
Subject: [PATCH 102/201] Update
 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm

Co-authored-by: Jacqueline Nabaglo <jakub@mirprotocol.org>
---
 .../cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index 3da6f931..e090e4e9 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -249,7 +249,7 @@ global ec_double:
     // stack: retdest
     PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
     // stack: u256::MAX, retdest
-    PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+    DUP1
     // stack: u256::MAX, u256::MAX, retdest
     SWAP2
     // stack: retdest, u256::MAX, u256::MAX

From 70d7fb13522d678a2818125c38319ed82c4b5bce Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 13 Jan 2023 08:47:15 +0400
Subject: [PATCH 103/201] cleaner inv

---
 .../curve/bn254/field_arithmetic/inverse.asm  | 36 ++++-----
 evm/src/generation/prover_input.rs            | 78 +++++--------------
 2 files changed, 36 insertions(+), 78 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index e2fbb17b..3de39144 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -29,17 +29,17 @@ global test_inv_fp12:
     %jump(inv_fp12)
 
 global inv_fp12:
-    // stack:                 ptr, inv, retdest
+    // stack:                ptr, inv, retdest
     DUP1  %load_fp12
-    // stack:              f, ptr, inv, retdest
+    // stack:             f, ptr, inv, retdest
     DUP14
-    // stack:         inv, f, ptr, inv, retdest
+    // stack:        inv, f, ptr, inv, retdest
     %prover_inv_fp12
-    // stack:   f^-1, inv, f, ptr, inv, retdest
+    // stack:  f^-1, inv, f, ptr, inv, retdest
     DUP13  %store_fp12
-    // stack:         inv, f, ptr, inv, retdest
+    // stack:        inv, f, ptr, inv, retdest
     POP  %pop4  %pop4  %pop4
-    // stack:                 ptr, inv, retdest
+    // stack:                ptr, inv, retdest
     PUSH 50  PUSH check_inv
     // stack: check_inv, 50, ptr, inv, retdest 
     SWAP3  SWAP1  SWAP2
@@ -54,18 +54,18 @@ global check_inv:
     JUMP
 
 %macro prover_inv_fp12
-    PROVER_INPUT(ffe::bn254_base::ext_inv11)
-    PROVER_INPUT(ffe::bn254_base::ext_inv10)
-    PROVER_INPUT(ffe::bn254_base::ext_inv9)
-    PROVER_INPUT(ffe::bn254_base::ext_inv8)
-    PROVER_INPUT(ffe::bn254_base::ext_inv7)
-    PROVER_INPUT(ffe::bn254_base::ext_inv6)
-    PROVER_INPUT(ffe::bn254_base::ext_inv5)
-    PROVER_INPUT(ffe::bn254_base::ext_inv4)
-    PROVER_INPUT(ffe::bn254_base::ext_inv3)
-    PROVER_INPUT(ffe::bn254_base::ext_inv2)
-    PROVER_INPUT(ffe::bn254_base::ext_inv1)
-    PROVER_INPUT(ffe::bn254_base::ext_inv0)
+    PROVER_INPUT(ffe::bn254_base::ext_11)
+    PROVER_INPUT(ffe::bn254_base::ext_10)
+    PROVER_INPUT(ffe::bn254_base::ext_9)
+    PROVER_INPUT(ffe::bn254_base::ext_8)
+    PROVER_INPUT(ffe::bn254_base::ext_7)
+    PROVER_INPUT(ffe::bn254_base::ext_6)
+    PROVER_INPUT(ffe::bn254_base::ext_5)
+    PROVER_INPUT(ffe::bn254_base::ext_4)
+    PROVER_INPUT(ffe::bn254_base::ext_3)
+    PROVER_INPUT(ffe::bn254_base::ext_2)
+    PROVER_INPUT(ffe::bn254_base::ext_1)
+    PROVER_INPUT(ffe::bn254_base::ext_0)
 %endmacro
 
 %macro assert_eq_unit_fp12
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 2b9860ca..7352d6f1 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -7,10 +7,6 @@ use crate::bn254::{fp12_to_array, inv_fp12, vec_to_fp12};
 use crate::generation::prover_input::EvmField::{
     Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
-use crate::generation::prover_input::FieldExtOp::{
-    ExtInv0, ExtInv1, ExtInv10, ExtInv11, ExtInv2, ExtInv3, ExtInv4, ExtInv5, ExtInv6, ExtInv7,
-    ExtInv8, ExtInv9,
-};
 use crate::generation::prover_input::FieldOp::{Inverse, Sqrt};
 use crate::generation::state::GenerationState;
 use crate::witness::util::{stack_peek, stack_peeks};
@@ -60,9 +56,25 @@ impl<F: Field> GenerationState<F> {
     /// Finite field extension operations.
     fn run_ffe(&self, input_fn: &ProverInputFn) -> U256 {
         let field = EvmField::from_str(input_fn.0[1].as_str()).unwrap();
-        let op = FieldExtOp::from_str(input_fn.0[2].as_str()).unwrap();
+        let component = input_fn.0[2].as_str();
         let xs = stack_peeks(self).expect("Empty stack");
-        field.extop(op, xs)
+        // TODO: This sucks... come back later
+        let n = match component {
+            "ext_0" => 0,
+            "ext_1" => 1,
+            "ext_2" => 2,
+            "ext_3" => 3,
+            "ext_4" => 4,
+            "ext_5" => 5,
+            "ext_6" => 6,
+            "ext_7" => 7,
+            "ext_8" => 8,
+            "ext_9" => 9,
+            "ext_10" => 10,
+            "ext_11" => 11,
+            _ => panic!("out of bounds")
+        };
+        field.ext_inv(n, xs)
     }
 
     /// MPT data.
@@ -114,21 +126,6 @@ enum FieldOp {
     Sqrt,
 }
 
-enum FieldExtOp {
-    ExtInv0,
-    ExtInv1,
-    ExtInv2,
-    ExtInv3,
-    ExtInv4,
-    ExtInv5,
-    ExtInv6,
-    ExtInv7,
-    ExtInv8,
-    ExtInv9,
-    ExtInv10,
-    ExtInv11,
-}
-
 impl FromStr for EvmField {
     type Err = ();
 
@@ -155,28 +152,6 @@ impl FromStr for FieldOp {
     }
 }
 
-impl FromStr for FieldExtOp {
-    type Err = ();
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        Ok(match s {
-            "ext_inv0" => ExtInv0,
-            "ext_inv1" => ExtInv1,
-            "ext_inv2" => ExtInv2,
-            "ext_inv3" => ExtInv3,
-            "ext_inv4" => ExtInv4,
-            "ext_inv5" => ExtInv5,
-            "ext_inv6" => ExtInv6,
-            "ext_inv7" => ExtInv7,
-            "ext_inv8" => ExtInv8,
-            "ext_inv9" => ExtInv9,
-            "ext_inv10" => ExtInv10,
-            "ext_inv11" => ExtInv11,
-            _ => panic!("Unrecognized field extension operation."),
-        })
-    }
-}
-
 impl EvmField {
     fn order(&self) -> U256 {
         match self {
@@ -203,23 +178,6 @@ impl EvmField {
         }
     }
 
-    fn extop(&self, op: FieldExtOp, xs: Vec<U256>) -> U256 {
-        match op {
-            FieldExtOp::ExtInv0 => self.ext_inv(0, xs),
-            FieldExtOp::ExtInv1 => self.ext_inv(1, xs),
-            FieldExtOp::ExtInv2 => self.ext_inv(2, xs),
-            FieldExtOp::ExtInv3 => self.ext_inv(3, xs),
-            FieldExtOp::ExtInv4 => self.ext_inv(4, xs),
-            FieldExtOp::ExtInv5 => self.ext_inv(5, xs),
-            FieldExtOp::ExtInv6 => self.ext_inv(6, xs),
-            FieldExtOp::ExtInv7 => self.ext_inv(7, xs),
-            FieldExtOp::ExtInv8 => self.ext_inv(8, xs),
-            FieldExtOp::ExtInv9 => self.ext_inv(9, xs),
-            FieldExtOp::ExtInv10 => self.ext_inv(10, xs),
-            FieldExtOp::ExtInv11 => self.ext_inv(11, xs),
-        }
-    }
-
     fn inverse(&self, x: U256) -> U256 {
         let n = self.order();
         assert!(x < n);

From 4f38c3a7312df395301728f178fc3fe0a767f205 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 13 Jan 2023 08:48:51 +0400
Subject: [PATCH 104/201] rename ext_inv to inverse_fp12

---
 evm/src/generation/prover_input.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 7352d6f1..81ec775f 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -74,7 +74,7 @@ impl<F: Field> GenerationState<F> {
             "ext_11" => 11,
             _ => panic!("out of bounds")
         };
-        field.ext_inv(n, xs)
+        field.inverse_fp12(n, xs)
     }
 
     /// MPT data.
@@ -195,7 +195,7 @@ impl EvmField {
         modexp(x, q, n)
     }
 
-    fn ext_inv(&self, n: usize, xs: Vec<U256>) -> U256 {
+    fn inverse_fp12(&self, n: usize, xs: Vec<U256>) -> U256 {
         let offset = 12 - n;
         let vec: Vec<U256> = xs[offset..].to_vec();
         let f = fp12_to_array(inv_fp12(vec_to_fp12(vec)));

From 81861095d307b8984c32c6a089755d945ea59adf Mon Sep 17 00:00:00 2001
From: Dima V <50062893+typ3c4t@users.noreply.github.com>
Date: Thu, 12 Jan 2023 20:57:54 -0800
Subject: [PATCH 105/201] Update
 evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm

Co-authored-by: Jacqueline Nabaglo <jakub@mirprotocol.org>
---
 .../curve/bn254/field_arithmetic/inverse.asm  | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index e2fbb17b..e881ddda 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -70,15 +70,15 @@ global check_inv:
 
 %macro assert_eq_unit_fp12
     %assert_eq_const(1)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
-    %assert_eq_const(0)
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
 %endmacro

From 446a0d3f569ab65602801fe844f3dd1864ac6879 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 13 Jan 2023 08:58:17 +0400
Subject: [PATCH 106/201] rename prover input labels ext_N to component_N

---
 .../curve/bn254/field_arithmetic/inverse.asm  | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 3de39144..281670d2 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -54,18 +54,18 @@ global check_inv:
     JUMP
 
 %macro prover_inv_fp12
-    PROVER_INPUT(ffe::bn254_base::ext_11)
-    PROVER_INPUT(ffe::bn254_base::ext_10)
-    PROVER_INPUT(ffe::bn254_base::ext_9)
-    PROVER_INPUT(ffe::bn254_base::ext_8)
-    PROVER_INPUT(ffe::bn254_base::ext_7)
-    PROVER_INPUT(ffe::bn254_base::ext_6)
-    PROVER_INPUT(ffe::bn254_base::ext_5)
-    PROVER_INPUT(ffe::bn254_base::ext_4)
-    PROVER_INPUT(ffe::bn254_base::ext_3)
-    PROVER_INPUT(ffe::bn254_base::ext_2)
-    PROVER_INPUT(ffe::bn254_base::ext_1)
-    PROVER_INPUT(ffe::bn254_base::ext_0)
+    PROVER_INPUT(ffe::bn254_base::component_11)
+    PROVER_INPUT(ffe::bn254_base::component_10)
+    PROVER_INPUT(ffe::bn254_base::component_9)
+    PROVER_INPUT(ffe::bn254_base::component_8)
+    PROVER_INPUT(ffe::bn254_base::component_7)
+    PROVER_INPUT(ffe::bn254_base::component_6)
+    PROVER_INPUT(ffe::bn254_base::component_5)
+    PROVER_INPUT(ffe::bn254_base::component_4)
+    PROVER_INPUT(ffe::bn254_base::component_3)
+    PROVER_INPUT(ffe::bn254_base::component_2)
+    PROVER_INPUT(ffe::bn254_base::component_1)
+    PROVER_INPUT(ffe::bn254_base::component_0)
 %endmacro
 
 %macro assert_eq_unit_fp12

From 800ceb600007e9cd46de1e84adc97395e0ee15a2 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 13 Jan 2023 09:02:39 +0400
Subject: [PATCH 107/201] rename ZERO to FP_ZERO

---
 evm/src/bn254.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/evm/src/bn254.rs b/evm/src/bn254.rs
index 28bc27fb..5f71c845 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254.rs
@@ -59,14 +59,14 @@ pub fn twisted_curve_generator() -> TwistedCurve {
     ]
 }
 
-const ZERO: Fp = U256::zero();
+const FP_ZERO: Fp = U256::zero();
 
 fn embed_fp2(x: Fp) -> Fp2 {
-    [x, ZERO]
+    [x, FP_ZERO]
 }
 
 fn embed_fp2_fp6(a: Fp2) -> Fp6 {
-    [a, embed_fp2(ZERO), embed_fp2(ZERO)]
+    [a, embed_fp2(FP_ZERO), embed_fp2(FP_ZERO)]
 }
 
 fn embed_fp6(x: Fp) -> Fp6 {
@@ -74,7 +74,7 @@ fn embed_fp6(x: Fp) -> Fp6 {
 }
 
 fn embed_fp12(x: Fp) -> Fp12 {
-    [embed_fp6(x), embed_fp6(ZERO)]
+    [embed_fp6(x), embed_fp6(FP_ZERO)]
 }
 
 fn gen_fp() -> Fp {
@@ -200,7 +200,7 @@ fn sub_fp6(c: Fp6, d: Fp6) -> Fp6 {
 }
 
 fn neg_fp6(a: Fp6) -> Fp6 {
-    sub_fp6(embed_fp6(ZERO), a)
+    sub_fp6(embed_fp6(FP_ZERO), a)
 }
 
 fn mul_fp6(c: Fp6, d: Fp6) -> Fp6 {
@@ -232,8 +232,8 @@ fn sh(c: Fp6) -> Fp6 {
 
 fn sparse_embed(g0: Fp, g1: Fp2, g2: Fp2) -> Fp12 {
     [
-        [embed_fp2(g0), g1, embed_fp2(ZERO)],
-        [embed_fp2(ZERO), g2, embed_fp2(ZERO)],
+        [embed_fp2(g0), g1, embed_fp2(FP_ZERO)],
+        [embed_fp2(FP_ZERO), g2, embed_fp2(FP_ZERO)],
     ]
 }
 

From 2a2880b7eab7eb9cb0f0087ae696bf15e24ca9aa Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 13 Jan 2023 09:06:23 +0400
Subject: [PATCH 108/201] rename make_label to get_address_from_label

---
 evm/src/cpu/kernel/tests/bn254.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 3a4ddc1e..3e193366 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -10,7 +10,7 @@ use crate::bn254::{
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
 
-fn make_label(lbl: &str) -> U256 {
+fn get_address_from_label(lbl: &str) -> U256 {
     U256::from(KERNEL.global_labels[lbl])
 }
 
@@ -42,11 +42,11 @@ fn make_mul_stack(f: Fp12, g: Fp12, mul_label: &str) -> Vec<U256> {
         vec![in1],
         fp12_to_vec(g),
         vec![
-            make_label(mul_label),
+            get_address_from_label(mul_label),
             in0,
             in1,
             out,
-            make_label("return_fp12_on_stack"),
+            get_address_from_label("return_fp12_on_stack"),
             out,
         ],
     ])
@@ -133,7 +133,7 @@ fn test_power() -> Result<()> {
     let stack = make_stack(vec![
         vec![ptr],
         fp12_to_vec(f),
-        vec![ptr, out, make_label("return_fp12_on_stack"), out],
+        vec![ptr, out, get_address_from_label("return_fp12_on_stack"), out],
     ]);
 
     let output: Vec<U256> = get_output("test_pow", stack);
@@ -155,7 +155,7 @@ fn make_tate_stack(p: Curve, q: TwistedCurve) -> Vec<U256> {
         vec![ptr],
         p_,
         q_,
-        vec![ptr, out, make_label("return_fp12_on_stack"), out],
+        vec![ptr, out, get_address_from_label("return_fp12_on_stack"), out],
     ])
 }
 

From ea8cfc95b0b6f65950fa5bba592cf645408352a4 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 13 Jan 2023 09:06:51 +0400
Subject: [PATCH 109/201] formatting: multi-line vec literals, trailing comma

---
 evm/src/cpu/kernel/tests/bn254.rs  | 14 ++++++++++++--
 evm/src/generation/prover_input.rs |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 3e193366..75a6ae39 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -133,7 +133,12 @@ fn test_power() -> Result<()> {
     let stack = make_stack(vec![
         vec![ptr],
         fp12_to_vec(f),
-        vec![ptr, out, get_address_from_label("return_fp12_on_stack"), out],
+        vec![
+            ptr,
+            out,
+            get_address_from_label("return_fp12_on_stack"),
+            out,
+        ],
     ]);
 
     let output: Vec<U256> = get_output("test_pow", stack);
@@ -155,7 +160,12 @@ fn make_tate_stack(p: Curve, q: TwistedCurve) -> Vec<U256> {
         vec![ptr],
         p_,
         q_,
-        vec![ptr, out, get_address_from_label("return_fp12_on_stack"), out],
+        vec![
+            ptr,
+            out,
+            get_address_from_label("return_fp12_on_stack"),
+            out,
+        ],
     ])
 }
 
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 81ec775f..962bce7b 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -72,7 +72,7 @@ impl<F: Field> GenerationState<F> {
             "ext_9" => 9,
             "ext_10" => 10,
             "ext_11" => 11,
-            _ => panic!("out of bounds")
+            _ => panic!("out of bounds"),
         };
         field.inverse_fp12(n, xs)
     }

From 3fbc8bff3e77af70e598b0936cf4bd0e59ce1d4d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 13 Jan 2023 09:09:27 +0400
Subject: [PATCH 110/201] move comment

---
 .../asm/curve/bn254/curve_arithmetic/constants.asm       | 9 +++++++++
 .../asm/curve/bn254/curve_arithmetic/miller_loop.asm     | 9 ---------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index 014f4a9a..93878b57 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -1,3 +1,12 @@
+/// the following is defined by
+/// (1) taking the binary expansion of N254, the order of the elliptic curve group
+/// (2) popping the first and last elements, then appending a 0:
+///     exp = bin(N254)[1:-1] + [0]
+/// (3) counting the lengths of runs of 1s then 0s in exp, e.g.
+///     if exp = 1100010011110, then EXP = [(2,3), (1,2), (4,1)]
+/// (4) encoding each pair (n,m) as 0xnm:
+///     miller_data = [(0x20)n + m for (n,m) in EXP]
+
 global miller_data:
     BYTES 0xdc, 0x22, 0x42, 0x21
     BYTES 0xa1, 0xa4, 0x24, 0x21
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 844b38a5..5d2104e6 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -25,15 +25,6 @@
 ///     0xnm -= 1
 ///     mul_tangent()
 
-/// Note: miller_data was defined by
-/// (1) taking the binary expansion of N254, the size of the elliptic curve
-/// (2) popping the first and last elements, then appending a 0:
-///     exp = bin(N254)[1:-1] + [0]
-/// (3) counting the lengths of runs of 1s then 0s in exp, e.g.
-///     exp = 1100010011110 => EXP = [(2,3), (1,2), (4,1)]
-/// (4) encoding each pair (n,m) as 0xnm:
-///     miller_data = [(0x20)n + m for (n,m) in EXP]
-
 global test_miller:
     // stack: ptr, P, Q, ptr, out, retdest
     %store_fp6

From ccd4a38ab4bb98dff5f52eb8cc14dc8a44ed2ae4 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 17 Jan 2023 15:57:46 +0700
Subject: [PATCH 111/201] remove make_stack

---
 evm/src/cpu/kernel/tests/bn254.rs  | 85 +++++++++++++-----------------
 evm/src/generation/prover_input.rs | 24 ++++-----
 2 files changed, 48 insertions(+), 61 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 75a6ae39..1f6a9638 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -14,14 +14,6 @@ fn get_address_from_label(lbl: &str) -> U256 {
     U256::from(KERNEL.global_labels[lbl])
 }
 
-fn make_stack(vecs: Vec<Vec<U256>>) -> Vec<U256> {
-    let mut stack = vec![];
-    for vec in vecs {
-        stack.extend(vec)
-    }
-    stack
-}
-
 fn get_output(lbl: &str, stack: Vec<U256>) -> Vec<U256> {
     let label = KERNEL.global_labels[lbl];
     let mut input = stack;
@@ -36,20 +28,19 @@ fn make_mul_stack(f: Fp12, g: Fp12, mul_label: &str) -> Vec<U256> {
     let in1 = U256::from(76);
     let out = U256::from(88);
 
-    make_stack(vec![
-        vec![in0],
-        fp12_to_vec(f),
-        vec![in1],
-        fp12_to_vec(g),
-        vec![
-            get_address_from_label(mul_label),
-            in0,
-            in1,
-            out,
-            get_address_from_label("return_fp12_on_stack"),
-            out,
-        ],
-    ])
+    let mut stack = vec![in0];
+    stack.extend(fp12_to_vec(f));
+    stack.extend(vec![in1]);
+    stack.extend(fp12_to_vec(g));
+    stack.extend(vec![
+        get_address_from_label(mul_label),
+        in0,
+        in1,
+        out,
+        get_address_from_label("return_fp12_on_stack"),
+        out,
+    ]);
+    stack
 }
 
 #[test]
@@ -83,7 +74,9 @@ fn test_frob_fp12() -> Result<()> {
 
     let f: Fp12 = gen_fp12();
 
-    let stack = make_stack(vec![vec![ptr], fp12_to_vec(f), vec![ptr]]);
+    let mut stack = vec![ptr];
+    stack.extend(fp12_to_vec(f));
+    stack.extend(vec![ptr]);
 
     let out_frob1: Vec<U256> = get_output("test_frob_fp12_1", stack.clone());
     let out_frob2: Vec<U256> = get_output("test_frob_fp12_2", stack.clone());
@@ -109,12 +102,9 @@ fn test_inv_fp12() -> Result<()> {
     let inv = U256::from(300);
 
     let f: Fp12 = gen_fp12();
-
-    let stack = make_stack(vec![
-        vec![ptr],
-        fp12_to_vec(f),
-        vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()],
-    ]);
+    let mut stack = vec![ptr];
+    stack.extend(fp12_to_vec(f));
+    stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
 
     let output: Vec<U256> = get_output("test_inv_fp12", stack);
 
@@ -130,15 +120,13 @@ fn test_power() -> Result<()> {
 
     let f: Fp12 = gen_fp12();
 
-    let stack = make_stack(vec![
-        vec![ptr],
-        fp12_to_vec(f),
-        vec![
-            ptr,
-            out,
-            get_address_from_label("return_fp12_on_stack"),
-            out,
-        ],
+    let mut stack = vec![ptr];
+    stack.extend(fp12_to_vec(f));
+    stack.extend(vec![
+        ptr,
+        out,
+        get_address_from_label("return_fp12_on_stack"),
+        out,
     ]);
 
     let output: Vec<U256> = get_output("test_pow", stack);
@@ -156,17 +144,16 @@ fn make_tate_stack(p: Curve, q: TwistedCurve) -> Vec<U256> {
     let p_: Vec<U256> = p.into_iter().collect();
     let q_: Vec<U256> = q.into_iter().flatten().collect();
 
-    make_stack(vec![
-        vec![ptr],
-        p_,
-        q_,
-        vec![
-            ptr,
-            out,
-            get_address_from_label("return_fp12_on_stack"),
-            out,
-        ],
-    ])
+    let mut stack = vec![ptr];
+    stack.extend(p_);
+    stack.extend(q_);
+    stack.extend(vec![
+        ptr,
+        out,
+        get_address_from_label("return_fp12_on_stack"),
+        out,
+    ]);
+    stack
 }
 
 #[test]
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 962bce7b..bcbc5558 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -60,18 +60,18 @@ impl<F: Field> GenerationState<F> {
         let xs = stack_peeks(self).expect("Empty stack");
         // TODO: This sucks... come back later
         let n = match component {
-            "ext_0" => 0,
-            "ext_1" => 1,
-            "ext_2" => 2,
-            "ext_3" => 3,
-            "ext_4" => 4,
-            "ext_5" => 5,
-            "ext_6" => 6,
-            "ext_7" => 7,
-            "ext_8" => 8,
-            "ext_9" => 9,
-            "ext_10" => 10,
-            "ext_11" => 11,
+            "component_0" => 0,
+            "component_1" => 1,
+            "component_2" => 2,
+            "component_3" => 3,
+            "component_4" => 4,
+            "component_5" => 5,
+            "component_6" => 6,
+            "component_7" => 7,
+            "component_8" => 8,
+            "component_9" => 9,
+            "component_10" => 10,
+            "component_11" => 11,
             _ => panic!("out of bounds"),
         };
         field.inverse_fp12(n, xs)

From 31c5db91a5c05a73507e760b43bef7e5ed8f93f7 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 17 Jan 2023 16:11:15 +0700
Subject: [PATCH 112/201] rename module

---
 evm/src/arithmetic/mod.rs                 | 2 +-
 evm/src/{bn254.rs => bn254_arithmetic.rs} | 5 +++++
 evm/src/bn254_curve_pairing.rs            | 0
 evm/src/cpu/kernel/interpreter.rs         | 2 +-
 evm/src/cpu/kernel/tests/bn254.rs         | 2 +-
 evm/src/generation/prover_input.rs        | 2 +-
 evm/src/lib.rs                            | 2 +-
 7 files changed, 10 insertions(+), 5 deletions(-)
 rename evm/src/{bn254.rs => bn254_arithmetic.rs} (99%)
 create mode 100644 evm/src/bn254_curve_pairing.rs

diff --git a/evm/src/arithmetic/mod.rs b/evm/src/arithmetic/mod.rs
index bb58edf3..8a71888b 100644
--- a/evm/src/arithmetic/mod.rs
+++ b/evm/src/arithmetic/mod.rs
@@ -1,6 +1,6 @@
 use ethereum_types::U256;
 
-use crate::bn254::BN_BASE;
+use crate::bn254_arithmetic::BN_BASE;
 use crate::util::{addmod, mulmod, submod};
 
 mod add;
diff --git a/evm/src/bn254.rs b/evm/src/bn254_arithmetic.rs
similarity index 99%
rename from evm/src/bn254.rs
rename to evm/src/bn254_arithmetic.rs
index 5f71c845..d7e3ca3e 100644
--- a/evm/src/bn254.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,3 +1,4 @@
+use std::ops::{Add, Div, Mul, Sub};
 use std::str::FromStr;
 
 use ethereum_types::U256;
@@ -11,6 +12,10 @@ pub const BN_BASE: U256 = U256([
     0x30644e72e131a029,
 ]);
 
+// pub struct Fp {
+//     val: U256,
+// }
+
 pub type Fp = U256;
 pub type Fp2 = [U256; 2];
 pub type Fp6 = [Fp2; 3];
diff --git a/evm/src/bn254_curve_pairing.rs b/evm/src/bn254_curve_pairing.rs
new file mode 100644
index 00000000..e69de29b
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index 40fd7dbc..c8c5786b 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -7,7 +7,7 @@ use ethereum_types::{U256, U512};
 use keccak_hash::keccak;
 use plonky2::field::goldilocks_field::GoldilocksField;
 
-use crate::bn254::BN_BASE;
+use crate::bn254_arithmetic::BN_BASE;
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::constants::context_metadata::ContextMetadata;
 use crate::cpu::kernel::constants::global_metadata::GlobalMetadata;
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 1f6a9638..04b81675 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -3,7 +3,7 @@ use std::str::FromStr;
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254::{
+use crate::bn254_arithmetic::{
     curve_generator, fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, miller_loop, mul_fp12,
     power, tate, twisted_curve_generator, Curve, Fp12, TwistedCurve,
 };
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index bcbc5558..37363715 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -3,7 +3,7 @@ use std::str::FromStr;
 use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;
 
-use crate::bn254::{fp12_to_array, inv_fp12, vec_to_fp12};
+use crate::bn254_arithmetic::{fp12_to_array, inv_fp12, vec_to_fp12};
 use crate::generation::prover_input::EvmField::{
     Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index bd9ba261..226a8b77 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -8,7 +8,7 @@
 
 pub mod all_stark;
 pub mod arithmetic;
-pub mod bn254;
+pub mod bn254_arithmetic;
 pub mod config;
 pub mod constraint_consumer;
 pub mod cpu;

From be19cb81e3fb7736d302c02959d671fc3a0a201b Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 17 Jan 2023 23:58:36 +0700
Subject: [PATCH 113/201] struct impl style arithmetic

---
 evm/src/bn254_arithmetic.rs        | 1139 ++++++++++++----------------
 evm/src/bn254_curve_pairing.rs     |  322 ++++++++
 evm/src/cpu/kernel/tests/bn254.rs  |  187 +++--
 evm/src/generation/prover_input.rs |   17 +-
 4 files changed, 900 insertions(+), 765 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index d7e3ca3e..f6c192b7 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,8 +1,6 @@
-use std::ops::{Add, Div, Mul, Sub};
-use std::str::FromStr;
+use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use ethereum_types::U256;
-use itertools::Itertools;
 use rand::{thread_rng, Rng};
 
 pub const BN_BASE: U256 = U256([
@@ -12,737 +10,554 @@ pub const BN_BASE: U256 = U256([
     0x30644e72e131a029,
 ]);
 
-// pub struct Fp {
-//     val: U256,
-// }
-
-pub type Fp = U256;
-pub type Fp2 = [U256; 2];
-pub type Fp6 = [Fp2; 3];
-pub type Fp12 = [Fp6; 2];
-
-pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
-    f.into_iter().flatten().flatten().collect()
+#[derive(Debug, Copy, Clone)]
+pub struct Fp {
+    val: U256,
 }
 
-pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
-    let [[[f0, f1], [f2, f3], [f4, f5]], [[f6, f7], [f8, f9], [f10, f11]]] = f;
-    [f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11]
+impl Add for Fp {
+    type Output = Self;
+
+    fn add(self, other: Self) -> Self {
+        Fp {
+            val: (self.val + other.val) % BN_BASE,
+        }
+    }
 }
 
-pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
-    xs.into_iter()
-        .tuples::<(U256, U256)>()
-        .map(|(v1, v2)| [v1, v2])
-        .tuples()
-        .map(|(a1, a2, a3, a4, a5, a6)| [[a1, a2, a3], [a4, a5, a6]])
-        .next()
-        .unwrap()
+impl Neg for Fp {
+    type Output = Self;
+
+    fn neg(self) -> Self::Output {
+        Fp {
+            val: (BN_BASE - self.val) % BN_BASE,
+        }
+    }
 }
 
-pub type Curve = [Fp; 2];
-pub type TwistedCurve = [Fp2; 2];
+impl Sub for Fp {
+    type Output = Self;
 
-pub fn curve_generator() -> Curve {
-    [U256::one(), U256::from(2)]
+    fn sub(self, other: Self) -> Self {
+        Fp {
+            val: (BN_BASE + self.val - other.val) % BN_BASE,
+        }
+    }
 }
 
-pub fn twisted_curve_generator() -> TwistedCurve {
-    [
-        [
-            U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
-                .unwrap(),
-            U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
-                .unwrap(),
-        ],
-        [
-            U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
-                .unwrap(),
-            U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
-                .unwrap(),
-        ],
-    ]
+impl Mul for Fp {
+    type Output = Self;
+
+    fn mul(self, other: Self) -> Self {
+        Fp {
+            val: U256::try_from((self.val).full_mul(other.val) % BN_BASE).unwrap(),
+        }
+    }
 }
 
-const FP_ZERO: Fp = U256::zero();
+impl Div for Fp {
+    type Output = Self;
 
-fn embed_fp2(x: Fp) -> Fp2 {
-    [x, FP_ZERO]
+    fn div(self, rhs: Self) -> Self::Output {
+        let inv = exp_fp(self, BN_BASE - 2);
+        rhs * inv
+    }
 }
 
-fn embed_fp2_fp6(a: Fp2) -> Fp6 {
-    [a, embed_fp2(FP_ZERO), embed_fp2(FP_ZERO)]
-}
-
-fn embed_fp6(x: Fp) -> Fp6 {
-    embed_fp2_fp6(embed_fp2(x))
-}
-
-fn embed_fp12(x: Fp) -> Fp12 {
-    [embed_fp6(x), embed_fp6(FP_ZERO)]
-}
-
-fn gen_fp() -> Fp {
-    let mut rng = thread_rng();
-    let x64 = rng.gen::<u64>();
-    U256([x64, x64, x64, x64]) % BN_BASE
-}
-
-fn gen_fp2() -> Fp2 {
-    [gen_fp(), gen_fp()]
-}
-
-fn gen_fp6() -> Fp6 {
-    [gen_fp2(), gen_fp2(), gen_fp2()]
-}
-
-pub fn gen_fp12() -> Fp12 {
-    [gen_fp6(), gen_fp6()]
-}
-
-pub fn gen_fp12_sparse() -> Fp12 {
-    sparse_embed(gen_fp(), [gen_fp(), gen_fp()], [gen_fp(), gen_fp()])
-}
-
-fn add_fp(x: Fp, y: Fp) -> Fp {
-    (x + y) % BN_BASE
-}
-
-fn add3_fp(x: Fp, y: Fp, z: Fp) -> Fp {
-    (x + y + z) % BN_BASE
-}
-
-fn mul_fp(x: Fp, y: Fp) -> Fp {
-    U256::try_from(x.full_mul(y) % BN_BASE).unwrap()
-}
-
-fn sub_fp(x: Fp, y: Fp) -> Fp {
-    (BN_BASE + x - y) % BN_BASE
-}
-
-fn neg_fp(x: Fp) -> Fp {
-    (BN_BASE - x) % BN_BASE
-}
+const FP_ZERO: Fp = Fp { val: U256::zero() };
 
 fn exp_fp(x: Fp, e: U256) -> Fp {
     let mut current = x;
-    let mut product = U256::one();
+    let mut product = Fp { val: U256::one() };
 
     for j in 0..256 {
         if e.bit(j) {
-            product = U256::try_from(product.full_mul(current) % BN_BASE).unwrap();
+            product = product * current;
         }
-        current = U256::try_from(current.full_mul(current) % BN_BASE).unwrap();
+        current = current * current;
     }
     product
 }
 
-fn inv_fp(x: Fp) -> Fp {
-    exp_fp(x, BN_BASE - 2)
+#[derive(Debug, Copy, Clone)]
+pub struct Fp2 {
+    re: Fp,
+    im: Fp,
 }
 
-fn div_fp(x: Fp, y: Fp) -> Fp {
-    mul_fp(x, inv_fp(y))
+impl Add for Fp2 {
+    type Output = Self;
+
+    fn add(self, other: Self) -> Self {
+        Fp2 {
+            re: self.re + other.re,
+            im: self.im + other.im,
+        }
+    }
+}
+
+impl Neg for Fp2 {
+    type Output = Self;
+
+    fn neg(self) -> Self::Output {
+        Fp2 {
+            re: -self.re,
+            im: -self.im,
+        }
+    }
+}
+
+impl Sub for Fp2 {
+    type Output = Self;
+
+    fn sub(self, other: Self) -> Self {
+        Fp2 {
+            re: self.re - other.re,
+            im: self.im - other.im,
+        }
+    }
+}
+
+impl Mul for Fp2 {
+    type Output = Self;
+
+    fn mul(self, other: Self) -> Self {
+        Fp2 {
+            re: self.re * other.re - self.im * other.im,
+            im: self.re * other.im + self.im * other.re,
+        }
+    }
+}
+
+impl Div for Fp2 {
+    type Output = Self;
+
+    fn div(self, rhs: Self) -> Self::Output {
+        let norm = self.re * self.re + self.im * self.im;
+        let inv = Fp2 {
+            re: self.re / norm,
+            im: -self.im / norm,
+        };
+        rhs * inv
+    }
+}
+
+const FP2_ZERO: Fp2 = Fp2 {
+    re: FP_ZERO,
+    im: FP_ZERO,
+};
+
+fn flatten_fp2(a: Fp2) -> [U256; 2] {
+    [a.re.val, a.im.val]
+}
+
+fn embed_fp2(x: Fp) -> Fp2 {
+    Fp2 { re: x, im: FP_ZERO }
 }
 
 fn conj_fp2(a: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    [a, neg_fp(a_)]
-}
-
-fn add_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [add_fp(a, b), add_fp(a_, b_)]
-}
-
-fn add3_fp2(a: Fp2, b: Fp2, c: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    let [c, c_] = c;
-    [add3_fp(a, b, c), add3_fp(a_, b_, c_)]
-}
-
-fn sub_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [sub_fp(a, b), sub_fp(a_, b_)]
-}
-
-fn mul_fp2(a: Fp2, b: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let [b, b_] = b;
-    [
-        sub_fp(mul_fp(a, b), mul_fp(a_, b_)),
-        add_fp(mul_fp(a, b_), mul_fp(a_, b)),
-    ]
+    Fp2 {
+        re: a.re,
+        im: -a.im,
+    }
 }
 
 fn i9(a: Fp2) -> Fp2 {
-    let [a, a_] = a;
-    let nine = U256::from(9);
-    [sub_fp(mul_fp(nine, a), a_), add_fp(a, mul_fp(nine, a_))]
+    let nine = Fp { val: U256::from(9) };
+    Fp2 {
+        re: nine * a.re - a.im,
+        im: a.re + nine * a.im,
+    }
 }
 
-fn add_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
-
-    let e0 = add_fp2(c0, d0);
-    let e1 = add_fp2(c1, d1);
-    let e2 = add_fp2(c2, d2);
-    [e0, e1, e2]
+#[derive(Debug, Copy, Clone)]
+pub struct Fp6 {
+    t0: Fp2,
+    t1: Fp2,
+    t2: Fp2,
 }
 
-fn sub_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
+impl Add for Fp6 {
+    type Output = Self;
 
-    let e0 = sub_fp2(c0, d0);
-    let e1 = sub_fp2(c1, d1);
-    let e2 = sub_fp2(c2, d2);
-    [e0, e1, e2]
+    fn add(self, other: Self) -> Self {
+        Fp6 {
+            t0: self.t0 + other.t0,
+            t1: self.t1 + other.t1,
+            t2: self.t2 + other.t2,
+        }
+    }
 }
 
-fn neg_fp6(a: Fp6) -> Fp6 {
-    sub_fp6(embed_fp6(FP_ZERO), a)
+impl Neg for Fp6 {
+    type Output = Self;
+
+    fn neg(self) -> Self::Output {
+        Fp6 {
+            t0: -self.t0,
+            t1: -self.t1,
+            t2: -self.t2,
+        }
+    }
 }
 
-fn mul_fp6(c: Fp6, d: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let [d0, d1, d2] = d;
+impl Sub for Fp6 {
+    type Output = Self;
 
-    let c0d0 = mul_fp2(c0, d0);
-    let c0d1 = mul_fp2(c0, d1);
-    let c0d2 = mul_fp2(c0, d2);
-    let c1d0 = mul_fp2(c1, d0);
-    let c1d1 = mul_fp2(c1, d1);
-    let c1d2 = mul_fp2(c1, d2);
-    let c2d0 = mul_fp2(c2, d0);
-    let c2d1 = mul_fp2(c2, d1);
-    let c2d2 = mul_fp2(c2, d2);
-    let cd12 = add_fp2(c1d2, c2d1);
+    fn sub(self, other: Self) -> Self {
+        Fp6 {
+            t0: self.t0 - other.t0,
+            t1: self.t1 - other.t1,
+            t2: self.t2 - other.t2,
+        }
+    }
+}
 
-    [
-        add_fp2(c0d0, i9(cd12)),
-        add3_fp2(c0d1, c1d0, i9(c2d2)),
-        add3_fp2(c0d2, c1d1, c2d0),
-    ]
+impl Mul for Fp6 {
+    type Output = Self;
+
+    fn mul(self, other: Self) -> Self {
+        Fp6 {
+            t0: self.t0 * other.t0 + i9(self.t1 * other.t2 + self.t2 * other.t1),
+            t1: self.t0 * other.t1 + self.t1 * other.t0 + i9(self.t2 * other.t2),
+            t2: self.t0 * other.t2 + self.t1 * other.t1 + self.t2 * other.t0,
+        }
+    }
 }
 
 fn sh(c: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    [i9(c2), c0, c1]
+    Fp6 {
+        t0: i9(c.t2),
+        t1: c.t0,
+        t2: c.t1,
+    }
 }
 
-fn sparse_embed(g0: Fp, g1: Fp2, g2: Fp2) -> Fp12 {
+// impl Div for Fp6 {
+//     type Output = Self;
+
+//     fn div(self, rhs: Self) -> Self::Output {
+//         let b = frob_fp6(1, self) * frob_fp6(3, self);
+//         let e = (b * frob_fp6(5, self)).t0;
+//         let n = (e * conj_fp2(e)).re;
+//         let d = e / embed_fp2(n);
+//         let f = frob_fp6(1, b);
+//         let inv = Fp6 {
+//             t0: d * f.t0,
+//             t1: d * f.t1,
+//             t2: d * f.t2,
+//         };
+//         rhs * inv
+//     }
+// }
+
+// pub fn inv_fp6(c: Fp6) -> Fp6 {
+//     let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
+//     let e = mul_fp6(b, frob_fp6(5, c))[0];
+//     let n = mul_fp2(e, conj_fp2(e))[0];
+//     let i = inv_fp(n);
+//     let d = mul_fp2(embed_fp2(i), e);
+//     let [f0, f1, f2] = frob_fp6(1, b);
+//     [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
+// }
+
+#[derive(Debug, Copy, Clone)]
+pub struct Fp12 {
+    z0: Fp6,
+    z1: Fp6,
+}
+
+impl Mul for Fp12 {
+    type Output = Self;
+
+    fn mul(self, other: Self) -> Self {
+        let h0 = self.z0 * other.z0;
+        let h1 = self.z1 * other.z1;
+        let h01 = (self.z0 + self.z1) * (other.z0 + other.z1);
+        Fp12 {
+            z0: h0 + sh(h1),
+            z1: h01 - (h0 + h1),
+        }
+    }
+}
+
+fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
+    let g00 = Fp2 {
+        re: g000,
+        im: FP_ZERO,
+    };
+
+    let g0 = Fp6 {
+        t0: g00,
+        t1: g01,
+        t2: FP2_ZERO,
+    };
+
+    let g1 = Fp6 {
+        t0: FP2_ZERO,
+        t1: g11,
+        t2: FP2_ZERO,
+    };
+
+    Fp12 { z0: g0, z1: g1 }
+}
+
+// pub fn inv_fp12(f: Fp12) -> Fp12 {
+//     let [f0, f1] = f;
+//     let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
+//     let b = mul_fp6(a, frob_fp6(2, a));
+//     let c = mul_fp6(b, frob_fp6(4, a))[0];
+//     let n = mul_fp2(c, conj_fp2(c))[0];
+//     let i = inv_fp(n);
+//     let d = mul_fp2(embed_fp2(i), c);
+//     let [g0, g1, g2] = frob_fp6(1, b);
+//     let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)];
+//     [mul_fp6(e, f0), neg_fp6(mul_fp6(e, f1))]
+// }
+
+pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
     [
-        [embed_fp2(g0), g1, embed_fp2(FP_ZERO)],
-        [embed_fp2(FP_ZERO), g2, embed_fp2(FP_ZERO)],
+        f.z0.t0.re.val,
+        f.z0.t0.im.val,
+        f.z0.t1.re.val,
+        f.z0.t1.im.val,
+        f.z0.t2.re.val,
+        f.z0.t2.im.val,
+        f.z1.t0.re.val,
+        f.z1.t0.im.val,
+        f.z1.t1.re.val,
+        f.z1.t1.im.val,
+        f.z1.t2.re.val,
+        f.z1.t2.im.val,
     ]
 }
 
-pub fn mul_fp12(f: Fp12, g: Fp12) -> Fp12 {
-    let [f0, f1] = f;
-    let [g0, g1] = g;
-
-    let h0 = mul_fp6(f0, g0);
-    let h1 = mul_fp6(f1, g1);
-    let h01 = mul_fp6(add_fp6(f0, f1), add_fp6(g0, g1));
-    [add_fp6(h0, sh(h1)), sub_fp6(h01, add_fp6(h0, h1))]
+pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
+    fp12_to_array(f).into_iter().collect()
 }
 
-fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
-    let [c0, c1, c2] = c;
-    let _c0 = conj_fp2(c0);
-    let _c1 = conj_fp2(c1);
-    let _c2 = conj_fp2(c2);
+// pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
+//     xs.into_iter()
+//         .tuples::<(U256, U256)>()
+//         .map(|(v1, v2)| [v1, v2])
+//         .tuples()
+//         .map(|(a1, a2, a3, a4, a5, a6)| [[a1, a2, a3], [a4, a5, a6]])
+//         .next()
+//         .unwrap()
+// }
 
-    let n = n % 6;
-    let frob_t1 = frob_t1(n);
-    let frob_t2 = frob_t2(n);
+// fn embed_fp2(x: Fp) -> Fp2 {
+//     [x, FP_ZERO]
+// }
 
-    if n % 2 != 0 {
-        [_c0, mul_fp2(frob_t1, _c1), mul_fp2(frob_t2, _c2)]
-    } else {
-        [c0, mul_fp2(frob_t1, c1), mul_fp2(frob_t2, c2)]
+// fn embed_fp2_fp6(a: Fp2) -> Fp6 {
+//     [a, embed_fp2(FP_ZERO), embed_fp2(FP_ZERO)]
+// }
+
+// fn embed_fp6(x: Fp) -> Fp6 {
+//     embed_fp2_fp6(embed_fp2(x))
+// }
+
+// fn embed_fp12(x: Fp) -> Fp12 {
+//     [embed_fp6(x), embed_fp6(FP_ZERO)]
+// }
+
+fn gen_fp() -> Fp {
+    let mut rng = thread_rng();
+    let x64 = rng.gen::<u64>();
+    let x256 = U256([x64, x64, x64, x64]) % BN_BASE;
+    Fp { val: x256 }
+}
+
+fn gen_fp2() -> Fp2 {
+    Fp2 {
+        re: gen_fp(),
+        im: gen_fp(),
     }
 }
 
-pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
-    let [f0, f1] = f;
-    let scale = embed_fp2_fp6(frob_z(n));
-
-    [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
-}
-
-fn frob_t1(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
-                .unwrap(),
-            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
-                .unwrap(),
-            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
-                .unwrap(),
-            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
-                .unwrap(),
-        ],
-        _ => panic!(),
+fn gen_fp6() -> Fp6 {
+    Fp6 {
+        t0: gen_fp2(),
+        t1: gen_fp2(),
+        t2: gen_fp2(),
     }
 }
 
-fn frob_t2(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
-                .unwrap(),
-            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
-                .unwrap(),
-            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
-                .unwrap(),
-            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
-                .unwrap(),
-        ],
-        _ => panic!(),
+pub fn gen_fp12() -> Fp12 {
+    Fp12 {
+        z0: gen_fp6(),
+        z1: gen_fp6(),
     }
 }
 
-fn frob_z(n: usize) -> Fp2 {
-    match n {
-        0 => [
-            U256::from_str("0x1").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        1 => [
-            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
-                .unwrap(),
-            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        3 => [
-            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
-                .unwrap(),
-            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        5 => [
-            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
-                .unwrap(),
-            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
-                .unwrap(),
-        ],
-        6 => [
-            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
-                .unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        7 => [
-            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
-                .unwrap(),
-            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
-                .unwrap(),
-        ],
-        8 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        9 => [
-            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
-                .unwrap(),
-            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
-                .unwrap(),
-        ],
-        10 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
-            U256::from_str("0x0").unwrap(),
-        ],
-        11 => [
-            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
-                .unwrap(),
-            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    }
+pub fn gen_fp12_sparse() -> Fp12 {
+    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
 }
 
-pub fn inv_fp2(a: Fp2) -> Fp2 {
-    let [a0, a1] = a;
-    let norm = inv_fp(mul_fp(a0, a0) + mul_fp(a1, a1));
-    [mul_fp(norm, a0), neg_fp(mul_fp(norm, a1))]
-}
+// fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
+//     let [c0, c1, c2] = c;
+//     let _c0 = conj_fp2(c0);
+//     let _c1 = conj_fp2(c1);
+//     let _c2 = conj_fp2(c2);
 
-pub fn inv_fp6(c: Fp6) -> Fp6 {
-    let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
-    let e = mul_fp6(b, frob_fp6(5, c))[0];
-    let n = mul_fp2(e, conj_fp2(e))[0];
-    let i = inv_fp(n);
-    let d = mul_fp2(embed_fp2(i), e);
-    let [f0, f1, f2] = frob_fp6(1, b);
-    [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
-}
+//     let n = n % 6;
+//     let frob_t1 = frob_t1(n);
+//     let frob_t2 = frob_t2(n);
 
-pub fn inv_fp12(f: Fp12) -> Fp12 {
-    let [f0, f1] = f;
-    let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
-    let b = mul_fp6(a, frob_fp6(2, a));
-    let c = mul_fp6(b, frob_fp6(4, a))[0];
-    let n = mul_fp2(c, conj_fp2(c))[0];
-    let i = inv_fp(n);
-    let d = mul_fp2(embed_fp2(i), c);
-    let [g0, g1, g2] = frob_fp6(1, b);
-    let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)];
-    [mul_fp6(e, f0), neg_fp6(mul_fp6(e, f1))]
-}
+//     if n % 2 != 0 {
+//         [_c0, mul_fp2(frob_t1, _c1), mul_fp2(frob_t2, _c2)]
+//     } else {
+//         [c0, mul_fp2(frob_t1, c1), mul_fp2(frob_t2, c2)]
+//     }
+// }
 
-pub fn power(f: Fp12) -> Fp12 {
-    const EXPS4: [(usize, usize, usize); 64] = [
-        (1, 1, 0),
-        (1, 1, 1),
-        (1, 1, 1),
-        (0, 0, 0),
-        (0, 0, 1),
-        (1, 0, 1),
-        (0, 1, 0),
-        (1, 0, 1),
-        (1, 1, 0),
-        (1, 0, 1),
-        (0, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (0, 1, 0),
-        (0, 1, 0),
-        (0, 0, 1),
-        (1, 0, 1),
-        (1, 1, 0),
-        (0, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (0, 0, 1),
-        (0, 0, 1),
-        (1, 0, 1),
-        (1, 0, 1),
-        (1, 1, 0),
-        (1, 0, 0),
-        (1, 1, 0),
-        (0, 1, 0),
-        (1, 1, 0),
-        (1, 0, 0),
-        (0, 1, 0),
-        (0, 0, 0),
-        (1, 0, 0),
-        (1, 0, 0),
-        (1, 0, 1),
-        (0, 0, 1),
-        (0, 1, 1),
-        (0, 0, 1),
-        (0, 1, 1),
-        (0, 1, 1),
-        (0, 0, 0),
-        (1, 1, 1),
-        (1, 0, 1),
-        (1, 0, 1),
-        (0, 1, 1),
-        (1, 0, 1),
-        (0, 1, 1),
-        (0, 1, 1),
-        (1, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (1, 0, 0),
-        (0, 0, 1),
-        (1, 0, 0),
-        (0, 0, 1),
-        (1, 0, 1),
-        (1, 1, 0),
-        (1, 1, 1),
-        (0, 1, 1),
-        (0, 1, 0),
-        (1, 1, 1),
-    ];
+// pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
+//     let [f0, f1] = f;
+//     let scale = embed_fp2_fp6(frob_z(n));
 
-    const EXPS2: [(usize, usize); 62] = [
-        (1, 0),
-        (1, 1),
-        (0, 0),
-        (1, 0),
-        (1, 0),
-        (1, 1),
-        (1, 0),
-        (1, 1),
-        (1, 0),
-        (0, 1),
-        (0, 1),
-        (1, 1),
-        (1, 1),
-        (0, 0),
-        (1, 1),
-        (0, 0),
-        (0, 0),
-        (0, 1),
-        (0, 1),
-        (1, 1),
-        (1, 1),
-        (1, 1),
-        (0, 1),
-        (1, 1),
-        (0, 0),
-        (1, 1),
-        (1, 0),
-        (1, 1),
-        (0, 0),
-        (1, 1),
-        (1, 1),
-        (1, 0),
-        (0, 0),
-        (0, 1),
-        (0, 0),
-        (1, 1),
-        (0, 1),
-        (0, 0),
-        (1, 0),
-        (0, 1),
-        (0, 1),
-        (1, 0),
-        (0, 1),
-        (0, 0),
-        (0, 0),
-        (0, 0),
-        (0, 1),
-        (1, 0),
-        (1, 1),
-        (0, 1),
-        (1, 1),
-        (1, 0),
-        (0, 1),
-        (0, 0),
-        (1, 0),
-        (0, 1),
-        (1, 0),
-        (1, 1),
-        (1, 0),
-        (1, 1),
-        (0, 1),
-        (1, 1),
-    ];
+//     [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
+// }
 
-    const EXPS0: [usize; 65] = [
-        0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,
-        0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1,
-        0, 0, 1, 1, 0,
-    ];
+// fn frob_t1(n: usize) -> Fp2 {
+//     match n {
+//         0 => [
+//             U256::from_str("0x1").unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         1 => [
+//             U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
+//                 .unwrap(),
+//             U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
+//                 .unwrap(),
+//         ],
+//         2 => [
+//             U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+//                 .unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         3 => [
+//             U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
+//                 .unwrap(),
+//             U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
+//                 .unwrap(),
+//         ],
+//         4 => [
+//             U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         5 => [
+//             U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
+//                 .unwrap(),
+//             U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
+//                 .unwrap(),
+//         ],
+//         _ => panic!(),
+//     }
+// }
 
-    let mut sq: Fp12 = f;
-    let mut y0: Fp12 = embed_fp12(U256::one());
-    let mut y2: Fp12 = embed_fp12(U256::one());
-    let mut y4: Fp12 = embed_fp12(U256::one());
+// fn frob_t2(n: usize) -> Fp2 {
+//     match n {
+//         0 => [
+//             U256::from_str("0x1").unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         1 => [
+//             U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
+//                 .unwrap(),
+//             U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
+//                 .unwrap(),
+//         ],
+//         2 => [
+//             U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         3 => [
+//             U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
+//                 .unwrap(),
+//             U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
+//                 .unwrap(),
+//         ],
+//         4 => [
+//             U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+//                 .unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         5 => [
+//             U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
+//                 .unwrap(),
+//             U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
+//                 .unwrap(),
+//         ],
+//         _ => panic!(),
+//     }
+// }
 
-    for (a, b, c) in EXPS4 {
-        if a != 0 {
-            y4 = mul_fp12(y4, sq);
-        }
-        if b != 0 {
-            y2 = mul_fp12(y2, sq);
-        }
-        if c != 0 {
-            y0 = mul_fp12(y0, sq);
-        }
-        sq = mul_fp12(sq, sq);
-    }
-    y4 = mul_fp12(y4, sq);
-
-    for (a, b) in EXPS2 {
-        if a != 0 {
-            y2 = mul_fp12(y2, sq);
-        }
-        if b != 0 {
-            y0 = mul_fp12(y0, sq);
-        }
-        sq = mul_fp12(sq, sq);
-    }
-    y2 = mul_fp12(y2, sq);
-
-    for a in EXPS0 {
-        if a != 0 {
-            y0 = mul_fp12(y0, sq);
-        }
-        sq = mul_fp12(sq, sq);
-    }
-    y0 = mul_fp12(y0, sq);
-
-    y0 = inv_fp12(y0);
-
-    y4 = mul_fp12(y4, y2);
-    y4 = mul_fp12(y4, y2);
-    y4 = mul_fp12(y4, y0);
-
-    y4 = frob_fp12(1, y4);
-    y2 = frob_fp12(2, y2);
-
-    mul_fp12(mul_fp12(y4, y2), y0)
-}
-
-pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
-    let [px, py] = p;
-    let [qx, qy] = q;
-
-    let cx = neg_fp(mul_fp(U256::from(3), mul_fp(px, px)));
-    let cy = mul_fp(U256::from(2), py);
-
-    sparse_embed(
-        sub_fp(mul_fp(py, py), U256::from(9)),
-        mul_fp2(embed_fp2(cx), qx),
-        mul_fp2(embed_fp2(cy), qy),
-    )
-}
-
-pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
-    let [p1x, p1y] = p1;
-    let [p2x, p2y] = p2;
-    let [qx, qy] = q;
-
-    let cx = sub_fp(p2y, p1y);
-    let cy = sub_fp(p1x, p2x);
-
-    sparse_embed(
-        sub_fp(mul_fp(p1y, p2x), mul_fp(p2y, p1x)),
-        mul_fp2(embed_fp2(cx), qx),
-        mul_fp2(embed_fp2(cy), qy),
-    )
-}
-
-fn tangent_slope(p: Curve) -> Fp {
-    let [px, py] = p;
-    let num = mul_fp(mul_fp(px, px), U256::from(3));
-    let denom = mul_fp(py, U256::from(2));
-    div_fp(num, denom)
-}
-
-fn cord_slope(p: Curve, q: Curve) -> Fp {
-    let [px, py] = p;
-    let [qx, qy] = q;
-    let num = sub_fp(qy, py);
-    let denom = sub_fp(qx, px);
-    div_fp(num, denom)
-}
-
-fn third_point(m: Fp, p: Curve, q: Curve) -> Curve {
-    let [px, py] = p;
-    let [qx, _] = q;
-    let ox = sub_fp(mul_fp(m, m), add_fp(px, qx));
-    let oy = sub_fp(mul_fp(m, sub_fp(px, ox)), py);
-    [ox, oy]
-}
-
-fn curve_add(p: Curve, q: Curve) -> Curve {
-    if p == q {
-        curve_double(p)
-    } else {
-        third_point(cord_slope(p, q), p, q)
-    }
-}
-
-fn curve_double(p: Curve) -> Curve {
-    third_point(tangent_slope(p), p, p)
-}
-
-pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
-    const EXP: [usize; 253] = [
-        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
-        1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
-        1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
-        1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
-        1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
-        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
-        0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
-        1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    ];
-
-    let mut o = p;
-    let mut acc = embed_fp12(U256::one());
-    let mut line;
-
-    for i in EXP {
-        acc = mul_fp12(acc, acc);
-        line = tangent(o, q);
-        acc = mul_fp12(line, acc);
-        o = curve_double(o);
-        if i != 0 {
-            line = cord(p, o, q);
-            acc = mul_fp12(line, acc);
-            o = curve_add(p, o);
-        }
-    }
-    acc
-}
-
-pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
-    let mut out = miller_loop(p, q);
-
-    let inv = inv_fp12(out);
-    out = frob_fp12(6, out);
-    out = mul_fp12(out, inv);
-
-    let acc = frob_fp12(2, out);
-    out = mul_fp12(out, acc);
-
-    let pow = power(out);
-    out = frob_fp12(3, out);
-    mul_fp12(out, pow)
-}
+// fn frob_z(n: usize) -> Fp2 {
+//     match n {
+//         0 => [
+//             U256::from_str("0x1").unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         1 => [
+//             U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
+//                 .unwrap(),
+//             U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
+//                 .unwrap(),
+//         ],
+//         2 => [
+//             U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
+//                 .unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         3 => [
+//             U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
+//                 .unwrap(),
+//             U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
+//                 .unwrap(),
+//         ],
+//         4 => [
+//             U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+//                 .unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         5 => [
+//             U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
+//                 .unwrap(),
+//             U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
+//                 .unwrap(),
+//         ],
+//         6 => [
+//             U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
+//                 .unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         7 => [
+//             U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
+//                 .unwrap(),
+//             U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
+//                 .unwrap(),
+//         ],
+//         8 => [
+//             U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         9 => [
+//             U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
+//                 .unwrap(),
+//             U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
+//                 .unwrap(),
+//         ],
+//         10 => [
+//             U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
+//             U256::from_str("0x0").unwrap(),
+//         ],
+//         11 => [
+//             U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
+//                 .unwrap(),
+//             U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
+//                 .unwrap(),
+//         ],
+//         _ => panic!(),
+//     }
+// }
diff --git a/evm/src/bn254_curve_pairing.rs b/evm/src/bn254_curve_pairing.rs
index e69de29b..dba6c5dd 100644
--- a/evm/src/bn254_curve_pairing.rs
+++ b/evm/src/bn254_curve_pairing.rs
@@ -0,0 +1,322 @@
+// pub type Curve = [Fp; 2];
+// pub type TwistedCurve = [Fp2; 2];
+
+// pub fn curve_generator() -> Curve {
+//     [U256::one(), U256::from(2)]
+// }
+
+// pub fn twisted_curve_generator() -> TwistedCurve {
+//     [
+//         [
+//             U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
+//                 .unwrap(),
+//             U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
+//                 .unwrap(),
+//         ],
+//         [
+//             U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
+//                 .unwrap(),
+//             U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
+//                 .unwrap(),
+//         ],
+//     ]
+// }
+
+pub fn power(f: Fp12) -> Fp12 {
+    const EXPS4: [(usize, usize, usize); 64] = [
+        (1, 1, 0),
+        (1, 1, 1),
+        (1, 1, 1),
+        (0, 0, 0),
+        (0, 0, 1),
+        (1, 0, 1),
+        (0, 1, 0),
+        (1, 0, 1),
+        (1, 1, 0),
+        (1, 0, 1),
+        (0, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (0, 1, 0),
+        (0, 1, 0),
+        (0, 0, 1),
+        (1, 0, 1),
+        (1, 1, 0),
+        (0, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (0, 0, 1),
+        (0, 0, 1),
+        (1, 0, 1),
+        (1, 0, 1),
+        (1, 1, 0),
+        (1, 0, 0),
+        (1, 1, 0),
+        (0, 1, 0),
+        (1, 1, 0),
+        (1, 0, 0),
+        (0, 1, 0),
+        (0, 0, 0),
+        (1, 0, 0),
+        (1, 0, 0),
+        (1, 0, 1),
+        (0, 0, 1),
+        (0, 1, 1),
+        (0, 0, 1),
+        (0, 1, 1),
+        (0, 1, 1),
+        (0, 0, 0),
+        (1, 1, 1),
+        (1, 0, 1),
+        (1, 0, 1),
+        (0, 1, 1),
+        (1, 0, 1),
+        (0, 1, 1),
+        (0, 1, 1),
+        (1, 1, 0),
+        (1, 1, 0),
+        (1, 1, 0),
+        (1, 0, 0),
+        (0, 0, 1),
+        (1, 0, 0),
+        (0, 0, 1),
+        (1, 0, 1),
+        (1, 1, 0),
+        (1, 1, 1),
+        (0, 1, 1),
+        (0, 1, 0),
+        (1, 1, 1),
+    ];
+
+    const EXPS2: [(usize, usize); 62] = [
+        (1, 0),
+        (1, 1),
+        (0, 0),
+        (1, 0),
+        (1, 0),
+        (1, 1),
+        (1, 0),
+        (1, 1),
+        (1, 0),
+        (0, 1),
+        (0, 1),
+        (1, 1),
+        (1, 1),
+        (0, 0),
+        (1, 1),
+        (0, 0),
+        (0, 0),
+        (0, 1),
+        (0, 1),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        (0, 1),
+        (1, 1),
+        (0, 0),
+        (1, 1),
+        (1, 0),
+        (1, 1),
+        (0, 0),
+        (1, 1),
+        (1, 1),
+        (1, 0),
+        (0, 0),
+        (0, 1),
+        (0, 0),
+        (1, 1),
+        (0, 1),
+        (0, 0),
+        (1, 0),
+        (0, 1),
+        (0, 1),
+        (1, 0),
+        (0, 1),
+        (0, 0),
+        (0, 0),
+        (0, 0),
+        (0, 1),
+        (1, 0),
+        (1, 1),
+        (0, 1),
+        (1, 1),
+        (1, 0),
+        (0, 1),
+        (0, 0),
+        (1, 0),
+        (0, 1),
+        (1, 0),
+        (1, 1),
+        (1, 0),
+        (1, 1),
+        (0, 1),
+        (1, 1),
+    ];
+
+    const EXPS0: [usize; 65] = [
+        0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,
+        0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1,
+        0, 0, 1, 1, 0,
+    ];
+
+    let mut sq: Fp12 = f;
+    let mut y0: Fp12 = embed_fp12(U256::one());
+    let mut y2: Fp12 = embed_fp12(U256::one());
+    let mut y4: Fp12 = embed_fp12(U256::one());
+
+    for (a, b, c) in EXPS4 {
+        if a != 0 {
+            y4 = mul_fp12(y4, sq);
+        }
+        if b != 0 {
+            y2 = mul_fp12(y2, sq);
+        }
+        if c != 0 {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y4 = mul_fp12(y4, sq);
+
+    for (a, b) in EXPS2 {
+        if a != 0 {
+            y2 = mul_fp12(y2, sq);
+        }
+        if b != 0 {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y2 = mul_fp12(y2, sq);
+
+    for a in EXPS0 {
+        if a != 0 {
+            y0 = mul_fp12(y0, sq);
+        }
+        sq = mul_fp12(sq, sq);
+    }
+    y0 = mul_fp12(y0, sq);
+
+    y0 = inv_fp12(y0);
+
+    y4 = mul_fp12(y4, y2);
+    y4 = mul_fp12(y4, y2);
+    y4 = mul_fp12(y4, y0);
+
+    y4 = frob_fp12(1, y4);
+    y2 = frob_fp12(2, y2);
+
+    mul_fp12(mul_fp12(y4, y2), y0)
+}
+
+pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
+    let [px, py] = p;
+    let [qx, qy] = q;
+
+    let cx = neg_fp(mul_fp(U256::from(3), mul_fp(px, px)));
+    let cy = mul_fp(U256::from(2), py);
+
+    sparse_embed(
+        sub_fp(mul_fp(py, py), U256::from(9)),
+        mul_fp2(embed_fp2(cx), qx),
+        mul_fp2(embed_fp2(cy), qy),
+    )
+}
+
+pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
+    let [p1x, p1y] = p1;
+    let [p2x, p2y] = p2;
+    let [qx, qy] = q;
+
+    let cx = sub_fp(p2y, p1y);
+    let cy = sub_fp(p1x, p2x);
+
+    sparse_embed(
+        sub_fp(mul_fp(p1y, p2x), mul_fp(p2y, p1x)),
+        mul_fp2(embed_fp2(cx), qx),
+        mul_fp2(embed_fp2(cy), qy),
+    )
+}
+
+fn tangent_slope(p: Curve) -> Fp {
+    let [px, py] = p;
+    let num = mul_fp(mul_fp(px, px), U256::from(3));
+    let denom = mul_fp(py, U256::from(2));
+    div_fp(num, denom)
+}
+
+fn cord_slope(p: Curve, q: Curve) -> Fp {
+    let [px, py] = p;
+    let [qx, qy] = q;
+    let num = sub_fp(qy, py);
+    let denom = sub_fp(qx, px);
+    div_fp(num, denom)
+}
+
+fn third_point(m: Fp, p: Curve, q: Curve) -> Curve {
+    let [px, py] = p;
+    let [qx, _] = q;
+    let ox = sub_fp(mul_fp(m, m), add_fp(px, qx));
+    let oy = sub_fp(mul_fp(m, sub_fp(px, ox)), py);
+    [ox, oy]
+}
+
+fn curve_add(p: Curve, q: Curve) -> Curve {
+    if p == q {
+        curve_double(p)
+    } else {
+        third_point(cord_slope(p, q), p, q)
+    }
+}
+
+fn curve_double(p: Curve) -> Curve {
+    third_point(tangent_slope(p), p, p)
+}
+
+pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
+    const EXP: [usize; 253] = [
+        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
+        1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
+        1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
+        1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
+        1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
+        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
+        0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
+        1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    ];
+
+    let mut o = p;
+    let mut acc = embed_fp12(U256::one());
+    let mut line;
+
+    for i in EXP {
+        acc = mul_fp12(acc, acc);
+        line = tangent(o, q);
+        acc = mul_fp12(line, acc);
+        o = curve_double(o);
+        if i != 0 {
+            line = cord(p, o, q);
+            acc = mul_fp12(line, acc);
+            o = curve_add(p, o);
+        }
+    }
+    acc
+}
+
+pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
+    let mut out = miller_loop(p, q);
+
+    let inv = inv_fp12(out);
+    out = frob_fp12(6, out);
+    out = mul_fp12(out, inv);
+
+    let acc = frob_fp12(2, out);
+    out = mul_fp12(out, acc);
+
+    let pow = power(out);
+    out = frob_fp12(3, out);
+    mul_fp12(out, pow)
+}
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 04b81675..be65142a 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -1,12 +1,9 @@
-use std::str::FromStr;
+// use std::str::FromStr;
 
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{
-    curve_generator, fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, miller_loop, mul_fp12,
-    power, tate, twisted_curve_generator, Curve, Fp12, TwistedCurve,
-};
+use crate::bn254_arithmetic::{fp12_to_vec, gen_fp12, gen_fp12_sparse, Fp12};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
 
@@ -57,9 +54,9 @@ fn test_mul_fp12() -> Result<()> {
     let out_sparse: Vec<U256> = get_output("test_mul_fp12", sparse);
     let out_square: Vec<U256> = get_output("test_mul_fp12", square);
 
-    let exp_normal: Vec<U256> = fp12_to_vec(mul_fp12(f, g));
-    let exp_sparse: Vec<U256> = fp12_to_vec(mul_fp12(f, h));
-    let exp_square: Vec<U256> = fp12_to_vec(mul_fp12(f, f));
+    let exp_normal: Vec<U256> = fp12_to_vec(f * g);
+    let exp_sparse: Vec<U256> = fp12_to_vec(f * h);
+    let exp_square: Vec<U256> = fp12_to_vec(f * f);
 
     assert_eq!(out_normal, exp_normal);
     assert_eq!(out_sparse, exp_sparse);
@@ -68,118 +65,118 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-#[test]
-fn test_frob_fp12() -> Result<()> {
-    let ptr = U256::from(100);
+// #[test]
+// fn test_frob_fp12() -> Result<()> {
+//     let ptr = U256::from(100);
 
-    let f: Fp12 = gen_fp12();
+//     let f: Fp12 = gen_fp12();
 
-    let mut stack = vec![ptr];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![ptr]);
+//     let mut stack = vec![ptr];
+//     stack.extend(fp12_to_vec(f));
+//     stack.extend(vec![ptr]);
 
-    let out_frob1: Vec<U256> = get_output("test_frob_fp12_1", stack.clone());
-    let out_frob2: Vec<U256> = get_output("test_frob_fp12_2", stack.clone());
-    let out_frob3: Vec<U256> = get_output("test_frob_fp12_3", stack.clone());
-    let out_frob6: Vec<U256> = get_output("test_frob_fp12_6", stack);
+//     let out_frob1: Vec<U256> = get_output("test_frob_fp12_1", stack.clone());
+//     let out_frob2: Vec<U256> = get_output("test_frob_fp12_2", stack.clone());
+//     let out_frob3: Vec<U256> = get_output("test_frob_fp12_3", stack.clone());
+//     let out_frob6: Vec<U256> = get_output("test_frob_fp12_6", stack);
 
-    let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
-    let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
-    let exp_frob3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
-    let exp_frob6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
+//     let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
+//     let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
+//     let exp_frob3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
+//     let exp_frob6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
 
-    assert_eq!(out_frob1, exp_frob1);
-    assert_eq!(out_frob2, exp_frob2);
-    assert_eq!(out_frob3, exp_frob3);
-    assert_eq!(out_frob6, exp_frob6);
+//     assert_eq!(out_frob1, exp_frob1);
+//     assert_eq!(out_frob2, exp_frob2);
+//     assert_eq!(out_frob3, exp_frob3);
+//     assert_eq!(out_frob6, exp_frob6);
 
-    Ok(())
-}
+//     Ok(())
+// }
 
-#[test]
-fn test_inv_fp12() -> Result<()> {
-    let ptr = U256::from(200);
-    let inv = U256::from(300);
+// #[test]
+// fn test_inv_fp12() -> Result<()> {
+//     let ptr = U256::from(200);
+//     let inv = U256::from(300);
 
-    let f: Fp12 = gen_fp12();
-    let mut stack = vec![ptr];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
+//     let f: Fp12 = gen_fp12();
+//     let mut stack = vec![ptr];
+//     stack.extend(fp12_to_vec(f));
+//     stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
 
-    let output: Vec<U256> = get_output("test_inv_fp12", stack);
+//     let output: Vec<U256> = get_output("test_inv_fp12", stack);
 
-    assert_eq!(output, vec![]);
+//     assert_eq!(output, vec![]);
 
-    Ok(())
-}
+//     Ok(())
+// }
 
-#[test]
-fn test_power() -> Result<()> {
-    let ptr = U256::from(300);
-    let out = U256::from(400);
+// #[test]
+// fn test_power() -> Result<()> {
+//     let ptr = U256::from(300);
+//     let out = U256::from(400);
 
-    let f: Fp12 = gen_fp12();
+//     let f: Fp12 = gen_fp12();
 
-    let mut stack = vec![ptr];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![
-        ptr,
-        out,
-        get_address_from_label("return_fp12_on_stack"),
-        out,
-    ]);
+//     let mut stack = vec![ptr];
+//     stack.extend(fp12_to_vec(f));
+//     stack.extend(vec![
+//         ptr,
+//         out,
+//         get_address_from_label("return_fp12_on_stack"),
+//         out,
+//     ]);
 
-    let output: Vec<U256> = get_output("test_pow", stack);
-    let expected: Vec<U256> = fp12_to_vec(power(f));
+//     let output: Vec<U256> = get_output("test_pow", stack);
+//     let expected: Vec<U256> = fp12_to_vec(power(f));
 
-    assert_eq!(output, expected);
+//     assert_eq!(output, expected);
 
-    Ok(())
-}
+//     Ok(())
+// }
 
-fn make_tate_stack(p: Curve, q: TwistedCurve) -> Vec<U256> {
-    let ptr = U256::from(300);
-    let out = U256::from(400);
+// fn make_tate_stack(p: Curve, q: TwistedCurve) -> Vec<U256> {
+//     let ptr = U256::from(300);
+//     let out = U256::from(400);
 
-    let p_: Vec<U256> = p.into_iter().collect();
-    let q_: Vec<U256> = q.into_iter().flatten().collect();
+//     let p_: Vec<U256> = p.into_iter().collect();
+//     let q_: Vec<U256> = q.into_iter().flatten().collect();
 
-    let mut stack = vec![ptr];
-    stack.extend(p_);
-    stack.extend(q_);
-    stack.extend(vec![
-        ptr,
-        out,
-        get_address_from_label("return_fp12_on_stack"),
-        out,
-    ]);
-    stack
-}
+//     let mut stack = vec![ptr];
+//     stack.extend(p_);
+//     stack.extend(q_);
+//     stack.extend(vec![
+//         ptr,
+//         out,
+//         get_address_from_label("return_fp12_on_stack"),
+//         out,
+//     ]);
+//     stack
+// }
 
-#[test]
-fn test_miller() -> Result<()> {
-    let p: Curve = curve_generator();
-    let q: TwistedCurve = twisted_curve_generator();
+// #[test]
+// fn test_miller() -> Result<()> {
+//     let p: Curve = curve_generator();
+//     let q: TwistedCurve = twisted_curve_generator();
 
-    let stack = make_tate_stack(p, q);
-    let output = get_output("test_miller", stack);
-    let expected = fp12_to_vec(miller_loop(p, q));
+//     let stack = make_tate_stack(p, q);
+//     let output = get_output("test_miller", stack);
+//     let expected = fp12_to_vec(miller_loop(p, q));
 
-    assert_eq!(output, expected);
+//     assert_eq!(output, expected);
 
-    Ok(())
-}
+//     Ok(())
+// }
 
-#[test]
-fn test_tate() -> Result<()> {
-    let p: Curve = curve_generator();
-    let q: TwistedCurve = twisted_curve_generator();
+// #[test]
+// fn test_tate() -> Result<()> {
+//     let p: Curve = curve_generator();
+//     let q: TwistedCurve = twisted_curve_generator();
 
-    let stack = make_tate_stack(p, q);
-    let output = get_output("test_tate", stack);
-    let expected = fp12_to_vec(tate(p, q));
+//     let stack = make_tate_stack(p, q);
+//     let output = get_output("test_tate", stack);
+//     let expected = fp12_to_vec(tate(p, q));
 
-    assert_eq!(output, expected);
+//     assert_eq!(output, expected);
 
-    Ok(())
-}
+//     Ok(())
+// }
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 37363715..8ccb2407 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -3,7 +3,7 @@ use std::str::FromStr;
 use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;
 
-use crate::bn254_arithmetic::{fp12_to_array, inv_fp12, vec_to_fp12};
+// use crate::bn254_arithmetic::{fp12_to_array, inv_fp12, vec_to_fp12};
 use crate::generation::prover_input::EvmField::{
     Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
@@ -74,7 +74,8 @@ impl<F: Field> GenerationState<F> {
             "component_11" => 11,
             _ => panic!("out of bounds"),
         };
-        field.inverse_fp12(n, xs)
+        // field.inverse_fp12(n, xs)
+        todo!()
     }
 
     /// MPT data.
@@ -195,12 +196,12 @@ impl EvmField {
         modexp(x, q, n)
     }
 
-    fn inverse_fp12(&self, n: usize, xs: Vec<U256>) -> U256 {
-        let offset = 12 - n;
-        let vec: Vec<U256> = xs[offset..].to_vec();
-        let f = fp12_to_array(inv_fp12(vec_to_fp12(vec)));
-        f[n]
-    }
+    // fn inverse_fp12(&self, n: usize, xs: Vec<U256>) -> U256 {
+    //     let offset = 12 - n;
+    //     let vec: Vec<U256> = xs[offset..].to_vec();
+    //     let f = fp12_to_array(inv_fp12(vec_to_fp12(vec)));
+    //     f[n]
+    // }
 }
 
 fn modexp(x: U256, e: U256, n: U256) -> U256 {

From ecde3d13b10c7e5184c669d814ae8362bb37a334 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 11:10:26 +0700
Subject: [PATCH 114/201] frob tests

---
 evm/src/bn254_arithmetic.rs       | 379 +++++++++++++++---------------
 evm/src/cpu/kernel/tests/bn254.rs |  45 ++--
 evm/src/witness/util.rs           |   2 +-
 3 files changed, 210 insertions(+), 216 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index f6c192b7..81abe5fe 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,4 +1,5 @@
 use std::ops::{Add, Div, Mul, Neg, Sub};
+use std::str::FromStr;
 
 use ethereum_types::U256;
 use rand::{thread_rng, Rng};
@@ -151,7 +152,7 @@ fn flatten_fp2(a: Fp2) -> [U256; 2] {
     [a.re.val, a.im.val]
 }
 
-fn embed_fp2(x: Fp) -> Fp2 {
+fn embed_fp_fp2(x: Fp) -> Fp2 {
     Fp2 { re: x, im: FP_ZERO }
 }
 
@@ -225,14 +226,6 @@ impl Mul for Fp6 {
     }
 }
 
-fn sh(c: Fp6) -> Fp6 {
-    Fp6 {
-        t0: i9(c.t2),
-        t1: c.t0,
-        t2: c.t1,
-    }
-}
-
 // impl Div for Fp6 {
 //     type Output = Self;
 
@@ -261,6 +254,22 @@ fn sh(c: Fp6) -> Fp6 {
 //     [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
 // }
 
+fn embed_fp2_fp6(x: Fp2) -> Fp6 {
+    Fp6 {
+        t0: x,
+        t1: FP2_ZERO,
+        t2: FP2_ZERO,
+    }
+}
+
+fn sh(c: Fp6) -> Fp6 {
+    Fp6 {
+        t0: i9(c.t2),
+        t1: c.t0,
+        t2: c.t1,
+    }
+}
+
 #[derive(Debug, Copy, Clone)]
 pub struct Fp12 {
     z0: Fp6,
@@ -282,13 +291,8 @@ impl Mul for Fp12 {
 }
 
 fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
-    let g00 = Fp2 {
-        re: g000,
-        im: FP_ZERO,
-    };
-
     let g0 = Fp6 {
-        t0: g00,
+        t0: embed_fp_fp2(g000),
         t1: g01,
         t2: FP2_ZERO,
     };
@@ -346,22 +350,6 @@ pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
 //         .unwrap()
 // }
 
-// fn embed_fp2(x: Fp) -> Fp2 {
-//     [x, FP_ZERO]
-// }
-
-// fn embed_fp2_fp6(a: Fp2) -> Fp6 {
-//     [a, embed_fp2(FP_ZERO), embed_fp2(FP_ZERO)]
-// }
-
-// fn embed_fp6(x: Fp) -> Fp6 {
-//     embed_fp2_fp6(embed_fp2(x))
-// }
-
-// fn embed_fp12(x: Fp) -> Fp12 {
-//     [embed_fp6(x), embed_fp6(FP_ZERO)]
-// }
-
 fn gen_fp() -> Fp {
     let mut rng = thread_rng();
     let x64 = rng.gen::<u64>();
@@ -395,169 +383,176 @@ pub fn gen_fp12_sparse() -> Fp12 {
     sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
 }
 
-// fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
-//     let [c0, c1, c2] = c;
-//     let _c0 = conj_fp2(c0);
-//     let _c1 = conj_fp2(c1);
-//     let _c2 = conj_fp2(c2);
+fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
+    let n = n % 6;
+    let frob_t1 = frob_t1(n);
+    let frob_t2 = frob_t2(n);
 
-//     let n = n % 6;
-//     let frob_t1 = frob_t1(n);
-//     let frob_t2 = frob_t2(n);
+    if n % 2 != 0 {
+        Fp6 {
+            t0: conj_fp2(c.t0),
+            t1: frob_t1 * conj_fp2(c.t1),
+            t2: frob_t2 * conj_fp2(c.t2),
+        }
+    } else {
+        Fp6 {
+            t0: c.t0,
+            t1: frob_t1 * c.t1,
+            t2: frob_t2 * c.t2,
+        }
+    }
+}
 
-//     if n % 2 != 0 {
-//         [_c0, mul_fp2(frob_t1, _c1), mul_fp2(frob_t2, _c2)]
-//     } else {
-//         [c0, mul_fp2(frob_t1, c1), mul_fp2(frob_t2, c2)]
-//     }
-// }
+pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
+    let scale = embed_fp2_fp6(frob_z(n));
+    Fp12 {
+        z0: frob_fp6(n, f.z0),
+        z1: scale * frob_fp6(n, f.z1),
+    }
+}
 
-// pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
-//     let [f0, f1] = f;
-//     let scale = embed_fp2_fp6(frob_z(n));
+fn frob_t1(n: usize) -> Fp2 {
+    let pair = match n {
+        0 => [U256::one(), U256::zero()],
+        1 => [
+            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
+                .unwrap(),
+            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::zero(),
+        ],
+        3 => [
+            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
+                .unwrap(),
+            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::zero(),
+        ],
+        5 => [
+            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
+                .unwrap(),
+            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    };
+    Fp2 {
+        re: Fp { val: pair[0] },
+        im: Fp { val: pair[1] },
+    }
+}
 
-//     [frob_fp6(n, f0), mul_fp6(scale, frob_fp6(n, f1))]
-// }
+fn frob_t2(n: usize) -> Fp2 {
+    let pair = match n {
+        0 => [U256::one(), U256::zero()],
+        1 => [
+            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
+                .unwrap(),
+            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::zero(),
+        ],
+        3 => [
+            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
+                .unwrap(),
+            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::zero(),
+        ],
+        5 => [
+            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
+                .unwrap(),
+            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    };
+    Fp2 {
+        re: Fp { val: pair[0] },
+        im: Fp { val: pair[1] },
+    }
+}
 
-// fn frob_t1(n: usize) -> Fp2 {
-//     match n {
-//         0 => [
-//             U256::from_str("0x1").unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         1 => [
-//             U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
-//                 .unwrap(),
-//             U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
-//                 .unwrap(),
-//         ],
-//         2 => [
-//             U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-//                 .unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         3 => [
-//             U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
-//                 .unwrap(),
-//             U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
-//                 .unwrap(),
-//         ],
-//         4 => [
-//             U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         5 => [
-//             U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
-//                 .unwrap(),
-//             U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
-//                 .unwrap(),
-//         ],
-//         _ => panic!(),
-//     }
-// }
-
-// fn frob_t2(n: usize) -> Fp2 {
-//     match n {
-//         0 => [
-//             U256::from_str("0x1").unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         1 => [
-//             U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
-//                 .unwrap(),
-//             U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
-//                 .unwrap(),
-//         ],
-//         2 => [
-//             U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         3 => [
-//             U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
-//                 .unwrap(),
-//             U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
-//                 .unwrap(),
-//         ],
-//         4 => [
-//             U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-//                 .unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         5 => [
-//             U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
-//                 .unwrap(),
-//             U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
-//                 .unwrap(),
-//         ],
-//         _ => panic!(),
-//     }
-// }
-
-// fn frob_z(n: usize) -> Fp2 {
-//     match n {
-//         0 => [
-//             U256::from_str("0x1").unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         1 => [
-//             U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
-//                 .unwrap(),
-//             U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
-//                 .unwrap(),
-//         ],
-//         2 => [
-//             U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
-//                 .unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         3 => [
-//             U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
-//                 .unwrap(),
-//             U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
-//                 .unwrap(),
-//         ],
-//         4 => [
-//             U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-//                 .unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         5 => [
-//             U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
-//                 .unwrap(),
-//             U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
-//                 .unwrap(),
-//         ],
-//         6 => [
-//             U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
-//                 .unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         7 => [
-//             U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
-//                 .unwrap(),
-//             U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
-//                 .unwrap(),
-//         ],
-//         8 => [
-//             U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         9 => [
-//             U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
-//                 .unwrap(),
-//             U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
-//                 .unwrap(),
-//         ],
-//         10 => [
-//             U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
-//             U256::from_str("0x0").unwrap(),
-//         ],
-//         11 => [
-//             U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
-//                 .unwrap(),
-//             U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
-//                 .unwrap(),
-//         ],
-//         _ => panic!(),
-//     }
-// }
+fn frob_z(n: usize) -> Fp2 {
+    let pair = match n {
+        0 => [U256::one(), U256::zero()],
+        1 => [
+            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
+                .unwrap(),
+            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
+                .unwrap(),
+        ],
+        2 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
+                .unwrap(),
+            U256::zero(),
+        ],
+        3 => [
+            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
+                .unwrap(),
+            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
+                .unwrap(),
+        ],
+        4 => [
+            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
+                .unwrap(),
+            U256::zero(),
+        ],
+        5 => [
+            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
+                .unwrap(),
+            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
+                .unwrap(),
+        ],
+        6 => [
+            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
+                .unwrap(),
+            U256::zero(),
+        ],
+        7 => [
+            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
+                .unwrap(),
+            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
+                .unwrap(),
+        ],
+        8 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
+            U256::zero(),
+        ],
+        9 => [
+            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
+                .unwrap(),
+            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
+                .unwrap(),
+        ],
+        10 => [
+            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
+            U256::zero(),
+        ],
+        11 => [
+            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
+                .unwrap(),
+            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
+                .unwrap(),
+        ],
+        _ => panic!(),
+    };
+    Fp2 {
+        re: Fp { val: pair[0] },
+        im: Fp { val: pair[1] },
+    }
+}
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index be65142a..cc391ac6 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -3,7 +3,7 @@
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{fp12_to_vec, gen_fp12, gen_fp12_sparse, Fp12};
+use crate::bn254_arithmetic::{fp12_to_vec, gen_fp12, gen_fp12_sparse, frob_fp12, Fp12};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
 
@@ -65,33 +65,32 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-// #[test]
-// fn test_frob_fp12() -> Result<()> {
-//     let ptr = U256::from(100);
+#[test]
+fn test_frob_fp12() -> Result<()> {
+    let ptr = U256::from(100);
+    let f: Fp12 = gen_fp12();
 
-//     let f: Fp12 = gen_fp12();
+    let mut stack = vec![ptr];
+    stack.extend(fp12_to_vec(f));
+    stack.extend(vec![ptr]);
 
-//     let mut stack = vec![ptr];
-//     stack.extend(fp12_to_vec(f));
-//     stack.extend(vec![ptr]);
+    let out_frob1: Vec<U256> = get_output("test_frob_fp12_1", stack.clone());
+    let out_frob2: Vec<U256> = get_output("test_frob_fp12_2", stack.clone());
+    let out_frob3: Vec<U256> = get_output("test_frob_fp12_3", stack.clone());
+    let out_frob6: Vec<U256> = get_output("test_frob_fp12_6", stack);
 
-//     let out_frob1: Vec<U256> = get_output("test_frob_fp12_1", stack.clone());
-//     let out_frob2: Vec<U256> = get_output("test_frob_fp12_2", stack.clone());
-//     let out_frob3: Vec<U256> = get_output("test_frob_fp12_3", stack.clone());
-//     let out_frob6: Vec<U256> = get_output("test_frob_fp12_6", stack);
+    let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
+    let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
+    let exp_frob3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
+    let exp_frob6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
 
-//     let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
-//     let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
-//     let exp_frob3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
-//     let exp_frob6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
+    assert_eq!(out_frob1, exp_frob1);
+    assert_eq!(out_frob2, exp_frob2);
+    assert_eq!(out_frob3, exp_frob3);
+    assert_eq!(out_frob6, exp_frob6);
 
-//     assert_eq!(out_frob1, exp_frob1);
-//     assert_eq!(out_frob2, exp_frob2);
-//     assert_eq!(out_frob3, exp_frob3);
-//     assert_eq!(out_frob6, exp_frob6);
-
-//     Ok(())
-// }
+    Ok(())
+}
 
 // #[test]
 // fn test_inv_fp12() -> Result<()> {
diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs
index 78d74251..9aa0cb03 100644
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@@ -45,7 +45,7 @@ pub(crate) fn stack_peeks<F: Field>(state: &GenerationState<F>) -> Option<Vec<U2
     let mut stack: Vec<U256> = vec![];
     for i in 0..n {
         stack.extend(vec![state.memory.get(MemoryAddress::new(
-            state.registers.effective_context(),
+            state.registers.code_context(),
             Segment::Stack,
             n - 1 - i,
         ))])

From 37ad340774002b5a0c0603113086a64b9cae8358 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 12:56:30 +0700
Subject: [PATCH 115/201] frob format

---
 evm/src/bn254_arithmetic.rs       | 254 +++++++++++++++++++++---------
 evm/src/cpu/kernel/tests/bn254.rs |   2 +-
 2 files changed, 181 insertions(+), 75 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 81abe5fe..c6db5d94 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -385,8 +385,8 @@ pub fn gen_fp12_sparse() -> Fp12 {
 
 fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
     let n = n % 6;
-    let frob_t1 = frob_t1(n);
-    let frob_t2 = frob_t2(n);
+    let frob_t1 = FROB_T1[n];
+    let frob_t2 = FROB_T2[n];
 
     if n % 2 != 0 {
         Fp6 {
@@ -411,81 +411,187 @@ pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     }
 }
 
-fn frob_t1(n: usize) -> Fp2 {
-    let pair = match n {
-        0 => [U256::one(), U256::zero()],
-        1 => [
-            U256::from_str("0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d")
-                .unwrap(),
-            U256::from_str("0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::zero(),
-        ],
-        3 => [
-            U256::from_str("0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d")
-                .unwrap(),
-            U256::from_str("0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::zero(),
-        ],
-        5 => [
-            U256::from_str("0x28be74d4bb943f51699582b87809d9caf71614d4b0b71f3a62e913ee1dada9e4")
-                .unwrap(),
-            U256::from_str("0x14a88ae0cb747b99c2b86abcbe01477a54f40eb4c3f6068dedae0bcec9c7aac7")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    };
+const FROB_T1: [Fp2; 6] = [
     Fp2 {
-        re: Fp { val: pair[0] },
-        im: Fp { val: pair[1] },
-    }
-}
+        re: Fp { val: U256::one() },
+        im: Fp { val: U256::zero() },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0x99e39557176f553d,
+                0xb78cc310c2c3330c,
+                0x4c0bec3cf559b143,
+                0x2fb347984f7911f7,
+            ]),
+        },
+        im: Fp {
+            val: U256([
+                0x1665d51c640fcba2,
+                0x32ae2a1d0b7c9dce,
+                0x4ba4cc8bd75a0794,
+                0x16c9e55061ebae20,
+            ]),
+        },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0xe4bd44e5607cfd48,
+                0xc28f069fbb966e3d,
+                0x5e6dd9e7e0acccb0,
+                0x30644e72e131a029,
+            ]),
+        },
+        im: Fp { val: U256::zero() },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0x7b746ee87bdcfb6d,
+                0x805ffd3d5d6942d3,
+                0xbaff1c77959f25ac,
+                0x856e078b755ef0a,
+            ]),
+        },
+        im: Fp {
+            val: U256([
+                0x380cab2baaa586de,
+                0x0fdf31bf98ff2631,
+                0xa9f30e6dec26094f,
+                0x4f1de41b3d1766f,
+            ]),
+        },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0x5763473177fffffe,
+                0xd4f263f1acdb5c4f,
+                0x59e26bcea0d48bac,
+                0x0,
+            ]),
+        },
+        im: Fp { val: U256::zero() },
+    },
+    Fp2 {
+        re: Fp {
+            val: U256([
+                0x62e913ee1dada9e4,
+                0xf71614d4b0b71f3a,
+                0x699582b87809d9ca,
+                0x28be74d4bb943f51,
+            ]),
+        },
+        im: Fp {
+            val: U256([
+                0xedae0bcec9c7aac7,
+                0x54f40eb4c3f6068d,
+                0xc2b86abcbe01477a,
+                0x14a88ae0cb747b99,
+            ]),
+        },
+    },
+];
 
-fn frob_t2(n: usize) -> Fp2 {
-    let pair = match n {
-        0 => [U256::one(), U256::zero()],
-        1 => [
-            U256::from_str("0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762")
-                .unwrap(),
-            U256::from_str("0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::zero(),
-        ],
-        3 => [
-            U256::from_str("0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066")
-                .unwrap(),
-            U256::from_str("0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::zero(),
-        ],
-        5 => [
-            U256::from_str("0x1ee972ae6a826a7d1d9da40771b6f589de1afb54342c724fa97bda050992657f")
-                .unwrap(),
-            U256::from_str("0x10de546ff8d4ab51d2b513cdbb25772454326430418536d15721e37e70c255c9")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    };
+const FROB_T2: [Fp2; 6] = [
     Fp2 {
-        re: Fp { val: pair[0] },
-        im: Fp { val: pair[1] },
-    }
-}
+        re: Fp { val: U256::one() },
+        im: Fp { val: U256::zero() },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x848a1f55921ea762,
+                    0xd33365f7be94ec72,
+                    0x80f3c0b75a181e84,
+                    0x5b54f5e64eea801,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0xc13b4711cd2b8126,
+                    0x3685d2ea1bdec763,
+                    0x9f3a80b03b0b1c92,
+                    0x2c145edbe7fd8aee,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x5763473177fffffe,
+                    0xd4f263f1acdb5c4f,
+                    0x59e26bcea0d48bac,
+                    0x0,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x0e1a92bc3ccbf066,
+                    0xe633094575b06bcb,
+                    0x19bee0f7b5b2444e,
+                    0xbc58c6611c08dab,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x5fe3ed9d730c239f,
+                    0xa44a9e08737f96e5,
+                    0xfeb0f6ef0cd21d04,
+                    0x23d5e999e1910a12,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xe4bd44e5607cfd48,
+                    0xc28f069fbb966e3d,
+                    0x5e6dd9e7e0acccb0,
+                    0x30644e72e131a029,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xa97bda050992657f,
+                    0xde1afb54342c724f,
+                    0x1d9da40771b6f589,
+                    0x1ee972ae6a826a7d,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x5721e37e70c255c9,
+                    0x54326430418536d1,
+                    0xd2b513cdbb257724,
+                    0x10de546ff8d4ab51,
+                ]),
+            }
+        },
+    },
+];
 
 fn frob_z(n: usize) -> Fp2 {
     let pair = match n {
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index cc391ac6..f1e9f354 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -3,7 +3,7 @@
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{fp12_to_vec, gen_fp12, gen_fp12_sparse, frob_fp12, Fp12};
+use crate::bn254_arithmetic::{fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, Fp12};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::run_interpreter;
 

From fe91e119205af15384502eae44bb5bf479b020b4 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 13:11:23 +0700
Subject: [PATCH 116/201] frob format

---
 evm/src/bn254_arithmetic.rs | 300 +++++++++++++++++++++++++-----------
 1 file changed, 214 insertions(+), 86 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index c6db5d94..40aa0ff8 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,5 +1,4 @@
 use std::ops::{Add, Div, Mul, Neg, Sub};
-use std::str::FromStr;
 
 use ethereum_types::U256;
 use rand::{thread_rng, Rng};
@@ -148,14 +147,6 @@ const FP2_ZERO: Fp2 = Fp2 {
     im: FP_ZERO,
 };
 
-fn flatten_fp2(a: Fp2) -> [U256; 2] {
-    [a.re.val, a.im.val]
-}
-
-fn embed_fp_fp2(x: Fp) -> Fp2 {
-    Fp2 { re: x, im: FP_ZERO }
-}
-
 fn conj_fp2(a: Fp2) -> Fp2 {
     Fp2 {
         re: a.re,
@@ -254,11 +245,11 @@ impl Mul for Fp6 {
 //     [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
 // }
 
-fn embed_fp2_fp6(x: Fp2) -> Fp6 {
+fn mul_fp2_fp6(x: Fp2, f: Fp6) -> Fp6 {
     Fp6 {
-        t0: x,
-        t1: FP2_ZERO,
-        t2: FP2_ZERO,
+        t0: x * f.t0,
+        t1: x * f.t1,
+        t2: x * f.t2,
     }
 }
 
@@ -292,7 +283,10 @@ impl Mul for Fp12 {
 
 fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     let g0 = Fp6 {
-        t0: embed_fp_fp2(g000),
+        t0: Fp2 {
+            re: g000,
+            im: FP_ZERO,
+        },
         t1: g01,
         t2: FP2_ZERO,
     };
@@ -404,10 +398,10 @@ fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
 }
 
 pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
-    let scale = embed_fp2_fp6(frob_z(n));
+    let n = n % 12;
     Fp12 {
         z0: frob_fp6(n, f.z0),
-        z1: scale * frob_fp6(n, f.z1),
+        z1: mul_fp2_fp6(FROB_Z[n], frob_fp6(n, f.z1)),
     }
 }
 
@@ -451,7 +445,7 @@ const FROB_T1: [Fp2; 6] = [
                 0x7b746ee87bdcfb6d,
                 0x805ffd3d5d6942d3,
                 0xbaff1c77959f25ac,
-                0x856e078b755ef0a,
+                0x0856e078b755ef0a,
             ]),
         },
         im: Fp {
@@ -459,7 +453,7 @@ const FROB_T1: [Fp2; 6] = [
                 0x380cab2baaa586de,
                 0x0fdf31bf98ff2631,
                 0xa9f30e6dec26094f,
-                0x4f1de41b3d1766f,
+                0x04f1de41b3d1766f,
             ]),
         },
     },
@@ -593,72 +587,206 @@ const FROB_T2: [Fp2; 6] = [
     },
 ];
 
-fn frob_z(n: usize) -> Fp2 {
-    let pair = match n {
-        0 => [U256::one(), U256::zero()],
-        1 => [
-            U256::from_str("0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470")
-                .unwrap(),
-            U256::from_str("0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac")
-                .unwrap(),
-        ],
-        2 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49")
-                .unwrap(),
-            U256::zero(),
-        ],
-        3 => [
-            U256::from_str("0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f")
-                .unwrap(),
-            U256::from_str("0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101")
-                .unwrap(),
-        ],
-        4 => [
-            U256::from_str("0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48")
-                .unwrap(),
-            U256::zero(),
-        ],
-        5 => [
-            U256::from_str("0x757cab3a41d3cdc072fc0af59c61f302cfa95859526b0d41264475e420ac20f")
-                .unwrap(),
-            U256::from_str("0xca6b035381e35b618e9b79ba4e2606ca20b7dfd71573c93e85845e34c4a5b9c")
-                .unwrap(),
-        ],
-        6 => [
-            U256::from_str("0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46")
-                .unwrap(),
-            U256::zero(),
-        ],
-        7 => [
-            U256::from_str("0x1ddf9756b8cbf849cf96a5d90a9accfd3b2f4c893f42a9166615563bfbb318d7")
-                .unwrap(),
-            U256::from_str("0xbfab77f2c36b843121dc8b86f6c4ccf2307d819d98302a771c39bb757899a9b")
-                .unwrap(),
-        ],
-        8 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe").unwrap(),
-            U256::zero(),
-        ],
-        9 => [
-            U256::from_str("0x1687cca314aebb6dc866e529b0d4adcd0e34b703aa1bf84253b10eddb9a856c8")
-                .unwrap(),
-            U256::from_str("0x2fb855bcd54a22b6b18456d34c0b44c0187dc4add09d90a0c58be1eae3bc3c46")
-                .unwrap(),
-        ],
-        10 => [
-            U256::from_str("0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177ffffff").unwrap(),
-            U256::zero(),
-        ],
-        11 => [
-            U256::from_str("0x290c83bf3d14634db120850727bb392d6a86d50bd34b19b929bc44b896723b38")
-                .unwrap(),
-            U256::from_str("0x23bd9e3da9136a739f668e1adc9ef7f0f575ec93f71a8df953c846338c32a1ab")
-                .unwrap(),
-        ],
-        _ => panic!(),
-    };
+const FROB_Z: [Fp2; 12] = [
     Fp2 {
-        re: Fp { val: pair[0] },
-        im: Fp { val: pair[1] },
-    }
-}
+        re: { Fp { val: U256::one() } },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xd60b35dadcc9e470,
+                    0x5c521e08292f2176,
+                    0xe8b99fdd76e68b60,
+                    0x1284b71c2865a7df,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0xca5cf05f80f362ac,
+                    0x747992778eeec7e5,
+                    0xa6327cfe12150b8e,
+                    0x246996f3b4fae7e6,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xe4bd44e5607cfd49,
+                    0xc28f069fbb966e3d,
+                    0x5e6dd9e7e0acccb0,
+                    0x30644e72e131a029,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xe86f7d391ed4a67f,
+                    0x894cb38dbe55d24a,
+                    0xefe9608cd0acaa90,
+                    0x19dc81cfcc82e4bb,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x7694aa2bf4c0c101,
+                    0x7f03a5e397d439ec,
+                    0x06cbeee33576139d,
+                    0xabf8b60be77d73,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0xe4bd44e5607cfd48,
+                    0xc28f069fbb966e3d,
+                    0x5e6dd9e7e0acccb0,
+                    0x30644e72e131a029,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x1264475e420ac20f,
+                    0x2cfa95859526b0d4,
+                    0x072fc0af59c61f30,
+                    0x757cab3a41d3cdc,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0xe85845e34c4a5b9c,
+                    0xa20b7dfd71573c93,
+                    0x18e9b79ba4e2606c,
+                    0xca6b035381e35b6,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x3c208c16d87cfd46,
+                    0x97816a916871ca8d,
+                    0xb85045b68181585d,
+                    0x30644e72e131a029,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x6615563bfbb318d7,
+                    0x3b2f4c893f42a916,
+                    0xcf96a5d90a9accfd,
+                    0x1ddf9756b8cbf849,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x71c39bb757899a9b,
+                    0x2307d819d98302a7,
+                    0x121dc8b86f6c4ccf,
+                    0xbfab77f2c36b843,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x5763473177fffffe,
+                    0xd4f263f1acdb5c4f,
+                    0x59e26bcea0d48bac,
+                    0x0,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x53b10eddb9a856c8,
+                    0x0e34b703aa1bf842,
+                    0xc866e529b0d4adcd,
+                    0x1687cca314aebb6d,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0xc58be1eae3bc3c46,
+                    0x187dc4add09d90a0,
+                    0xb18456d34c0b44c0,
+                    0x2fb855bcd54a22b6,
+                ]),
+            }
+        },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x5763473177ffffff,
+                    0xd4f263f1acdb5c4f,
+                    0x59e26bcea0d48bac,
+                    0x0,
+                ]),
+            }
+        },
+        im: { Fp { val: U256::zero() } },
+    },
+    Fp2 {
+        re: {
+            Fp {
+                val: U256([
+                    0x29bc44b896723b38,
+                    0x6a86d50bd34b19b9,
+                    0xb120850727bb392d,
+                    0x290c83bf3d14634d,
+                ]),
+            }
+        },
+        im: {
+            Fp {
+                val: U256([
+                    0x53c846338c32a1ab,
+                    0xf575ec93f71a8df9,
+                    0x9f668e1adc9ef7f0,
+                    0x23bd9e3da9136a73,
+                ]),
+            }
+        },
+    },
+];

From 9977ae03bdd14896de0cf81802d9163c919454e3 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 14:41:09 +0700
Subject: [PATCH 117/201] new inverse

---
 evm/src/bn254_arithmetic.rs        | 149 +++++++++++++++++++----------
 evm/src/bn254_curve_pairing.rs     |  70 ++++++++++----
 evm/src/cpu/kernel/tests/bn254.rs  |  26 ++---
 evm/src/generation/prover_input.rs |  17 ++--
 evm/src/lib.rs                     |   1 +
 5 files changed, 172 insertions(+), 91 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 40aa0ff8..a174278e 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,6 +1,7 @@
 use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use ethereum_types::U256;
+use itertools::Itertools;
 use rand::{thread_rng, Rng};
 
 pub const BN_BASE: U256 = U256([
@@ -10,7 +11,7 @@ pub const BN_BASE: U256 = U256([
     0x30644e72e131a029,
 ]);
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp {
     val: U256,
 }
@@ -59,8 +60,8 @@ impl Div for Fp {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
-        let inv = exp_fp(self, BN_BASE - 2);
-        rhs * inv
+        let inv = exp_fp(rhs, BN_BASE - 2);
+        self * inv
     }
 }
 
@@ -79,7 +80,7 @@ fn exp_fp(x: Fp, e: U256) -> Fp {
     product
 }
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp2 {
     re: Fp,
     im: Fp,
@@ -133,12 +134,12 @@ impl Div for Fp2 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
-        let norm = self.re * self.re + self.im * self.im;
+        let norm = rhs.re * rhs.re + rhs.im * rhs.im;
         let inv = Fp2 {
-            re: self.re / norm,
-            im: -self.im / norm,
+            re: rhs.re / norm,
+            im: -rhs.im / norm,
         };
-        rhs * inv
+        self * inv
     }
 }
 
@@ -154,6 +155,14 @@ fn conj_fp2(a: Fp2) -> Fp2 {
     }
 }
 
+fn normalize_fp2(a: Fp2) -> Fp2 {
+    let norm = a.re * a.re + a.im * a.im;
+    Fp2 {
+        re: a.re / norm,
+        im: a.im / norm,
+    }
+}
+
 fn i9(a: Fp2) -> Fp2 {
     let nine = Fp { val: U256::from(9) };
     Fp2 {
@@ -162,7 +171,7 @@ fn i9(a: Fp2) -> Fp2 {
     }
 }
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp6 {
     t0: Fp2,
     t1: Fp2,
@@ -217,23 +226,17 @@ impl Mul for Fp6 {
     }
 }
 
-// impl Div for Fp6 {
-//     type Output = Self;
+impl Div for Fp6 {
+    type Output = Self;
 
-//     fn div(self, rhs: Self) -> Self::Output {
-//         let b = frob_fp6(1, self) * frob_fp6(3, self);
-//         let e = (b * frob_fp6(5, self)).t0;
-//         let n = (e * conj_fp2(e)).re;
-//         let d = e / embed_fp2(n);
-//         let f = frob_fp6(1, b);
-//         let inv = Fp6 {
-//             t0: d * f.t0,
-//             t1: d * f.t1,
-//             t2: d * f.t2,
-//         };
-//         rhs * inv
-//     }
-// }
+    fn div(self, rhs: Self) -> Self::Output {
+        let b = frob_fp6(1, rhs) * frob_fp6(3, rhs);
+        let e = normalize_fp2((b * frob_fp6(5, rhs)).t0);
+        let f = frob_fp6(1, b);
+        let inv = mul_fp2_fp6(e, f);
+        self * inv
+    }
+}
 
 // pub fn inv_fp6(c: Fp6) -> Fp6 {
 //     let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
@@ -245,6 +248,12 @@ impl Mul for Fp6 {
 //     [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
 // }
 
+pub const FP6_ZERO: Fp6 = Fp6 {
+    t0: FP2_ZERO,
+    t1: FP2_ZERO,
+    t2: FP2_ZERO,
+};
+
 fn mul_fp2_fp6(x: Fp2, f: Fp6) -> Fp6 {
     Fp6 {
         t0: x * f.t0,
@@ -261,7 +270,7 @@ fn sh(c: Fp6) -> Fp6 {
     }
 }
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp12 {
     z0: Fp6,
     z1: Fp6,
@@ -281,6 +290,49 @@ impl Mul for Fp12 {
     }
 }
 
+impl Div for Fp12 {
+    type Output = Self;
+
+    fn div(self, rhs: Self) -> Self::Output {
+        let a = (frob_fp12(1, rhs) * frob_fp12(7, rhs)).z0;
+        let b = a * frob_fp6(2, a);
+        let c = normalize_fp2((b * frob_fp6(4, a)).t0);
+        let g = frob_fp6(1, b);
+        let e = mul_fp2_fp6(c, g);
+        let inv = Fp12 {
+            z0: e * rhs.z0,
+            z1: -e * rhs.z1,
+        };
+        self * inv
+    }
+}
+
+// pub fn inv_fp12(f: Fp12) -> Fp12 {
+    // let a = (frob_fp12(1, self) * frob_fp12(7, self))[0];
+    // let b = a * frob_fp6(2, a);
+    // let c = (b * frob_fp6(4, a))[0];
+    // let d = c / norm(c);
+    // let [g0, g1, g2] = frob_fp6(1, b);
+    // let e = [d * g0, d * g1, d * g2];
+    // [e * self.z0, - e * self.z1]
+// }
+
+pub const UNIT_FP12: Fp12 = Fp12 {
+    z0: Fp6 {
+        t0: Fp2 {
+            re: Fp {val: U256::one()},
+            im: FP_ZERO
+        },
+        t1: FP2_ZERO,
+        t2: FP2_ZERO,
+    },
+    z1: FP6_ZERO,
+};
+
+pub fn inv_fp12(f: Fp12) -> Fp12 {
+    UNIT_FP12 / f
+}
+
 fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     let g0 = Fp6 {
         t0: Fp2 {
@@ -300,19 +352,6 @@ fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     Fp12 { z0: g0, z1: g1 }
 }
 
-// pub fn inv_fp12(f: Fp12) -> Fp12 {
-//     let [f0, f1] = f;
-//     let a = mul_fp12(frob_fp12(1, f), frob_fp12(7, f))[0];
-//     let b = mul_fp6(a, frob_fp6(2, a));
-//     let c = mul_fp6(b, frob_fp6(4, a))[0];
-//     let n = mul_fp2(c, conj_fp2(c))[0];
-//     let i = inv_fp(n);
-//     let d = mul_fp2(embed_fp2(i), c);
-//     let [g0, g1, g2] = frob_fp6(1, b);
-//     let e = [mul_fp2(d, g0), mul_fp2(d, g1), mul_fp2(d, g2)];
-//     [mul_fp6(e, f0), neg_fp6(mul_fp6(e, f1))]
-// }
-
 pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
     [
         f.z0.t0.re.val,
@@ -334,15 +373,29 @@ pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
     fp12_to_array(f).into_iter().collect()
 }
 
-// pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
-//     xs.into_iter()
-//         .tuples::<(U256, U256)>()
-//         .map(|(v1, v2)| [v1, v2])
-//         .tuples()
-//         .map(|(a1, a2, a3, a4, a5, a6)| [[a1, a2, a3], [a4, a5, a6]])
-//         .next()
-//         .unwrap()
-// }
+pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
+    xs.into_iter()
+        .tuples::<(U256, U256)>()
+        .map(|(v1, v2)| Fp2 {
+            re: Fp { val: v1 },
+            im: Fp { val: v2 },
+        })
+        .tuples()
+        .map(|(a1, a2, a3, a4, a5, a6)| Fp12 {
+            z0: Fp6 {
+                t0: a1,
+                t1: a2,
+                t2: a3,
+            },
+            z1: Fp6 {
+                t0: a4,
+                t1: a5,
+                t2: a6,
+            },
+        })
+        .next()
+        .unwrap()
+}
 
 fn gen_fp() -> Fp {
     let mut rng = thread_rng();
diff --git a/evm/src/bn254_curve_pairing.rs b/evm/src/bn254_curve_pairing.rs
index dba6c5dd..a6a44677 100644
--- a/evm/src/bn254_curve_pairing.rs
+++ b/evm/src/bn254_curve_pairing.rs
@@ -1,26 +1,54 @@
-// pub type Curve = [Fp; 2];
-// pub type TwistedCurve = [Fp2; 2];
+use ethereum_types::U256;
 
-// pub fn curve_generator() -> Curve {
-//     [U256::one(), U256::from(2)]
-// }
+use crate::bn254_arithmetic::{Fp, Fp12};
 
-// pub fn twisted_curve_generator() -> TwistedCurve {
-//     [
-//         [
-//             U256::from_str("0x1800deef121f1e76426a00665e5c4479674322d4f75edadd46debd5cd992f6ed")
-//                 .unwrap(),
-//             U256::from_str("0x198e9393920d483a7260bfb731fb5d25f1aa493335a9e71297e485b7aef312c2")
-//                 .unwrap(),
-//         ],
-//         [
-//             U256::from_str("0x12c85ea5db8c6deb4aab71808dcb408fe3d1e7690c43d37b4ce6cc0166fa7daa")
-//                 .unwrap(),
-//             U256::from_str("0x90689d0585ff075ec9e99ad690c3395bc4b313370b38ef355acdadcd122975b")
-//                 .unwrap(),
-//         ],
-//     ]
-// }
+pub type Curve = [Fp; 2];
+pub type TwistedCurve = [[Fp; 2]; 2];
+
+pub fn curve_generator() -> Curve {
+    [Fp { val: U256::one() }, Fp { val: U256::from(2) }]
+}
+
+pub fn twisted_curve_generator() -> TwistedCurve {
+    [
+        [
+            Fp {
+                val: U256([
+                    0x46debd5cd992f6ed,
+                    0x674322d4f75edadd,
+                    0x426a00665e5c4479,
+                    0x1800deef121f1e76,
+                ]),
+            },
+            Fp {
+                val: U256([
+                    0x97e485b7aef312c2,
+                    0xf1aa493335a9e712,
+                    0x7260bfb731fb5d25,
+                    0x198e9393920d483a,
+                ]),
+            },
+        ],
+        [
+            Fp {
+                val: U256([
+                    0x4ce6cc0166fa7daa,
+                    0xe3d1e7690c43d37b,
+                    0x4aab71808dcb408f,
+                    0x12c85ea5db8c6deb,
+                ]),
+            },
+            Fp {
+                val: U256([
+                    0x55acdadcd122975b,
+                    0xbc4b313370b38ef3,
+                    0xec9e99ad690c3395,
+                    0x90689d0585ff075,
+                ]),
+            },
+        ],
+    ]
+}
 
 pub fn power(f: Fp12) -> Fp12 {
     const EXPS4: [(usize, usize, usize); 64] = [
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index f1e9f354..235ebcec 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -1,4 +1,4 @@
-// use std::str::FromStr;
+use std::str::FromStr;
 
 use anyhow::Result;
 use ethereum_types::U256;
@@ -92,22 +92,22 @@ fn test_frob_fp12() -> Result<()> {
     Ok(())
 }
 
-// #[test]
-// fn test_inv_fp12() -> Result<()> {
-//     let ptr = U256::from(200);
-//     let inv = U256::from(300);
+#[test]
+fn test_inv_fp12() -> Result<()> {
+    let ptr = U256::from(200);
+    let inv = U256::from(300);
 
-//     let f: Fp12 = gen_fp12();
-//     let mut stack = vec![ptr];
-//     stack.extend(fp12_to_vec(f));
-//     stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
+    let f: Fp12 = gen_fp12();
+    let mut stack = vec![ptr];
+    stack.extend(fp12_to_vec(f));
+    stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
 
-//     let output: Vec<U256> = get_output("test_inv_fp12", stack);
+    let output: Vec<U256> = get_output("test_inv_fp12", stack);
 
-//     assert_eq!(output, vec![]);
+    assert_eq!(output, vec![]);
 
-//     Ok(())
-// }
+    Ok(())
+}
 
 // #[test]
 // fn test_power() -> Result<()> {
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 373304e7..9f305e41 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -4,7 +4,7 @@ use anyhow::{bail, Error};
 use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;
 
-// use crate::bn254_arithmetic::{fp12_to_array, inv_fp12, vec_to_fp12};
+use crate::bn254_arithmetic::{fp12_to_array, inv_fp12, vec_to_fp12};
 use crate::generation::prover_input::EvmField::{
     Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
@@ -75,8 +75,7 @@ impl<F: Field> GenerationState<F> {
             "component_11" => 11,
             _ => panic!("out of bounds"),
         };
-        // field.inverse_fp12(n, xs)
-        todo!()
+        field.inverse_fp12(n, xs)
     }
 
     /// MPT data.
@@ -197,12 +196,12 @@ impl EvmField {
         modexp(x, q, n)
     }
 
-    // fn inverse_fp12(&self, n: usize, xs: Vec<U256>) -> U256 {
-    //     let offset = 12 - n;
-    //     let vec: Vec<U256> = xs[offset..].to_vec();
-    //     let f = fp12_to_array(inv_fp12(vec_to_fp12(vec)));
-    //     f[n]
-    // }
+    fn inverse_fp12(&self, n: usize, xs: Vec<U256>) -> U256 {
+        let offset = 12 - n;
+        let vec: Vec<U256> = xs[offset..].to_vec();
+        let f = fp12_to_array(inv_fp12(vec_to_fp12(vec)));
+        f[n]
+    }
 }
 
 fn modexp(x: U256, e: U256, n: U256) -> U256 {
diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index 226a8b77..689e6c57 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -9,6 +9,7 @@
 pub mod all_stark;
 pub mod arithmetic;
 pub mod bn254_arithmetic;
+// pub mod bn254_curve_pairing;
 pub mod config;
 pub mod constraint_consumer;
 pub mod cpu;

From bc9c431e3ba18a3f5d9cf660ecf54418e6bdfdd3 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 14:42:30 +0700
Subject: [PATCH 118/201] remove comments

---
 evm/src/bn254_arithmetic.rs | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index a174278e..96103b8b 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -238,16 +238,6 @@ impl Div for Fp6 {
     }
 }
 
-// pub fn inv_fp6(c: Fp6) -> Fp6 {
-//     let b = mul_fp6(frob_fp6(1, c), frob_fp6(3, c));
-//     let e = mul_fp6(b, frob_fp6(5, c))[0];
-//     let n = mul_fp2(e, conj_fp2(e))[0];
-//     let i = inv_fp(n);
-//     let d = mul_fp2(embed_fp2(i), e);
-//     let [f0, f1, f2] = frob_fp6(1, b);
-//     [mul_fp2(d, f0), mul_fp2(d, f1), mul_fp2(d, f2)]
-// }
-
 pub const FP6_ZERO: Fp6 = Fp6 {
     t0: FP2_ZERO,
     t1: FP2_ZERO,
@@ -307,16 +297,6 @@ impl Div for Fp12 {
     }
 }
 
-// pub fn inv_fp12(f: Fp12) -> Fp12 {
-    // let a = (frob_fp12(1, self) * frob_fp12(7, self))[0];
-    // let b = a * frob_fp6(2, a);
-    // let c = (b * frob_fp6(4, a))[0];
-    // let d = c / norm(c);
-    // let [g0, g1, g2] = frob_fp6(1, b);
-    // let e = [d * g0, d * g1, d * g2];
-    // [e * self.z0, - e * self.z1]
-// }
-
 pub const UNIT_FP12: Fp12 = Fp12 {
     z0: Fp6 {
         t0: Fp2 {

From 0daaa3bf4e06fc26da33185bf50fd2d9cc8de09b Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 14:48:05 +0700
Subject: [PATCH 119/201] org

---
 evm/src/bn254_arithmetic.rs | 47 ++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 96103b8b..2f172a9d 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -65,7 +65,8 @@ impl Div for Fp {
     }
 }
 
-const FP_ZERO: Fp = Fp { val: U256::zero() };
+const ZERO_FP: Fp = Fp { val: U256::zero() };
+const UNIT_FP: Fp = Fp { val: U256::one() };
 
 fn exp_fp(x: Fp, e: U256) -> Fp {
     let mut current = x;
@@ -143,9 +144,14 @@ impl Div for Fp2 {
     }
 }
 
-const FP2_ZERO: Fp2 = Fp2 {
-    re: FP_ZERO,
-    im: FP_ZERO,
+const ZERO_FP2: Fp2 = Fp2 {
+    re: ZERO_FP,
+    im: ZERO_FP,
+};
+
+const UNIT_FP2: Fp2 = Fp2 {
+    re: UNIT_FP,
+    im: ZERO_FP,
 };
 
 fn conj_fp2(a: Fp2) -> Fp2 {
@@ -238,10 +244,16 @@ impl Div for Fp6 {
     }
 }
 
-pub const FP6_ZERO: Fp6 = Fp6 {
-    t0: FP2_ZERO,
-    t1: FP2_ZERO,
-    t2: FP2_ZERO,
+pub const ZERO_FP6: Fp6 = Fp6 {
+    t0: ZERO_FP2,
+    t1: ZERO_FP2,
+    t2: ZERO_FP2,
+};
+
+pub const UNIT_FP6: Fp6 = Fp6 {
+    t0: UNIT_FP2,
+    t1: ZERO_FP2,
+    t2: ZERO_FP2,
 };
 
 fn mul_fp2_fp6(x: Fp2, f: Fp6) -> Fp6 {
@@ -298,15 +310,8 @@ impl Div for Fp12 {
 }
 
 pub const UNIT_FP12: Fp12 = Fp12 {
-    z0: Fp6 {
-        t0: Fp2 {
-            re: Fp {val: U256::one()},
-            im: FP_ZERO
-        },
-        t1: FP2_ZERO,
-        t2: FP2_ZERO,
-    },
-    z1: FP6_ZERO,
+    z0: UNIT_FP6,
+    z1: ZERO_FP6,
 };
 
 pub fn inv_fp12(f: Fp12) -> Fp12 {
@@ -317,16 +322,16 @@ fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     let g0 = Fp6 {
         t0: Fp2 {
             re: g000,
-            im: FP_ZERO,
+            im: ZERO_FP,
         },
         t1: g01,
-        t2: FP2_ZERO,
+        t2: ZERO_FP2,
     };
 
     let g1 = Fp6 {
-        t0: FP2_ZERO,
+        t0: ZERO_FP2,
         t1: g11,
-        t2: FP2_ZERO,
+        t2: ZERO_FP2,
     };
 
     Fp12 { z0: g0, z1: g1 }

From 6e215386736f98e64198de1c531a200d5a54a6d0 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 15:11:05 +0700
Subject: [PATCH 120/201] comments

---
 evm/src/bn254_arithmetic.rs | 76 +++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 28 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 2f172a9d..5e795b45 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -81,6 +81,8 @@ fn exp_fp(x: Fp, e: U256) -> Fp {
     product
 }
 
+/// The degree 2 field extension Fp2 is given by adjoining i, the square root of -1, to Fp
+/// The arithmetic in this extension is standard complex arithmetic
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp2 {
     re: Fp,
@@ -154,6 +156,7 @@ const UNIT_FP2: Fp2 = Fp2 {
     im: ZERO_FP,
 };
 
+// This function takes the complex conjugate
 fn conj_fp2(a: Fp2) -> Fp2 {
     Fp2 {
         re: a.re,
@@ -161,6 +164,7 @@ fn conj_fp2(a: Fp2) -> Fp2 {
     }
 }
 
+// This function function normalizes the input to the complex unit circle
 fn normalize_fp2(a: Fp2) -> Fp2 {
     let norm = a.re * a.re + a.im * a.im;
     Fp2 {
@@ -169,6 +173,8 @@ fn normalize_fp2(a: Fp2) -> Fp2 {
     }
 }
 
+/// The degree 3 field extension Fp6 over Fp2 is given by adjoining t, where t^3 = 9 + i
+/// We begin by defining a helper function which multiplies an Fp2 element by 9 + i
 fn i9(a: Fp2) -> Fp2 {
     let nine = Fp { val: U256::from(9) };
     Fp2 {
@@ -177,6 +183,7 @@ fn i9(a: Fp2) -> Fp2 {
     }
 }
 
+// Fp6 has basis 1, t, t^2 over Fp2
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp6 {
     t0: Fp2,
@@ -264,6 +271,8 @@ fn mul_fp2_fp6(x: Fp2, f: Fp6) -> Fp6 {
     }
 }
 
+/// This function multiplies an Fp6 element by t, and hence shifts the bases,
+/// where the t^2 coefficient picks up a factor of 9+i as the 1 coefficient of the output
 fn sh(c: Fp6) -> Fp6 {
     Fp6 {
         t0: i9(c.t2),
@@ -272,6 +281,33 @@ fn sh(c: Fp6) -> Fp6 {
     }
 }
 
+/// The nth frobenius endomorphism is given by sending a field element r to r^(p^n)
+/// Hence an Fp6 element a + bt + ct^2 is sent to
+///     a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n)
+/// the constant arrays FROB_T1 and FROB_T2 record the values of t^(p^n) and t^(2p^n), respectively
+/// By the comment in conj_fp2, x^(p^n) = x when n is even and conj_fp2(x) when n is odd
+fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
+    let n = n % 6;
+    let frob_t1 = FROB_T1[n];
+    let frob_t2 = FROB_T2[n];
+
+    if n % 2 != 0 {
+        Fp6 {
+            t0: conj_fp2(c.t0),
+            t1: frob_t1 * conj_fp2(c.t1),
+            t2: frob_t2 * conj_fp2(c.t2),
+        }
+    } else {
+        Fp6 {
+            t0: c.t0,
+            t1: frob_t1 * c.t1,
+            t2: frob_t2 * c.t2,
+        }
+    }
+}
+
+/// The degree 2 field extension Fp12 over Fp6 is given by adjoining z, where z^2 = t.
+/// It thus has basis 1, z over Fp6
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp12 {
     z0: Fp6,
@@ -337,6 +373,18 @@ fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     Fp12 { z0: g0, z1: g1 }
 }
 
+/// The nth frobenius endomorphism is given by sending a field element r to r^(p^n)
+/// Hence an Fp12 element a + bz is sent to
+///     a^(p^n) + b^(p^n) * z^(p^n)
+/// the constant array FROB_Z records the values of z^p^n
+pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
+    let n = n % 12;
+    Fp12 {
+        z0: frob_fp6(n, f.z0),
+        z1: mul_fp2_fp6(FROB_Z[n], frob_fp6(n, f.z1)),
+    }
+}
+
 pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
     [
         f.z0.t0.re.val,
@@ -415,34 +463,6 @@ pub fn gen_fp12_sparse() -> Fp12 {
     sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
 }
 
-fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
-    let n = n % 6;
-    let frob_t1 = FROB_T1[n];
-    let frob_t2 = FROB_T2[n];
-
-    if n % 2 != 0 {
-        Fp6 {
-            t0: conj_fp2(c.t0),
-            t1: frob_t1 * conj_fp2(c.t1),
-            t2: frob_t2 * conj_fp2(c.t2),
-        }
-    } else {
-        Fp6 {
-            t0: c.t0,
-            t1: frob_t1 * c.t1,
-            t2: frob_t2 * c.t2,
-        }
-    }
-}
-
-pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
-    let n = n % 12;
-    Fp12 {
-        z0: frob_fp6(n, f.z0),
-        z1: mul_fp2_fp6(FROB_Z[n], frob_fp6(n, f.z1)),
-    }
-}
-
 const FROB_T1: [Fp2; 6] = [
     Fp2 {
         re: Fp { val: U256::one() },

From 985e81603912f5be2270da7c18f3b1e6b5b9f2fe Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 20:53:59 +0700
Subject: [PATCH 121/201] transmute + comments

---
 evm/src/bn254_arithmetic.rs | 84 ++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 44 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 5e795b45..1e0dfac3 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,4 +1,4 @@
-use std::ops::{Add, Div, Mul, Neg, Sub};
+use std::{ops::{Add, Div, Mul, Neg, Sub}, mem::transmute};
 
 use ethereum_types::U256;
 use itertools::Itertools;
@@ -164,7 +164,7 @@ fn conj_fp2(a: Fp2) -> Fp2 {
     }
 }
 
-// This function function normalizes the input to the complex unit circle
+// This function divides the input by its norm re^2 + im^2, which yields the inverse of its complex conjugate
 fn normalize_fp2(a: Fp2) -> Fp2 {
     let norm = a.re * a.re + a.im * a.im;
     Fp2 {
@@ -281,31 +281,6 @@ fn sh(c: Fp6) -> Fp6 {
     }
 }
 
-/// The nth frobenius endomorphism is given by sending a field element r to r^(p^n)
-/// Hence an Fp6 element a + bt + ct^2 is sent to
-///     a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n)
-/// the constant arrays FROB_T1 and FROB_T2 record the values of t^(p^n) and t^(2p^n), respectively
-/// By the comment in conj_fp2, x^(p^n) = x when n is even and conj_fp2(x) when n is odd
-fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
-    let n = n % 6;
-    let frob_t1 = FROB_T1[n];
-    let frob_t2 = FROB_T2[n];
-
-    if n % 2 != 0 {
-        Fp6 {
-            t0: conj_fp2(c.t0),
-            t1: frob_t1 * conj_fp2(c.t1),
-            t2: frob_t2 * conj_fp2(c.t2),
-        }
-    } else {
-        Fp6 {
-            t0: c.t0,
-            t1: frob_t1 * c.t1,
-            t2: frob_t2 * c.t2,
-        }
-    }
-}
-
 /// The degree 2 field extension Fp12 over Fp6 is given by adjoining z, where z^2 = t.
 /// It thus has basis 1, z over Fp6
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -373,10 +348,44 @@ fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     Fp12 { z0: g0, z1: g1 }
 }
 
-/// The nth frobenius endomorphism is given by sending a field element r to r^(p^n)
-/// Hence an Fp12 element a + bz is sent to
+/// The nth frobenius endomorphism of a finite field F of order p^q is given by sending x: F to x^(p^n)
+/// since any element x: F satisfies x^(p^q) = x = x^(p^0), these endomorphisms cycle modulo q
+///
+/// Thus in the case of Fp, there are no nontrivial such endomorphisms since x^p = x.
+///
+/// In the case of Fp2, the first and only nontrivial frobenius map sends a + bi to its complex conjugate:
+///     a^p + b^p(i^p) = a - bi
+/// since p == 3 mod 4, and i^3 = -i
+///
+/// An Fp6 element a + bt + ct^2 is sent to
+///     a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n)
+/// where the values of t^(p^n) and t^(2p^n) are precomputed in the constant arrays FROB_T1 and FROB_T2
+///
+///
+/// An Fp12 element a + bz is sent to
 ///     a^(p^n) + b^(p^n) * z^(p^n)
-/// the constant array FROB_Z records the values of z^p^n
+/// where the values of z^(p^n) are precomputed in the constant array FROB_Z
+
+fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
+    let n = n % 6;
+    let frob_t1 = FROB_T1[n];
+    let frob_t2 = FROB_T2[n];
+
+    if n % 2 != 0 {
+        Fp6 {
+            t0: conj_fp2(c.t0),
+            t1: frob_t1 * conj_fp2(c.t1),
+            t2: frob_t2 * conj_fp2(c.t2),
+        }
+    } else {
+        Fp6 {
+            t0: c.t0,
+            t1: frob_t1 * c.t1,
+            t2: frob_t2 * c.t2,
+        }
+    }
+}
+
 pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     let n = n % 12;
     Fp12 {
@@ -386,20 +395,7 @@ pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
 }
 
 pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
-    [
-        f.z0.t0.re.val,
-        f.z0.t0.im.val,
-        f.z0.t1.re.val,
-        f.z0.t1.im.val,
-        f.z0.t2.re.val,
-        f.z0.t2.im.val,
-        f.z1.t0.re.val,
-        f.z1.t0.im.val,
-        f.z1.t1.re.val,
-        f.z1.t1.im.val,
-        f.z1.t2.re.val,
-        f.z1.t2.im.val,
-    ]
+    unsafe { transmute(f) }
 }
 
 pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {

From fda4b4c1802a79f08ad0159b25f6ee67d46f246e Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 18 Jan 2023 21:42:16 +0700
Subject: [PATCH 122/201] more comments

---
 evm/src/bn254_arithmetic.rs | 55 ++++++++++++++++++++++++++++---------
 1 file changed, 42 insertions(+), 13 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 1e0dfac3..d756ed59 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -133,6 +133,8 @@ impl Mul for Fp2 {
     }
 }
 
+/// The inverse of a + bi is given by (a - bi)/(a^2 + b^2) since
+/// (a + bi)(a - bi)/(a^2 + b^2) = (a^2 + b^2)/(a^2 + b^2) = 1
 impl Div for Fp2 {
     type Output = Self;
 
@@ -239,14 +241,21 @@ impl Mul for Fp6 {
     }
 }
 
+/// Let x_n = x^(p^n); By Galois Theory, for x: Fp6, the product
+///     phi = x_0 * x_1 * x_2 * x_3 * x_4 * x_5
+/// lands in Fp, and hence the inverse of x (= x_0) is given by
+///     (x_1 * x_2 * x_3 * x_4 * x_5) / phi
+/// Since (x_n)_m = x_{n+m}, we save compute by rearranging the numerator:
+///     (x_1 * x_3) * x_5 * (x_1 * x_3)_1
+
 impl Div for Fp6 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
-        let b = frob_fp6(1, rhs) * frob_fp6(3, rhs);
-        let e = normalize_fp2((b * frob_fp6(5, rhs)).t0);
-        let f = frob_fp6(1, b);
-        let inv = mul_fp2_fp6(e, f);
+        let prod_13 = frob_fp6(1, rhs) * frob_fp6(3, rhs);
+        let prod_135 = (prod_13 * frob_fp6(5, rhs)).t0;
+        let prod_24 = frob_fp6(1, prod_13);
+        let inv = mul_fp2_fp6(normalize_fp2(prod_135), prod_24);
         self * inv
     }
 }
@@ -303,19 +312,25 @@ impl Mul for Fp12 {
     }
 }
 
+/// By Galois Theory, for x: Fp12, the product
+///     phi = Prod_{i=0}^11 x_i
+/// lands in Fp, and hence the inverse of x (= x_0) is given by
+///     (Prod_{i=1}^11 x_i) / phi
+/// We note that x_6 = (a + bz)_6 = a - bz, which we denote as x'
+/// The remaining factors in the numerator can be efficiently rearranged as:
+///     [(x_1 * x_7) * (x_1 * x_7)_2] * (x_1 * x_7)_4 * [(x_1 * x_7) * (x_1 * x_7)_2]_1
+/// 
+/// Note that in the variable names below, we use a and b to denote 10 and 11
 impl Div for Fp12 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
-        let a = (frob_fp12(1, rhs) * frob_fp12(7, rhs)).z0;
-        let b = a * frob_fp6(2, a);
-        let c = normalize_fp2((b * frob_fp6(4, a)).t0);
-        let g = frob_fp6(1, b);
-        let e = mul_fp2_fp6(c, g);
-        let inv = Fp12 {
-            z0: e * rhs.z0,
-            z1: -e * rhs.z1,
-        };
+        let prod_17 = (frob_fp12(1, rhs) * frob_fp12(7, rhs)).z0;
+        let prod_1379= prod_17 * frob_fp6(2, prod_17);
+        let prod_13579b = (prod_1379 * frob_fp6(4, prod_17)).t0;
+        let prod_248a = frob_fp6(1, prod_1379);
+        let prod_12345789ab = mul_fp2_fp6(normalize_fp2(prod_13579b), prod_248a);
+        let inv = mul_fp6_fp12(prod_12345789ab, conj_fp12(rhs));
         self * inv
     }
 }
@@ -325,6 +340,20 @@ pub const UNIT_FP12: Fp12 = Fp12 {
     z1: ZERO_FP6,
 };
 
+fn conj_fp12(f: Fp12) -> Fp12 {
+    Fp12 {
+        z0: f.z0,
+        z1: -f.z1,
+    }
+}
+
+fn mul_fp6_fp12(c: Fp6, f: Fp12) -> Fp12 {
+    Fp12 {
+        z0: c * f.z0,
+        z1: c * f.z1,
+    }
+}
+
 pub fn inv_fp12(f: Fp12) -> Fp12 {
     UNIT_FP12 / f
 }

From 23698b74747d8ce4f35612230e4f82b450988696 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 19 Jan 2023 00:08:56 +0700
Subject: [PATCH 123/201] more comments

---
 evm/src/bn254_arithmetic.rs | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index d756ed59..b9edca22 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -254,8 +254,9 @@ impl Div for Fp6 {
     fn div(self, rhs: Self) -> Self::Output {
         let prod_13 = frob_fp6(1, rhs) * frob_fp6(3, rhs);
         let prod_135 = (prod_13 * frob_fp6(5, rhs)).t0;
+        let prod_odds_over_phi = normalize_fp2(prod_135);
         let prod_24 = frob_fp6(1, prod_13);
-        let inv = mul_fp2_fp6(normalize_fp2(prod_135), prod_24);
+        let inv = mul_fp2_fp6(prod_odds_over_phi, prod_24);
         self * inv
     }
 }
@@ -316,9 +317,10 @@ impl Mul for Fp12 {
 ///     phi = Prod_{i=0}^11 x_i
 /// lands in Fp, and hence the inverse of x (= x_0) is given by
 ///     (Prod_{i=1}^11 x_i) / phi
-/// We note that x_6 = (a + bz)_6 = a - bz, which we denote as x'
-/// The remaining factors in the numerator can be efficiently rearranged as:
-///     [(x_1 * x_7) * (x_1 * x_7)_2] * (x_1 * x_7)_4 * [(x_1 * x_7) * (x_1 * x_7)_2]_1
+/// We note that the 6th Frobenius map gives the Fp12 conjugate:
+///     x_6 = (a + bz)_6 = a + b(z^(p^6)) = a - bz
+/// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expresed as:
+///     [(prod_17) * (prod_17)_2] * (prod_17)_4 * [(prod_17) * (prod_17)_2]_1
 /// 
 /// Note that in the variable names below, we use a and b to denote 10 and 11
 impl Div for Fp12 {
@@ -328,9 +330,10 @@ impl Div for Fp12 {
         let prod_17 = (frob_fp12(1, rhs) * frob_fp12(7, rhs)).z0;
         let prod_1379= prod_17 * frob_fp6(2, prod_17);
         let prod_13579b = (prod_1379 * frob_fp6(4, prod_17)).t0;
+        let prod_odds_over_phi = normalize_fp2(prod_13579b);
         let prod_248a = frob_fp6(1, prod_1379);
-        let prod_12345789ab = mul_fp2_fp6(normalize_fp2(prod_13579b), prod_248a);
-        let inv = mul_fp6_fp12(prod_12345789ab, conj_fp12(rhs));
+        let prod_penultimate = mul_fp2_fp6(prod_odds_over_phi, prod_248a);
+        let inv = mul_fp6_fp12(prod_penultimate, conj_fp12(rhs));
         self * inv
     }
 }

From d6167a630dffe133aa48198a3665af5abd144cde Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 19 Jan 2023 00:25:40 +0700
Subject: [PATCH 124/201] complete description

---
 evm/src/bn254_arithmetic.rs | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index b9edca22..cd646598 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,4 +1,5 @@
-use std::{ops::{Add, Div, Mul, Neg, Sub}, mem::transmute};
+use std::mem::transmute;
+use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use ethereum_types::U256;
 use itertools::Itertools;
@@ -247,7 +248,10 @@ impl Mul for Fp6 {
 ///     (x_1 * x_2 * x_3 * x_4 * x_5) / phi
 /// Since (x_n)_m = x_{n+m}, we save compute by rearranging the numerator:
 ///     (x_1 * x_3) * x_5 * (x_1 * x_3)_1
-
+/// By Galois theory, both the following are in Fp2 and are complex conjugates
+///     x_1 * x_3 * x_5,  x_0 * x_2 * x_4
+/// Thus phi = norm(x_1 * x_3 * x_5), and hence the inverse is given by
+///     normalize((x_1 * x_3) * x_5) * (x_1 * x_3)_1
 impl Div for Fp6 {
     type Output = Self;
 
@@ -315,20 +319,24 @@ impl Mul for Fp12 {
 
 /// By Galois Theory, for x: Fp12, the product
 ///     phi = Prod_{i=0}^11 x_i
-/// lands in Fp, and hence the inverse of x (= x_0) is given by
+/// lands in Fp, and hence the inverse of x is given by
 ///     (Prod_{i=1}^11 x_i) / phi
 /// We note that the 6th Frobenius map gives the Fp12 conjugate:
 ///     x_6 = (a + bz)_6 = a + b(z^(p^6)) = a - bz
 /// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expresed as:
 ///     [(prod_17) * (prod_17)_2] * (prod_17)_4 * [(prod_17) * (prod_17)_2]_1
-/// 
+/// By Galois theory, both the following are in Fp2 and are complex conjugates
+///     prod_13579b,  prod_02468a
+/// Thus phi = norm(prod_13579b), and hence the inverse is given by
+///    conj_fp12(x) * normalize([(prod_17) * (prod_17)_2] * (prod_17)_4) * [(prod_17) * (prod_17)_2]_1
+///
 /// Note that in the variable names below, we use a and b to denote 10 and 11
 impl Div for Fp12 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
         let prod_17 = (frob_fp12(1, rhs) * frob_fp12(7, rhs)).z0;
-        let prod_1379= prod_17 * frob_fp6(2, prod_17);
+        let prod_1379 = prod_17 * frob_fp6(2, prod_17);
         let prod_13579b = (prod_1379 * frob_fp6(4, prod_17)).t0;
         let prod_odds_over_phi = normalize_fp2(prod_13579b);
         let prod_248a = frob_fp6(1, prod_1379);

From 54676487e165ed3608fa0300a82bacc7979add69 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 19 Jan 2023 00:56:18 +0700
Subject: [PATCH 125/201] cleaner description

---
 evm/src/bn254_arithmetic.rs | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index cd646598..ccb2f32f 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -321,26 +321,24 @@ impl Mul for Fp12 {
 ///     phi = Prod_{i=0}^11 x_i
 /// lands in Fp, and hence the inverse of x is given by
 ///     (Prod_{i=1}^11 x_i) / phi
-/// We note that the 6th Frobenius map gives the Fp12 conjugate:
-///     x_6 = (a + bz)_6 = a + b(z^(p^6)) = a - bz
+/// The 6th Frob map is nontrivial but leaves Fp6 fixed and hence must be the conjugate:
+///     x_6 = (a + bz)_6 = a - bz
 /// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expresed as:
 ///     [(prod_17) * (prod_17)_2] * (prod_17)_4 * [(prod_17) * (prod_17)_2]_1
 /// By Galois theory, both the following are in Fp2 and are complex conjugates
-///     prod_13579b,  prod_02468a
-/// Thus phi = norm(prod_13579b), and hence the inverse is given by
-///    conj_fp12(x) * normalize([(prod_17) * (prod_17)_2] * (prod_17)_4) * [(prod_17) * (prod_17)_2]_1
-///
-/// Note that in the variable names below, we use a and b to denote 10 and 11
+///     prod_odds,  prod_evens
+/// Thus phi = norm(prod_odds), and hence the inverse is given by
+///    normalize(prod_odds) * prod_evens_except_six * conj_fp12(x)
 impl Div for Fp12 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
         let prod_17 = (frob_fp12(1, rhs) * frob_fp12(7, rhs)).z0;
         let prod_1379 = prod_17 * frob_fp6(2, prod_17);
-        let prod_13579b = (prod_1379 * frob_fp6(4, prod_17)).t0;
-        let prod_odds_over_phi = normalize_fp2(prod_13579b);
-        let prod_248a = frob_fp6(1, prod_1379);
-        let prod_penultimate = mul_fp2_fp6(prod_odds_over_phi, prod_248a);
+        let prod_odds = (prod_1379 * frob_fp6(4, prod_17)).t0;
+        let prod_odds_over_phi = normalize_fp2(prod_odds);
+        let prod_evens_except_six = frob_fp6(1, prod_1379);
+        let prod_penultimate = mul_fp2_fp6(prod_odds_over_phi, prod_evens_except_six);
         let inv = mul_fp6_fp12(prod_penultimate, conj_fp12(rhs));
         self * inv
     }

From eb7d18da2e8f17ea3d67f47ad3f43812829de824 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 19 Jan 2023 00:59:51 +0700
Subject: [PATCH 126/201] fix clippy

---
 evm/src/bn254_arithmetic.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index ccb2f32f..ccea512c 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -47,6 +47,7 @@ impl Sub for Fp {
     }
 }
 
+#[allow(clippy::suspicious_arithmetic_impl)]
 impl Mul for Fp {
     type Output = Self;
 

From 7f135fc09050a1d52607ee4180cbc18eabe74b0a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 12:53:02 +0700
Subject: [PATCH 127/201] reorg

---
 evm/src/bn254_arithmetic.rs | 131 ++++++++++++++++++------------------
 1 file changed, 66 insertions(+), 65 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index ccea512c..df777268 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -433,71 +433,6 @@ pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     }
 }
 
-pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
-    unsafe { transmute(f) }
-}
-
-pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
-    fp12_to_array(f).into_iter().collect()
-}
-
-pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
-    xs.into_iter()
-        .tuples::<(U256, U256)>()
-        .map(|(v1, v2)| Fp2 {
-            re: Fp { val: v1 },
-            im: Fp { val: v2 },
-        })
-        .tuples()
-        .map(|(a1, a2, a3, a4, a5, a6)| Fp12 {
-            z0: Fp6 {
-                t0: a1,
-                t1: a2,
-                t2: a3,
-            },
-            z1: Fp6 {
-                t0: a4,
-                t1: a5,
-                t2: a6,
-            },
-        })
-        .next()
-        .unwrap()
-}
-
-fn gen_fp() -> Fp {
-    let mut rng = thread_rng();
-    let x64 = rng.gen::<u64>();
-    let x256 = U256([x64, x64, x64, x64]) % BN_BASE;
-    Fp { val: x256 }
-}
-
-fn gen_fp2() -> Fp2 {
-    Fp2 {
-        re: gen_fp(),
-        im: gen_fp(),
-    }
-}
-
-fn gen_fp6() -> Fp6 {
-    Fp6 {
-        t0: gen_fp2(),
-        t1: gen_fp2(),
-        t2: gen_fp2(),
-    }
-}
-
-pub fn gen_fp12() -> Fp12 {
-    Fp12 {
-        z0: gen_fp6(),
-        z1: gen_fp6(),
-    }
-}
-
-pub fn gen_fp12_sparse() -> Fp12 {
-    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
-}
-
 const FROB_T1: [Fp2; 6] = [
     Fp2 {
         re: Fp { val: U256::one() },
@@ -883,3 +818,69 @@ const FROB_Z: [Fp2; 12] = [
         },
     },
 ];
+
+
+pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
+    unsafe { transmute(f) }
+}
+
+pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
+    fp12_to_array(f).into_iter().collect()
+}
+
+pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
+    xs.into_iter()
+        .tuples::<(U256, U256)>()
+        .map(|(v1, v2)| Fp2 {
+            re: Fp { val: v1 },
+            im: Fp { val: v2 },
+        })
+        .tuples()
+        .map(|(a1, a2, a3, a4, a5, a6)| Fp12 {
+            z0: Fp6 {
+                t0: a1,
+                t1: a2,
+                t2: a3,
+            },
+            z1: Fp6 {
+                t0: a4,
+                t1: a5,
+                t2: a6,
+            },
+        })
+        .next()
+        .unwrap()
+}
+
+fn gen_fp() -> Fp {
+    let mut rng = thread_rng();
+    let x64 = rng.gen::<u64>();
+    let x256 = U256([x64, x64, x64, x64]) % BN_BASE;
+    Fp { val: x256 }
+}
+
+fn gen_fp2() -> Fp2 {
+    Fp2 {
+        re: gen_fp(),
+        im: gen_fp(),
+    }
+}
+
+fn gen_fp6() -> Fp6 {
+    Fp6 {
+        t0: gen_fp2(),
+        t1: gen_fp2(),
+        t2: gen_fp2(),
+    }
+}
+
+pub fn gen_fp12() -> Fp12 {
+    Fp12 {
+        z0: gen_fp6(),
+        z1: gen_fp6(),
+    }
+}
+
+pub fn gen_fp12_sparse() -> Fp12 {
+    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
+}

From 5f2baea0df2494f9956fcdf26bde37c2450bef64 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 13:59:39 +0700
Subject: [PATCH 128/201] mul test from memory

---
 evm/src/bn254_arithmetic.rs                   |   1 -
 .../curve/bn254/field_arithmetic/fp12_mul.asm |  11 --
 evm/src/cpu/kernel/tests/bn254.rs             | 157 +++++++++++-------
 3 files changed, 96 insertions(+), 73 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index df777268..545c2efb 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -819,7 +819,6 @@ const FROB_Z: [Fp2; 12] = [
     },
 ];
 
-
 pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
     unsafe { transmute(f) }
 }
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index 11e68887..ee8804c7 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -1,13 +1,3 @@
-/// Note: uncomment this to test
-
-global test_mul_fp12:
-    // stack: inA, f, f', inB, g, g', mul_dest, inA, inB, out, return_fp12_on_stack, out
-    %store_fp12
-    // stack:             inB, g, g', mul_dest, inA, inB, out, return_fp12_on_stack, out
-    %store_fp12
-    // stack:                         mul_dest, inA, inB, out, return_fp12_on_stack, out
-    JUMP
-
 ///////////////////////////////////////
 ///// GENERAL FP12 MULTIPLICATION /////
 ///////////////////////////////////////
@@ -319,7 +309,6 @@ global mul_fp12_sparse:
 
 global square_fp12_test:
     POP
-    %jump(square_fp12)
 
 global square_fp12:
     // stack:                                                                   inp, out
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 235ebcec..95164ab2 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -5,39 +5,74 @@ use ethereum_types::U256;
 
 use crate::bn254_arithmetic::{fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, Fp12};
 use crate::cpu::kernel::aggregator::KERNEL;
-use crate::cpu::kernel::interpreter::run_interpreter;
+use crate::cpu::kernel::interpreter::{run_interpreter, Interpreter};
+use crate::memory::segments::Segment;
+use crate::witness::memory::MemoryAddress;
+
+struct InterpreterInit {
+    offset: String,
+    stack: Vec<U256>,
+    memory: Vec<(usize, Vec<U256>)>,
+}
+
+fn run_test_interpreter(init: InterpreterInit) -> Result<Vec<U256>> {
+    let label = KERNEL.global_labels[&init.offset];
+    let mut stack = init.stack;
+    stack.reverse();
+    let mut interpreter = Interpreter::new_with_kernel(label, stack);
+    
+    for (pointer, data) in init.memory {
+        for (i, term) in data.iter().enumerate() {
+            interpreter.generation_state.memory.set(
+                MemoryAddress::new(0, Segment::KernelGeneral, pointer + i),
+                *term,
+            )
+        }
+    }
+
+    interpreter.run()?;
+    let mut output = interpreter.stack().to_vec();
+    output.reverse();
+    Ok(output)
+}
 
 fn get_address_from_label(lbl: &str) -> U256 {
     U256::from(KERNEL.global_labels[lbl])
 }
 
-fn get_output(lbl: &str, stack: Vec<U256>) -> Vec<U256> {
-    let label = KERNEL.global_labels[lbl];
-    let mut input = stack;
-    input.reverse();
-    let mut output = run_interpreter(label, input).unwrap().stack().to_vec();
-    output.reverse();
-    output
-}
-
-fn make_mul_stack(f: Fp12, g: Fp12, mul_label: &str) -> Vec<U256> {
+fn make_mul_interpreter(f: Fp12, g: Fp12, mul_label: String) -> InterpreterInit {
     let in0 = U256::from(64);
     let in1 = U256::from(76);
     let out = U256::from(88);
 
-    let mut stack = vec![in0];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![in1]);
-    stack.extend(fp12_to_vec(g));
-    stack.extend(vec![
-        get_address_from_label(mul_label),
+    let stack = vec![
         in0,
         in1,
         out,
         get_address_from_label("return_fp12_on_stack"),
         out,
-    ]);
-    stack
+    ];
+
+    let memory = vec![
+        (64usize, fp12_to_vec(f)),
+        (76, fp12_to_vec(g))
+    ];
+
+    InterpreterInit { offset: mul_label, stack: stack, memory: memory }
+
+    // let mut stack = vec![in0];
+    // stack.extend(fp12_to_vec(f));
+    // stack.extend(vec![in1]);
+    // stack.extend(fp12_to_vec(g));
+    // stack.extend(vec![
+    //     get_address_from_label(mul_label),
+    //     in0,
+    //     in1,
+    //     out,
+    //     get_address_from_label("return_fp12_on_stack"),
+    //     out,
+    // ]);
+    // stack
 }
 
 #[test]
@@ -46,13 +81,13 @@ fn test_mul_fp12() -> Result<()> {
     let g: Fp12 = gen_fp12();
     let h: Fp12 = gen_fp12_sparse();
 
-    let normal: Vec<U256> = make_mul_stack(f, g, "mul_fp12");
-    let sparse: Vec<U256> = make_mul_stack(f, h, "mul_fp12_sparse");
-    let square: Vec<U256> = make_mul_stack(f, f, "square_fp12_test");
+    let normal: InterpreterInit = make_mul_interpreter(f, g, "mul_fp12".to_string());
+    let sparse: InterpreterInit = make_mul_interpreter(f, h, "mul_fp12_sparse".to_string());
+    let square: InterpreterInit = make_mul_interpreter(f, f, "square_fp12_test".to_string());
 
-    let out_normal: Vec<U256> = get_output("test_mul_fp12", normal);
-    let out_sparse: Vec<U256> = get_output("test_mul_fp12", sparse);
-    let out_square: Vec<U256> = get_output("test_mul_fp12", square);
+    let out_normal: Vec<U256> = run_test_interpreter(normal).unwrap();
+    let out_sparse: Vec<U256> = run_test_interpreter(sparse).unwrap();
+    let out_square: Vec<U256> = run_test_interpreter(square).unwrap();
 
     let exp_normal: Vec<U256> = fp12_to_vec(f * g);
     let exp_sparse: Vec<U256> = fp12_to_vec(f * h);
@@ -65,49 +100,49 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-#[test]
-fn test_frob_fp12() -> Result<()> {
-    let ptr = U256::from(100);
-    let f: Fp12 = gen_fp12();
+// #[test]
+// fn test_frob_fp12() -> Result<()> {
+//     let ptr = U256::from(100);
+//     let f: Fp12 = gen_fp12();
 
-    let mut stack = vec![ptr];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![ptr]);
+//     let mut stack = vec![ptr];
+//     stack.extend(fp12_to_vec(f));
+//     stack.extend(vec![ptr]);
 
-    let out_frob1: Vec<U256> = get_output("test_frob_fp12_1", stack.clone());
-    let out_frob2: Vec<U256> = get_output("test_frob_fp12_2", stack.clone());
-    let out_frob3: Vec<U256> = get_output("test_frob_fp12_3", stack.clone());
-    let out_frob6: Vec<U256> = get_output("test_frob_fp12_6", stack);
+//     let out_frob1: Vec<U256> = run_test_interpreter("test_frob_fp12_1", stack.clone());
+//     let out_frob2: Vec<U256> = run_test_interpreter("test_frob_fp12_2", stack.clone());
+//     let out_frob3: Vec<U256> = run_test_interpreter("test_frob_fp12_3", stack.clone());
+//     let out_frob6: Vec<U256> = run_test_interpreter("test_frob_fp12_6", stack);
 
-    let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
-    let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
-    let exp_frob3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
-    let exp_frob6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
+//     let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
+//     let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
+//     let exp_frob3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
+//     let exp_frob6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
 
-    assert_eq!(out_frob1, exp_frob1);
-    assert_eq!(out_frob2, exp_frob2);
-    assert_eq!(out_frob3, exp_frob3);
-    assert_eq!(out_frob6, exp_frob6);
+//     assert_eq!(out_frob1, exp_frob1);
+//     assert_eq!(out_frob2, exp_frob2);
+//     assert_eq!(out_frob3, exp_frob3);
+//     assert_eq!(out_frob6, exp_frob6);
 
-    Ok(())
-}
+//     Ok(())
+// }
 
-#[test]
-fn test_inv_fp12() -> Result<()> {
-    let ptr = U256::from(200);
-    let inv = U256::from(300);
+// #[test]
+// fn test_inv_fp12() -> Result<()> {
+//     let ptr = U256::from(200);
+//     let inv = U256::from(300);
 
-    let f: Fp12 = gen_fp12();
-    let mut stack = vec![ptr];
-    stack.extend(fp12_to_vec(f));
-    stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
+//     let f: Fp12 = gen_fp12();
+//     let mut stack = vec![ptr];
+//     stack.extend(fp12_to_vec(f));
+//     stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
 
-    let output: Vec<U256> = get_output("test_inv_fp12", stack);
+//     let output: Vec<U256> = run_test_interpreter("test_inv_fp12", stack);
 
-    assert_eq!(output, vec![]);
+//     assert_eq!(output, vec![]);
 
-    Ok(())
-}
+//     Ok(())
+// }
 
 // #[test]
 // fn test_power() -> Result<()> {
@@ -125,7 +160,7 @@ fn test_inv_fp12() -> Result<()> {
 //         out,
 //     ]);
 
-//     let output: Vec<U256> = get_output("test_pow", stack);
+//     let output: Vec<U256> = run_test_interpreter("test_pow", stack);
 //     let expected: Vec<U256> = fp12_to_vec(power(f));
 
 //     assert_eq!(output, expected);
@@ -158,7 +193,7 @@ fn test_inv_fp12() -> Result<()> {
 //     let q: TwistedCurve = twisted_curve_generator();
 
 //     let stack = make_tate_stack(p, q);
-//     let output = get_output("test_miller", stack);
+//     let output = run_test_interpreter("test_miller", stack);
 //     let expected = fp12_to_vec(miller_loop(p, q));
 
 //     assert_eq!(output, expected);
@@ -172,7 +207,7 @@ fn test_inv_fp12() -> Result<()> {
 //     let q: TwistedCurve = twisted_curve_generator();
 
 //     let stack = make_tate_stack(p, q);
-//     let output = get_output("test_tate", stack);
+//     let output = run_test_interpreter("test_tate", stack);
 //     let expected = fp12_to_vec(tate(p, q));
 
 //     assert_eq!(output, expected);

From 20fb2cb7d8780290c3e92e386d58397606d73f2e Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 14:30:12 +0700
Subject: [PATCH 129/201] read output from memory

---
 evm/src/cpu/kernel/tests/bn254.rs | 77 ++++++++++++++++---------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 95164ab2..3e464047 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -1,3 +1,4 @@
+use std::ops::Range;
 use std::str::FromStr;
 
 use anyhow::Result;
@@ -5,23 +6,24 @@ use ethereum_types::U256;
 
 use crate::bn254_arithmetic::{fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, Fp12};
 use crate::cpu::kernel::aggregator::KERNEL;
-use crate::cpu::kernel::interpreter::{run_interpreter, Interpreter};
+use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
 use crate::witness::memory::MemoryAddress;
 
-struct InterpreterInit {
+struct InterpreterSetup {
     offset: String,
     stack: Vec<U256>,
     memory: Vec<(usize, Vec<U256>)>,
+    output: Range<usize>,
 }
 
-fn run_test_interpreter(init: InterpreterInit) -> Result<Vec<U256>> {
-    let label = KERNEL.global_labels[&init.offset];
-    let mut stack = init.stack;
+fn get_interpreter_output(setup: InterpreterSetup) -> Result<Vec<U256>> {
+    let label = KERNEL.global_labels[&setup.offset];
+    let mut stack = setup.stack;
     stack.reverse();
     let mut interpreter = Interpreter::new_with_kernel(label, stack);
-    
-    for (pointer, data) in init.memory {
+
+    for (pointer, data) in setup.memory {
         for (i, term) in data.iter().enumerate() {
             interpreter.generation_state.memory.set(
                 MemoryAddress::new(0, Segment::KernelGeneral, pointer + i),
@@ -31,8 +33,15 @@ fn run_test_interpreter(init: InterpreterInit) -> Result<Vec<U256>> {
     }
 
     interpreter.run()?;
-    let mut output = interpreter.stack().to_vec();
-    output.reverse();
+
+    let kernel = &interpreter.generation_state.memory.contexts[interpreter.context].segments
+        [Segment::KernelGeneral as usize]
+        .content;
+
+    let mut output: Vec<U256> = vec![];
+    for i in setup.output {
+        output.push(kernel[i]);
+    }
     Ok(output)
 }
 
@@ -40,25 +49,21 @@ fn get_address_from_label(lbl: &str) -> U256 {
     U256::from(KERNEL.global_labels[lbl])
 }
 
-fn make_mul_interpreter(f: Fp12, g: Fp12, mul_label: String) -> InterpreterInit {
+fn make_mul_interpreter(f: Fp12, g: Fp12, mul_label: String) -> InterpreterSetup {
     let in0 = U256::from(64);
     let in1 = U256::from(76);
     let out = U256::from(88);
 
-    let stack = vec![
-        in0,
-        in1,
-        out,
-        get_address_from_label("return_fp12_on_stack"),
-        out,
-    ];
+    let stack = vec![in0, in1, out, U256::from_str("0xdeadbeef").unwrap()];
 
-    let memory = vec![
-        (64usize, fp12_to_vec(f)),
-        (76, fp12_to_vec(g))
-    ];
+    let memory = vec![(64usize, fp12_to_vec(f)), (76, fp12_to_vec(g))];
 
-    InterpreterInit { offset: mul_label, stack: stack, memory: memory }
+    InterpreterSetup {
+        offset: mul_label,
+        stack: stack,
+        memory: memory,
+        output: 88..100,
+    }
 
     // let mut stack = vec![in0];
     // stack.extend(fp12_to_vec(f));
@@ -81,13 +86,13 @@ fn test_mul_fp12() -> Result<()> {
     let g: Fp12 = gen_fp12();
     let h: Fp12 = gen_fp12_sparse();
 
-    let normal: InterpreterInit = make_mul_interpreter(f, g, "mul_fp12".to_string());
-    let sparse: InterpreterInit = make_mul_interpreter(f, h, "mul_fp12_sparse".to_string());
-    let square: InterpreterInit = make_mul_interpreter(f, f, "square_fp12_test".to_string());
+    let normal: InterpreterSetup = make_mul_interpreter(f, g, "mul_fp12".to_string());
+    let sparse: InterpreterSetup = make_mul_interpreter(f, h, "mul_fp12_sparse".to_string());
+    let square: InterpreterSetup = make_mul_interpreter(f, f, "square_fp12_test".to_string());
 
-    let out_normal: Vec<U256> = run_test_interpreter(normal).unwrap();
-    let out_sparse: Vec<U256> = run_test_interpreter(sparse).unwrap();
-    let out_square: Vec<U256> = run_test_interpreter(square).unwrap();
+    let out_normal: Vec<U256> = get_interpreter_output(normal).unwrap();
+    let out_sparse: Vec<U256> = get_interpreter_output(sparse).unwrap();
+    let out_square: Vec<U256> = get_interpreter_output(square).unwrap();
 
     let exp_normal: Vec<U256> = fp12_to_vec(f * g);
     let exp_sparse: Vec<U256> = fp12_to_vec(f * h);
@@ -109,10 +114,10 @@ fn test_mul_fp12() -> Result<()> {
 //     stack.extend(fp12_to_vec(f));
 //     stack.extend(vec![ptr]);
 
-//     let out_frob1: Vec<U256> = run_test_interpreter("test_frob_fp12_1", stack.clone());
-//     let out_frob2: Vec<U256> = run_test_interpreter("test_frob_fp12_2", stack.clone());
-//     let out_frob3: Vec<U256> = run_test_interpreter("test_frob_fp12_3", stack.clone());
-//     let out_frob6: Vec<U256> = run_test_interpreter("test_frob_fp12_6", stack);
+//     let out_frob1: Vec<U256> = get_interpreter_output("test_frob_fp12_1", stack.clone());
+//     let out_frob2: Vec<U256> = get_interpreter_output("test_frob_fp12_2", stack.clone());
+//     let out_frob3: Vec<U256> = get_interpreter_output("test_frob_fp12_3", stack.clone());
+//     let out_frob6: Vec<U256> = get_interpreter_output("test_frob_fp12_6", stack);
 
 //     let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
 //     let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
@@ -137,7 +142,7 @@ fn test_mul_fp12() -> Result<()> {
 //     stack.extend(fp12_to_vec(f));
 //     stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
 
-//     let output: Vec<U256> = run_test_interpreter("test_inv_fp12", stack);
+//     let output: Vec<U256> = get_interpreter_output("test_inv_fp12", stack);
 
 //     assert_eq!(output, vec![]);
 
@@ -160,7 +165,7 @@ fn test_mul_fp12() -> Result<()> {
 //         out,
 //     ]);
 
-//     let output: Vec<U256> = run_test_interpreter("test_pow", stack);
+//     let output: Vec<U256> = get_interpreter_output("test_pow", stack);
 //     let expected: Vec<U256> = fp12_to_vec(power(f));
 
 //     assert_eq!(output, expected);
@@ -193,7 +198,7 @@ fn test_mul_fp12() -> Result<()> {
 //     let q: TwistedCurve = twisted_curve_generator();
 
 //     let stack = make_tate_stack(p, q);
-//     let output = run_test_interpreter("test_miller", stack);
+//     let output = get_interpreter_output("test_miller", stack);
 //     let expected = fp12_to_vec(miller_loop(p, q));
 
 //     assert_eq!(output, expected);
@@ -207,7 +212,7 @@ fn test_mul_fp12() -> Result<()> {
 //     let q: TwistedCurve = twisted_curve_generator();
 
 //     let stack = make_tate_stack(p, q);
-//     let output = run_test_interpreter("test_tate", stack);
+//     let output = get_interpreter_output("test_tate", stack);
 //     let expected = fp12_to_vec(tate(p, q));
 
 //     assert_eq!(output, expected);

From abab6bf1e1ba236d264e33f68c2a291924f21659 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 14:52:44 +0700
Subject: [PATCH 130/201] test frob from memory

---
 .../bn254/field_arithmetic/frobenius.asm      | 152 ++++++++----------
 evm/src/cpu/kernel/tests/bn254.rs             | 108 ++++++-------
 2 files changed, 124 insertions(+), 136 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index c343378b..35c867b4 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -1,110 +1,29 @@
 global test_frob_fp12_1:
-    // stack: ptr, f, ptr
-    %store_fp12
     // stack:         ptr
     %frob_fp12_1
     // stack:         ptr
-    %load_fp12
     %jump(0xdeadbeef)
 
 global test_frob_fp12_2:
-    // stack: ptr, f, ptr
-    %store_fp12
     // stack:         ptr 
     DUP1
     // stack:    ptr, ptr
     %frob_fp12_2_
     // stack:         ptr
-    %load_fp12
     %jump(0xdeadbeef)
 
 global test_frob_fp12_3:
-    // stack: ptr, f, ptr
-    %store_fp12
     // stack:         ptr
     %frob_fp12_3
     // stack:         ptr
-    %load_fp12
     %jump(0xdeadbeef)
 
 global test_frob_fp12_6:
-    // stack: ptr, f, ptr
-    %store_fp12
     // stack:         ptr
     %frob_fp12_6
     // stack:         ptr
-    %load_fp12
     %jump(0xdeadbeef)
 
-/// let Z` denote the complex conjugate of Z
-
-/// def frob_fp6_n(C0, C1, C2):
-///     if n%2:
-///         D0, D1, D2 = C0`, FROB_t1[n] * C1`, FROB_t2[n] * C2`
-///     else: 
-///         D0, D1, D2 = C0 , FROB_t1[n] * C1 , FROB_t2[n] * C2
-///     return D0, D1, D2 
-
-%macro frob_fp6_1
-    // stack: C0 , C1 , C2
-    %conj
-    // stack: D0 , C1 , C2
-    %swap_fp2_hole_2
-    // stack: C2 , C1 , D0
-    %conj
-    // stack: C2`, C1 , D0
-    %frobt2_1
-    // stack: D2 , C1 , D0
-    %swap_fp2_hole_2
-    // stack: D0 , C1 , D2
-    %swap_fp2
-    // stack: C1 , D0 , D2
-    %conj
-    // stack: C1`, D0 , D2
-    %frobt1_1
-    // stack: D1 , D0 , D2
-    %swap_fp2
-    // stack: D0 , D1 , D2
-%endmacro
-
-%macro frob_fp6_2
-    // stack: C0, C1, C2
-    %swap_fp2_hole_2
-    // stack: C2, C1, C0
-    %frobt2_2
-    // stack: D2, C1, C0
-    %swap_fp2_hole_2
-    // stack: C0, C1, D2
-    %swap_fp2
-    // stack: C1, C0, D2
-    %frobt1_2
-    // stack: D1, C0, D2
-    %swap_fp2
-    // stack: D0, D1, D2
-%endmacro
-
-%macro frob_fp6_3
-    // stack: C0 , C1 , C2
-    %conj
-    // stack: D0 , C1 , C2
-    %swap_fp2_hole_2
-    // stack: C2 , C1 , D0
-    %conj
-    // stack: C2`, C1 , D0
-    %frobt2_3
-    // stack: D2 , C1 , D0
-    %swap_fp2_hole_2
-    // stack: D0 , C1 , D2
-    %swap_fp2
-    // stack: C1 , D0 , D2
-    %conj
-    // stack: C1`, D0 , D2
-    %frobt1_3
-    // stack: D1 , D0 , D2
-    %swap_fp2
-    // stack: D0 , D1 , D2
-%endmacro
-
 
 /// def frob_fp12_n(f, f'):
 ///     g  =             frob_fp6(n, f )
@@ -198,6 +117,77 @@ global test_frob_fp12_6:
     // stack:           ptr
 %endmacro
 
+
+/// let Z` denote the complex conjugate of Z
+
+/// def frob_fp6_n(C0, C1, C2):
+///     if n%2:
+///         D0, D1, D2 = C0`, FROB_T1[n] * C1`, FROB_T2[n] * C2`
+///     else: 
+///         D0, D1, D2 = C0 , FROB_T1[n] * C1 , FROB_T2[n] * C2
+///     return D0, D1, D2 
+
+%macro frob_fp6_1
+    // stack: C0 , C1 , C2
+    %conj
+    // stack: D0 , C1 , C2
+    %swap_fp2_hole_2
+    // stack: C2 , C1 , D0
+    %conj
+    // stack: C2`, C1 , D0
+    %frobt2_1
+    // stack: D2 , C1 , D0
+    %swap_fp2_hole_2
+    // stack: D0 , C1 , D2
+    %swap_fp2
+    // stack: C1 , D0 , D2
+    %conj
+    // stack: C1`, D0 , D2
+    %frobt1_1
+    // stack: D1 , D0 , D2
+    %swap_fp2
+    // stack: D0 , D1 , D2
+%endmacro
+
+%macro frob_fp6_2
+    // stack: C0, C1, C2
+    %swap_fp2_hole_2
+    // stack: C2, C1, C0
+    %frobt2_2
+    // stack: D2, C1, C0
+    %swap_fp2_hole_2
+    // stack: C0, C1, D2
+    %swap_fp2
+    // stack: C1, C0, D2
+    %frobt1_2
+    // stack: D1, C0, D2
+    %swap_fp2
+    // stack: D0, D1, D2
+%endmacro
+
+%macro frob_fp6_3
+    // stack: C0 , C1 , C2
+    %conj
+    // stack: D0 , C1 , C2
+    %swap_fp2_hole_2
+    // stack: C2 , C1 , D0
+    %conj
+    // stack: C2`, C1 , D0
+    %frobt2_3
+    // stack: D2 , C1 , D0
+    %swap_fp2_hole_2
+    // stack: D0 , C1 , D2
+    %swap_fp2
+    // stack: C1 , D0 , D2
+    %conj
+    // stack: C1`, D0 , D2
+    %frobt1_3
+    // stack: D1 , D0 , D2
+    %swap_fp2
+    // stack: D0 , D1 , D2
+%endmacro
+
+
 %macro frobz_1
     %frob_fp6_1
     PUSH 0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 3e464047..6c68c90d 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -1,5 +1,4 @@
 use std::ops::Range;
-use std::str::FromStr;
 
 use anyhow::Result;
 use ethereum_types::U256;
@@ -45,39 +44,25 @@ fn get_interpreter_output(setup: InterpreterSetup) -> Result<Vec<U256>> {
     Ok(output)
 }
 
-fn get_address_from_label(lbl: &str) -> U256 {
-    U256::from(KERNEL.global_labels[lbl])
-}
+fn setup_mul_test(f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
+    let in0: usize = 64;
+    let in1: usize = 76;
+    let out: usize = 88;
 
-fn make_mul_interpreter(f: Fp12, g: Fp12, mul_label: String) -> InterpreterSetup {
-    let in0 = U256::from(64);
-    let in1 = U256::from(76);
-    let out = U256::from(88);
-
-    let stack = vec![in0, in1, out, U256::from_str("0xdeadbeef").unwrap()];
-
-    let memory = vec![(64usize, fp12_to_vec(f)), (76, fp12_to_vec(g))];
+    let stack = vec![
+        U256::from(in0),
+        U256::from(in1),
+        U256::from(out),
+        U256::from(0xdeadbeefu32),
+    ];
+    let memory = vec![(in0, fp12_to_vec(f)), (in1, fp12_to_vec(g))];
 
     InterpreterSetup {
-        offset: mul_label,
+        offset: label.to_string(),
         stack: stack,
         memory: memory,
-        output: 88..100,
+        output: out..out+12,
     }
-
-    // let mut stack = vec![in0];
-    // stack.extend(fp12_to_vec(f));
-    // stack.extend(vec![in1]);
-    // stack.extend(fp12_to_vec(g));
-    // stack.extend(vec![
-    //     get_address_from_label(mul_label),
-    //     in0,
-    //     in1,
-    //     out,
-    //     get_address_from_label("return_fp12_on_stack"),
-    //     out,
-    // ]);
-    // stack
 }
 
 #[test]
@@ -86,13 +71,13 @@ fn test_mul_fp12() -> Result<()> {
     let g: Fp12 = gen_fp12();
     let h: Fp12 = gen_fp12_sparse();
 
-    let normal: InterpreterSetup = make_mul_interpreter(f, g, "mul_fp12".to_string());
-    let sparse: InterpreterSetup = make_mul_interpreter(f, h, "mul_fp12_sparse".to_string());
-    let square: InterpreterSetup = make_mul_interpreter(f, f, "square_fp12_test".to_string());
+    let setup_normal: InterpreterSetup = setup_mul_test(f, g, "mul_fp12");
+    let setup_sparse: InterpreterSetup = setup_mul_test(f, h, "mul_fp12_sparse");
+    let setup_square: InterpreterSetup = setup_mul_test(f, f, "square_fp12_test");
 
-    let out_normal: Vec<U256> = get_interpreter_output(normal).unwrap();
-    let out_sparse: Vec<U256> = get_interpreter_output(sparse).unwrap();
-    let out_square: Vec<U256> = get_interpreter_output(square).unwrap();
+    let out_normal: Vec<U256> = get_interpreter_output(setup_normal).unwrap();
+    let out_sparse: Vec<U256> = get_interpreter_output(setup_sparse).unwrap();
+    let out_square: Vec<U256> = get_interpreter_output(setup_square).unwrap();
 
     let exp_normal: Vec<U256> = fp12_to_vec(f * g);
     let exp_sparse: Vec<U256> = fp12_to_vec(f * h);
@@ -105,32 +90,45 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-// #[test]
-// fn test_frob_fp12() -> Result<()> {
-//     let ptr = U256::from(100);
-//     let f: Fp12 = gen_fp12();
+fn setup_frob_test(f: Fp12, label: &str) -> InterpreterSetup {
+    let ptr: usize = 100;
+    let stack = vec![U256::from(ptr)];
+    let memory = vec![(ptr, fp12_to_vec(f))];
 
-//     let mut stack = vec![ptr];
-//     stack.extend(fp12_to_vec(f));
-//     stack.extend(vec![ptr]);
+    InterpreterSetup {
+        offset: label.to_string(),
+        stack: stack,
+        memory: memory,
+        output: ptr..ptr+12,
+    }
+}
 
-//     let out_frob1: Vec<U256> = get_interpreter_output("test_frob_fp12_1", stack.clone());
-//     let out_frob2: Vec<U256> = get_interpreter_output("test_frob_fp12_2", stack.clone());
-//     let out_frob3: Vec<U256> = get_interpreter_output("test_frob_fp12_3", stack.clone());
-//     let out_frob6: Vec<U256> = get_interpreter_output("test_frob_fp12_6", stack);
+#[test]
+fn test_frob_fp12() -> Result<()> {
+    let f: Fp12 = gen_fp12();
 
-//     let exp_frob1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
-//     let exp_frob2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
-//     let exp_frob3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
-//     let exp_frob6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
+    let setup_frob_1 = setup_frob_test(f, "test_frob_fp12_1");
+    let setup_frob_2 = setup_frob_test(f, "test_frob_fp12_2");
+    let setup_frob_3 = setup_frob_test(f, "test_frob_fp12_3");
+    let setup_frob_6 = setup_frob_test(f, "test_frob_fp12_6");
 
-//     assert_eq!(out_frob1, exp_frob1);
-//     assert_eq!(out_frob2, exp_frob2);
-//     assert_eq!(out_frob3, exp_frob3);
-//     assert_eq!(out_frob6, exp_frob6);
+    let out_frob_1: Vec<U256> = get_interpreter_output(setup_frob_1).unwrap();
+    let out_frob_2: Vec<U256> = get_interpreter_output(setup_frob_2).unwrap();
+    let out_frob_3: Vec<U256> = get_interpreter_output(setup_frob_3).unwrap();
+    let out_frob_6: Vec<U256> = get_interpreter_output(setup_frob_6).unwrap();
 
-//     Ok(())
-// }
+    let exp_frob_1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
+    let exp_frob_2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
+    let exp_frob_3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
+    let exp_frob_6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
+
+    assert_eq!(out_frob_1, exp_frob_1);
+    assert_eq!(out_frob_2, exp_frob_2);
+    assert_eq!(out_frob_3, exp_frob_3);
+    assert_eq!(out_frob_6, exp_frob_6);
+
+    Ok(())
+}
 
 // #[test]
 // fn test_inv_fp12() -> Result<()> {

From c74a0c25000838c7e76bf7190ca70654ec9a7b93 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 14:58:34 +0700
Subject: [PATCH 131/201] test inv from memory

---
 .../curve/bn254/field_arithmetic/inverse.asm  |  6 ----
 evm/src/cpu/kernel/tests/bn254.rs             | 36 ++++++++++++-------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 2b9be3d4..fed823d3 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -22,12 +22,6 @@
 %endmacro
 
 
-global test_inv_fp12:
-    // stack: ptr, f, ptr, inv, retdest
-    %store_fp12
-    // stack:         ptr, inv, retdest
-    %jump(inv_fp12)
-
 global inv_fp12:
     // stack:                ptr, inv, retdest
     DUP1  %load_fp12
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 6c68c90d..778f2de7 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -3,7 +3,7 @@ use std::ops::Range;
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, Fp12};
+use crate::bn254_arithmetic::{fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, inv_fp12, Fp12};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
@@ -130,22 +130,32 @@ fn test_frob_fp12() -> Result<()> {
     Ok(())
 }
 
-// #[test]
-// fn test_inv_fp12() -> Result<()> {
-//     let ptr = U256::from(200);
-//     let inv = U256::from(300);
+fn setup_inv_test(f: Fp12) -> InterpreterSetup {
+    let ptr: usize = 100;
+    let inv: usize = 112;
+    let stack = vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)];
+    let memory = vec![(ptr, fp12_to_vec(f))];
 
-//     let f: Fp12 = gen_fp12();
-//     let mut stack = vec![ptr];
-//     stack.extend(fp12_to_vec(f));
-//     stack.extend(vec![ptr, inv, U256::from_str("0xdeadbeef").unwrap()]);
+    InterpreterSetup {
+        offset: "inv_fp12".to_string(),
+        stack: stack,
+        memory: memory,
+        output: inv..inv+12,
+    }
+}
 
-//     let output: Vec<U256> = get_interpreter_output("test_inv_fp12", stack);
+#[test]
+fn test_inv_fp12() -> Result<()> {
+    let f: Fp12 = gen_fp12();
 
-//     assert_eq!(output, vec![]);
+    let setup = setup_inv_test(f);
+    let output: Vec<U256> = get_interpreter_output(setup).unwrap();
+    let expected: Vec<U256> = fp12_to_vec(inv_fp12(f));
 
-//     Ok(())
-// }
+    assert_eq!(output, expected);
+
+    Ok(())
+}
 
 // #[test]
 // fn test_power() -> Result<()> {

From 3fcb559123b11d70f5a0c1f5bb8440fe859de576 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 15:01:54 +0700
Subject: [PATCH 132/201] redundant macro

---
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm    | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
index ea713056..5d081a19 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
@@ -1070,8 +1070,3 @@
     // stack:      ind11', x11, ptr'
     %mstore_kernel_general
 %endmacro
-
-global return_fp12_on_stack:
-    // stack: out
-    %load_fp12
-    %jump(0xdeadbeef)

From 922d3ebca88ee68239fd580f0e7b79480de1f5e0 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 15:43:17 +0700
Subject: [PATCH 133/201] add module and fix errors

---
 evm/src/bn254_arithmetic.rs       |  19 ++++--
 evm/src/bn254_curve_pairing.rs    | 110 +++++++++++++++---------------
 evm/src/cpu/kernel/tests/bn254.rs |  18 ++---
 evm/src/lib.rs                    |   2 +-
 4 files changed, 80 insertions(+), 69 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 545c2efb..1159c620 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -14,7 +14,7 @@ pub const BN_BASE: U256 = U256([
 
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp {
-    val: U256,
+    pub val: U256,
 }
 
 impl Add for Fp {
@@ -83,12 +83,16 @@ fn exp_fp(x: Fp, e: U256) -> Fp {
     product
 }
 
+pub fn make_fp(n: i32) -> Fp {
+    Fp { val: U256::from(n) }
+}
+
 /// The degree 2 field extension Fp2 is given by adjoining i, the square root of -1, to Fp
 /// The arithmetic in this extension is standard complex arithmetic
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp2 {
-    re: Fp,
-    im: Fp,
+    pub re: Fp,
+    pub im: Fp,
 }
 
 impl Add for Fp2 {
@@ -160,6 +164,13 @@ const UNIT_FP2: Fp2 = Fp2 {
     im: ZERO_FP,
 };
 
+pub fn mul_fp_fp2(x: Fp, a: Fp2) -> Fp2 {
+    Fp2 {
+        re: x * a.re,
+        im: x * a.im,
+    }
+}
+
 // This function takes the complex conjugate
 fn conj_fp2(a: Fp2) -> Fp2 {
     Fp2 {
@@ -368,7 +379,7 @@ pub fn inv_fp12(f: Fp12) -> Fp12 {
     UNIT_FP12 / f
 }
 
-fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
+pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     let g0 = Fp6 {
         t0: Fp2 {
             re: g000,
diff --git a/evm/src/bn254_curve_pairing.rs b/evm/src/bn254_curve_pairing.rs
index a6a44677..58ec2c3c 100644
--- a/evm/src/bn254_curve_pairing.rs
+++ b/evm/src/bn254_curve_pairing.rs
@@ -1,18 +1,18 @@
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{Fp, Fp12};
+use crate::bn254_arithmetic::{Fp, Fp2, Fp12, mul_fp_fp2, inv_fp12, frob_fp12, UNIT_FP12, sparse_embed, make_fp};
 
 pub type Curve = [Fp; 2];
-pub type TwistedCurve = [[Fp; 2]; 2];
+pub type TwistedCurve = [Fp2; 2];
 
 pub fn curve_generator() -> Curve {
-    [Fp { val: U256::one() }, Fp { val: U256::from(2) }]
+    [make_fp(1), make_fp(2)]
 }
 
 pub fn twisted_curve_generator() -> TwistedCurve {
     [
-        [
-            Fp {
+        Fp2 {
+            re: Fp {
                 val: U256([
                     0x46debd5cd992f6ed,
                     0x674322d4f75edadd,
@@ -20,7 +20,7 @@ pub fn twisted_curve_generator() -> TwistedCurve {
                     0x1800deef121f1e76,
                 ]),
             },
-            Fp {
+            im: Fp {
                 val: U256([
                     0x97e485b7aef312c2,
                     0xf1aa493335a9e712,
@@ -28,9 +28,9 @@ pub fn twisted_curve_generator() -> TwistedCurve {
                     0x198e9393920d483a,
                 ]),
             },
-        ],
-        [
-            Fp {
+        },
+        Fp2 {
+            re: Fp {
                 val: U256([
                     0x4ce6cc0166fa7daa,
                     0xe3d1e7690c43d37b,
@@ -38,7 +38,7 @@ pub fn twisted_curve_generator() -> TwistedCurve {
                     0x12c85ea5db8c6deb,
                 ]),
             },
-            Fp {
+            im: Fp {
                 val: U256([
                     0x55acdadcd122975b,
                     0xbc4b313370b38ef3,
@@ -46,7 +46,7 @@ pub fn twisted_curve_generator() -> TwistedCurve {
                     0x90689d0585ff075,
                 ]),
             },
-        ],
+        },
     ]
 }
 
@@ -190,66 +190,66 @@ pub fn power(f: Fp12) -> Fp12 {
     ];
 
     let mut sq: Fp12 = f;
-    let mut y0: Fp12 = embed_fp12(U256::one());
-    let mut y2: Fp12 = embed_fp12(U256::one());
-    let mut y4: Fp12 = embed_fp12(U256::one());
+    let mut y0: Fp12 = UNIT_FP12;
+    let mut y2: Fp12 = UNIT_FP12;
+    let mut y4: Fp12 = UNIT_FP12;
 
     for (a, b, c) in EXPS4 {
         if a != 0 {
-            y4 = mul_fp12(y4, sq);
+            y4 = y4 * sq;
         }
         if b != 0 {
-            y2 = mul_fp12(y2, sq);
+            y2 = y2 * sq;
         }
         if c != 0 {
-            y0 = mul_fp12(y0, sq);
+            y0 = y0 * sq;
         }
-        sq = mul_fp12(sq, sq);
+        sq = sq * sq;
     }
-    y4 = mul_fp12(y4, sq);
+    y4 = y4 * sq;
 
     for (a, b) in EXPS2 {
         if a != 0 {
-            y2 = mul_fp12(y2, sq);
+            y2 = y2 * sq;
         }
         if b != 0 {
-            y0 = mul_fp12(y0, sq);
+            y0 = y0 * sq;
         }
-        sq = mul_fp12(sq, sq);
+        sq = sq * sq;
     }
-    y2 = mul_fp12(y2, sq);
+    y2 = y2 * sq;
 
     for a in EXPS0 {
         if a != 0 {
-            y0 = mul_fp12(y0, sq);
+            y0 = y0 * sq;
         }
-        sq = mul_fp12(sq, sq);
+        sq = sq * sq;
     }
-    y0 = mul_fp12(y0, sq);
+    y0 = y0 * sq;
 
     y0 = inv_fp12(y0);
 
-    y4 = mul_fp12(y4, y2);
-    y4 = mul_fp12(y4, y2);
-    y4 = mul_fp12(y4, y0);
+    y4 = y4 * y2;
+    y4 = y4 * y2;
+    y4 = y4 * y0;
 
     y4 = frob_fp12(1, y4);
     y2 = frob_fp12(2, y2);
 
-    mul_fp12(mul_fp12(y4, y2), y0)
+    y4 * y2 * y0
 }
 
 pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     let [px, py] = p;
     let [qx, qy] = q;
 
-    let cx = neg_fp(mul_fp(U256::from(3), mul_fp(px, px)));
-    let cy = mul_fp(U256::from(2), py);
+    let cx = - make_fp(3) * px * px;
+    let cy = make_fp(2) *  py;
 
     sparse_embed(
-        sub_fp(mul_fp(py, py), U256::from(9)),
-        mul_fp2(embed_fp2(cx), qx),
-        mul_fp2(embed_fp2(cy), qy),
+        py * py - make_fp(9),
+        mul_fp_fp2(cx, qx),
+        mul_fp_fp2(cy, qy),
     )
 }
 
@@ -258,36 +258,36 @@ pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     let [p2x, p2y] = p2;
     let [qx, qy] = q;
 
-    let cx = sub_fp(p2y, p1y);
-    let cy = sub_fp(p1x, p2x);
+    let cx = p2y - p1y;
+    let cy = p1x - p2x;
 
     sparse_embed(
-        sub_fp(mul_fp(p1y, p2x), mul_fp(p2y, p1x)),
-        mul_fp2(embed_fp2(cx), qx),
-        mul_fp2(embed_fp2(cy), qy),
+        p1y * p2x - p2y * p1x,
+        mul_fp_fp2(cx, qx),
+        mul_fp_fp2(cy, qy),
     )
 }
 
 fn tangent_slope(p: Curve) -> Fp {
     let [px, py] = p;
-    let num = mul_fp(mul_fp(px, px), U256::from(3));
-    let denom = mul_fp(py, U256::from(2));
-    div_fp(num, denom)
+    let num = px * px *  make_fp(3);
+    let denom = py * make_fp(2);
+    num / denom
 }
 
 fn cord_slope(p: Curve, q: Curve) -> Fp {
     let [px, py] = p;
     let [qx, qy] = q;
-    let num = sub_fp(qy, py);
-    let denom = sub_fp(qx, px);
-    div_fp(num, denom)
+    let num = qy - py;
+    let denom = qx - px;
+    num / denom
 }
 
 fn third_point(m: Fp, p: Curve, q: Curve) -> Curve {
     let [px, py] = p;
     let [qx, _] = q;
-    let ox = sub_fp(mul_fp(m, m), add_fp(px, qx));
-    let oy = sub_fp(mul_fp(m, sub_fp(px, ox)), py);
+    let ox = m * m - (px + qx);
+    let oy = (m * (px - ox)) - py;
     [ox, oy]
 }
 
@@ -317,17 +317,17 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
     ];
 
     let mut o = p;
-    let mut acc = embed_fp12(U256::one());
+    let mut acc = UNIT_FP12;
     let mut line;
 
     for i in EXP {
-        acc = mul_fp12(acc, acc);
+        acc = acc * acc;
         line = tangent(o, q);
-        acc = mul_fp12(line, acc);
+        acc = line * acc;
         o = curve_double(o);
         if i != 0 {
             line = cord(p, o, q);
-            acc = mul_fp12(line, acc);
+            acc = line * acc;
             o = curve_add(p, o);
         }
     }
@@ -339,12 +339,12 @@ pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
 
     let inv = inv_fp12(out);
     out = frob_fp12(6, out);
-    out = mul_fp12(out, inv);
+    out = out * inv;
 
     let acc = frob_fp12(2, out);
-    out = mul_fp12(out, acc);
+    out = out * acc;
 
     let pow = power(out);
     out = frob_fp12(3, out);
-    mul_fp12(out, pow)
+    out * pow
 }
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 778f2de7..58f26bcc 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -59,9 +59,9 @@ fn setup_mul_test(f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
 
     InterpreterSetup {
         offset: label.to_string(),
-        stack: stack,
-        memory: memory,
-        output: out..out+12,
+        stack,
+        memory,
+        output: out..out + 12,
     }
 }
 
@@ -97,9 +97,9 @@ fn setup_frob_test(f: Fp12, label: &str) -> InterpreterSetup {
 
     InterpreterSetup {
         offset: label.to_string(),
-        stack: stack,
-        memory: memory,
-        output: ptr..ptr+12,
+        stack,
+        memory,
+        output: ptr..ptr + 12,
     }
 }
 
@@ -138,9 +138,9 @@ fn setup_inv_test(f: Fp12) -> InterpreterSetup {
 
     InterpreterSetup {
         offset: "inv_fp12".to_string(),
-        stack: stack,
-        memory: memory,
-        output: inv..inv+12,
+        stack,
+        memory,
+        output: inv..inv + 12,
     }
 }
 
diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index 689e6c57..5f2e9ca5 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -9,7 +9,7 @@
 pub mod all_stark;
 pub mod arithmetic;
 pub mod bn254_arithmetic;
-// pub mod bn254_curve_pairing;
+pub mod bn254_curve_pairing;
 pub mod config;
 pub mod constraint_consumer;
 pub mod cpu;

From 8e62d994411aa4771b0b23df3ff2ecfc3001b264 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 15:43:32 +0700
Subject: [PATCH 134/201] fmt

---
 evm/src/bn254_curve_pairing.rs | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/evm/src/bn254_curve_pairing.rs b/evm/src/bn254_curve_pairing.rs
index 58ec2c3c..d03a2b88 100644
--- a/evm/src/bn254_curve_pairing.rs
+++ b/evm/src/bn254_curve_pairing.rs
@@ -1,6 +1,8 @@
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{Fp, Fp2, Fp12, mul_fp_fp2, inv_fp12, frob_fp12, UNIT_FP12, sparse_embed, make_fp};
+use crate::bn254_arithmetic::{
+    frob_fp12, inv_fp12, make_fp, mul_fp_fp2, sparse_embed, Fp, Fp12, Fp2, UNIT_FP12,
+};
 
 pub type Curve = [Fp; 2];
 pub type TwistedCurve = [Fp2; 2];
@@ -243,14 +245,10 @@ pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     let [px, py] = p;
     let [qx, qy] = q;
 
-    let cx = - make_fp(3) * px * px;
-    let cy = make_fp(2) *  py;
+    let cx = -make_fp(3) * px * px;
+    let cy = make_fp(2) * py;
 
-    sparse_embed(
-        py * py - make_fp(9),
-        mul_fp_fp2(cx, qx),
-        mul_fp_fp2(cy, qy),
-    )
+    sparse_embed(py * py - make_fp(9), mul_fp_fp2(cx, qx), mul_fp_fp2(cy, qy))
 }
 
 pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
@@ -270,7 +268,7 @@ pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
 
 fn tangent_slope(p: Curve) -> Fp {
     let [px, py] = p;
-    let num = px * px *  make_fp(3);
+    let num = px * px * make_fp(3);
     let denom = py * make_fp(2);
     num / denom
 }

From b2f9d885f95a31d451526453ae51afd46cc11131 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 15:59:14 +0700
Subject: [PATCH 135/201] remove redundant macros and improve comments

---
 evm/src/bn254_arithmetic.rs                     | 17 ++++++++++-------
 evm/src/bn254_curve_pairing.rs                  |  2 +-
 .../bn254/curve_arithmetic/miller_loop.asm      |  6 ------
 .../bn254/curve_arithmetic/tate_pairing.asm     |  6 ------
 .../asm/curve/bn254/field_arithmetic/power.asm  |  6 ------
 5 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 1159c620..7940b347 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -254,16 +254,19 @@ impl Mul for Fp6 {
     }
 }
 
-/// Let x_n = x^(p^n); By Galois Theory, for x: Fp6, the product
+/// Let x_n = x^(p^n) and note that 
+///     x_0 = x^(p^0) = x^1 = x
+///     (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m} 
+/// By Galois Theory, given x: Fp6, the product
 ///     phi = x_0 * x_1 * x_2 * x_3 * x_4 * x_5
-/// lands in Fp, and hence the inverse of x (= x_0) is given by
+/// lands in Fp, and hence the inverse of x is given by
 ///     (x_1 * x_2 * x_3 * x_4 * x_5) / phi
-/// Since (x_n)_m = x_{n+m}, we save compute by rearranging the numerator:
+/// We can save compute by rearranging the numerator:
 ///     (x_1 * x_3) * x_5 * (x_1 * x_3)_1
-/// By Galois theory, both the following are in Fp2 and are complex conjugates
+/// By Galois theory, the following are in Fp2 and are complex conjugates
 ///     x_1 * x_3 * x_5,  x_0 * x_2 * x_4
 /// Thus phi = norm(x_1 * x_3 * x_5), and hence the inverse is given by
-///     normalize((x_1 * x_3) * x_5) * (x_1 * x_3)_1
+///     normalize([x_1 * x_3] * x_5) * [x_1 * x_3]_1
 impl Div for Fp6 {
     type Output = Self;
 
@@ -329,12 +332,12 @@ impl Mul for Fp12 {
     }
 }
 
-/// By Galois Theory, for x: Fp12, the product
+/// By Galois Theory, given x: Fp12, the product
 ///     phi = Prod_{i=0}^11 x_i
 /// lands in Fp, and hence the inverse of x is given by
 ///     (Prod_{i=1}^11 x_i) / phi
 /// The 6th Frob map is nontrivial but leaves Fp6 fixed and hence must be the conjugate:
-///     x_6 = (a + bz)_6 = a - bz
+///     x_6 = (a + bz)_6 = a - bz = conj_fp12(x)
 /// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expresed as:
 ///     [(prod_17) * (prod_17)_2] * (prod_17)_4 * [(prod_17) * (prod_17)_2]_1
 /// By Galois theory, both the following are in Fp2 and are complex conjugates
diff --git a/evm/src/bn254_curve_pairing.rs b/evm/src/bn254_curve_pairing.rs
index d03a2b88..db327e2f 100644
--- a/evm/src/bn254_curve_pairing.rs
+++ b/evm/src/bn254_curve_pairing.rs
@@ -285,7 +285,7 @@ fn third_point(m: Fp, p: Curve, q: Curve) -> Curve {
     let [px, py] = p;
     let [qx, _] = q;
     let ox = m * m - (px + qx);
-    let oy = (m * (px - ox)) - py;
+    let oy = m * (px - ox) - py;
     [ox, oy]
 }
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 5d2104e6..aa341288 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -25,12 +25,6 @@
 ///     0xnm -= 1
 ///     mul_tangent()
 
-global test_miller:
-    // stack: ptr, P, Q, ptr, out, retdest
-    %store_fp6
-    // stack:            ptr, out, retdest
-    %jump(miller_init)
-
 global miller_init:
     // stack:         ptr, out, retdest
     PUSH 1
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 5957d02a..78ce9085 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -14,12 +14,6 @@
 ///
 ///     return out
 
-global test_tate:
-    // stack: ptr, P, Q, ptr, out, retdest
-    %store_fp6
-    // stack:            ptr, out, retdest
-    %jump(tate)
-
 global tate:
     // stack:                        ptr, out, retdest
     DUP2
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index a0d38a04..d8478357 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -1,9 +1,3 @@
-global test_pow:
-    // stack: ptr, f, ptr, out, return_fp12_on_stack, out
-    %store_fp12
-    // stack:         ptr, out, return_fp12_on_stack, out
-    %jump(power)
-
 /// def power(acc):
 ///     power_init()
 ///     power_loop_4()

From d99cadebc2ab7dedafa23d23d753589635fc924a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 16:03:24 +0700
Subject: [PATCH 136/201] stack macro

---
 evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index fed823d3..39fb73e2 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -36,7 +36,7 @@ global inv_fp12:
     // stack:                ptr, inv, retdest
     PUSH 50  PUSH check_inv
     // stack: check_inv, 50, ptr, inv, retdest 
-    SWAP3  SWAP1  SWAP2
+    %stack(check_inv, 50, ptr, inv) -> (ptr, inv, 50, check_inv)
     // stack: ptr, inv, 50, check_inv, retdest 
     %jump(mul_fp12)
 global check_inv:

From 4d783da82d42b54cb0f45404440f279b7541b5c0 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 20 Jan 2023 16:05:27 +0700
Subject: [PATCH 137/201] fmt

---
 evm/src/bn254_arithmetic.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 7940b347..22bf92c4 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -254,9 +254,9 @@ impl Mul for Fp6 {
     }
 }
 
-/// Let x_n = x^(p^n) and note that 
+/// Let x_n = x^(p^n) and note that
 ///     x_0 = x^(p^0) = x^1 = x
-///     (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m} 
+///     (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m}
 /// By Galois Theory, given x: Fp6, the product
 ///     phi = x_0 * x_1 * x_2 * x_3 * x_4 * x_5
 /// lands in Fp, and hence the inverse of x is given by

From a5c292c751e4159b183a18ac92bd6c3f30e91542 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 00:33:09 +0700
Subject: [PATCH 138/201] space

---
 evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 39fb73e2..9c3d2868 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -36,7 +36,7 @@ global inv_fp12:
     // stack:                ptr, inv, retdest
     PUSH 50  PUSH check_inv
     // stack: check_inv, 50, ptr, inv, retdest 
-    %stack(check_inv, 50, ptr, inv) -> (ptr, inv, 50, check_inv)
+    %stack (check_inv, 50, ptr, inv) -> (ptr, inv, 50, check_inv)
     // stack: ptr, inv, 50, check_inv, retdest 
     %jump(mul_fp12)
 global check_inv:

From e06a2f2d46dc3e4d13eefa9530466c42ad934413 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 00:56:38 +0700
Subject: [PATCH 139/201] duh

---
 evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 9c3d2868..408d3cc9 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -36,7 +36,7 @@ global inv_fp12:
     // stack:                ptr, inv, retdest
     PUSH 50  PUSH check_inv
     // stack: check_inv, 50, ptr, inv, retdest 
-    %stack (check_inv, 50, ptr, inv) -> (ptr, inv, 50, check_inv)
+    %stack (check_inv, mem, ptr, inv) -> (ptr, inv, mem, check_inv)
     // stack: ptr, inv, 50, check_inv, retdest 
     %jump(mul_fp12)
 global check_inv:

From d2aa937a2ff42e1e437ad44207d7745d14448c1c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 13:19:07 +0700
Subject: [PATCH 140/201] improved prover input and test api

---
 evm/src/bn254_arithmetic.rs                   |  40 +----
 evm/src/cpu/kernel/aggregator.rs              |   2 +-
 .../curve/bn254/field_arithmetic/inverse.asm  |  17 ++-
 .../field_arithmetic/{utils.asm => util.asm}  |   0
 evm/src/cpu/kernel/tests/bn254.rs             | 142 +++++++++---------
 evm/src/generation/prover_input.rs            |  26 ++--
 evm/src/witness/util.rs                       |  21 +--
 7 files changed, 115 insertions(+), 133 deletions(-)
 rename evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/{utils.asm => util.asm} (100%)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 22bf92c4..c5c7961d 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -2,7 +2,6 @@ use std::mem::transmute;
 use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use ethereum_types::U256;
-use itertools::Itertools;
 use rand::{thread_rng, Rng};
 
 pub const BN_BASE: U256 = U256([
@@ -139,17 +138,13 @@ impl Mul for Fp2 {
     }
 }
 
-/// The inverse of a + bi is given by (a - bi)/(a^2 + b^2) since
-/// (a + bi)(a - bi)/(a^2 + b^2) = (a^2 + b^2)/(a^2 + b^2) = 1
+/// The inverse of z is z'/(zz'), where z' is the conjugate and zz' = re^2 + im^2 lies in Fp
 impl Div for Fp2 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
         let norm = rhs.re * rhs.re + rhs.im * rhs.im;
-        let inv = Fp2 {
-            re: rhs.re / norm,
-            im: -rhs.im / norm,
-        };
+        let inv = mul_fp_fp2(make_fp(1) / norm, conj_fp2(rhs));
         self * inv
     }
 }
@@ -833,36 +828,9 @@ const FROB_Z: [Fp2; 12] = [
     },
 ];
 
-pub fn fp12_to_array(f: Fp12) -> [U256; 12] {
-    unsafe { transmute(f) }
-}
-
 pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
-    fp12_to_array(f).into_iter().collect()
-}
-
-pub fn vec_to_fp12(xs: Vec<U256>) -> Fp12 {
-    xs.into_iter()
-        .tuples::<(U256, U256)>()
-        .map(|(v1, v2)| Fp2 {
-            re: Fp { val: v1 },
-            im: Fp { val: v2 },
-        })
-        .tuples()
-        .map(|(a1, a2, a3, a4, a5, a6)| Fp12 {
-            z0: Fp6 {
-                t0: a1,
-                t1: a2,
-                t2: a3,
-            },
-            z1: Fp6 {
-                t0: a4,
-                t1: a5,
-                t2: a6,
-            },
-        })
-        .next()
-        .unwrap()
+    let f: [U256; 12] = unsafe { transmute(f) };
+    f.into_iter().collect()
 }
 
 fn gen_fp() -> Fp {
diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index d924eeb4..c74baa65 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -34,7 +34,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/power.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/utils.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/util.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_mul.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 408d3cc9..8f42e047 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -23,6 +23,18 @@
 
 
 global inv_fp12:
+    // stack:                ptr, inv, retdest
+    %prover_inv_fp12
+    // stack:          f^-1, ptr, inv, retdest
+    DUP14
+    // stack:     inv, f^-1, ptr, inv, retdest
+    %store_fp12
+    // stack:                ptr, inv, retdest
+    %stack (ptr, inv) -> (ptr, inv, 50, check_inv)
+    // stack: ptr, inv, 50, check_inv, retdest 
+    %jump(mul_fp12)
+
+global inv_fp12_old:
     // stack:                ptr, inv, retdest
     DUP1  %load_fp12
     // stack:             f, ptr, inv, retdest
@@ -39,9 +51,12 @@ global inv_fp12:
     %stack (check_inv, mem, ptr, inv) -> (ptr, inv, mem, check_inv)
     // stack: ptr, inv, 50, check_inv, retdest 
     %jump(mul_fp12)
+
+
 global check_inv:
     // stack:        retdest
-    PUSH 50  %load_fp12
+    PUSH 50  
+    %load_fp12
     // stack: unit?, retdest
     %assert_eq_unit_fp12
     // stack:        retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/utils.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 58f26bcc..23f9d531 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -13,15 +13,13 @@ struct InterpreterSetup {
     offset: String,
     stack: Vec<U256>,
     memory: Vec<(usize, Vec<U256>)>,
-    output: Range<usize>,
 }
 
-fn get_interpreter_output(setup: InterpreterSetup) -> Result<Vec<U256>> {
+fn run_setup_interpreter(setup: InterpreterSetup) -> Result<Interpreter<'static>> {
     let label = KERNEL.global_labels[&setup.offset];
     let mut stack = setup.stack;
     stack.reverse();
     let mut interpreter = Interpreter::new_with_kernel(label, stack);
-
     for (pointer, data) in setup.memory {
         for (i, term) in data.iter().enumerate() {
             interpreter.generation_state.memory.set(
@@ -30,54 +28,64 @@ fn get_interpreter_output(setup: InterpreterSetup) -> Result<Vec<U256>> {
             )
         }
     }
-
     interpreter.run()?;
-
-    let kernel = &interpreter.generation_state.memory.contexts[interpreter.context].segments
-        [Segment::KernelGeneral as usize]
-        .content;
-
-    let mut output: Vec<U256> = vec![];
-    for i in setup.output {
-        output.push(kernel[i]);
-    }
-    Ok(output)
+    Ok(interpreter)
 }
 
-fn setup_mul_test(f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
-    let in0: usize = 64;
-    let in1: usize = 76;
-    let out: usize = 88;
-
-    let stack = vec![
-        U256::from(in0),
-        U256::from(in1),
-        U256::from(out),
-        U256::from(0xdeadbeefu32),
-    ];
-    let memory = vec![(in0, fp12_to_vec(f)), (in1, fp12_to_vec(g))];
+fn extract_kernel_output(range: Range<usize>, interpreter: Interpreter<'static>) -> Vec<U256> {
+    let mut output: Vec<U256> = vec![];
+    for i in range {
+        let term = interpreter.generation_state.memory.get(MemoryAddress::new(
+            0,
+            Segment::KernelGeneral,
+            i,
+        ));
+        output.push(term);
+    }
+    output
+}
 
+fn setup_mul_test(
+    in0: usize,
+    in1: usize,
+    out: usize,
+    f: Fp12,
+    g: Fp12,
+    label: &str,
+) -> InterpreterSetup {
     InterpreterSetup {
         offset: label.to_string(),
-        stack,
-        memory,
-        output: out..out + 12,
+        stack: vec![
+            U256::from(in0),
+            U256::from(in1),
+            U256::from(out),
+            U256::from(0xdeadbeefu32),
+        ],
+        memory: vec![(in0, fp12_to_vec(f)), (in1, fp12_to_vec(g))],
     }
 }
 
 #[test]
 fn test_mul_fp12() -> Result<()> {
+    let in0: usize = 64;
+    let in1: usize = 76;
+    let out: usize = 88;
+
     let f: Fp12 = gen_fp12();
     let g: Fp12 = gen_fp12();
     let h: Fp12 = gen_fp12_sparse();
 
-    let setup_normal: InterpreterSetup = setup_mul_test(f, g, "mul_fp12");
-    let setup_sparse: InterpreterSetup = setup_mul_test(f, h, "mul_fp12_sparse");
-    let setup_square: InterpreterSetup = setup_mul_test(f, f, "square_fp12_test");
+    let setup_normal: InterpreterSetup = setup_mul_test(in0, in1, out, f, g, "mul_fp12");
+    let setup_sparse: InterpreterSetup = setup_mul_test(in0, in1, out, f, h, "mul_fp12_sparse");
+    let setup_square: InterpreterSetup = setup_mul_test(in0, in1, out, f, f, "square_fp12_test");
 
-    let out_normal: Vec<U256> = get_interpreter_output(setup_normal).unwrap();
-    let out_sparse: Vec<U256> = get_interpreter_output(setup_sparse).unwrap();
-    let out_square: Vec<U256> = get_interpreter_output(setup_square).unwrap();
+    let intrptr_normal: Interpreter = run_setup_interpreter(setup_normal).unwrap();
+    let intrptr_sparse: Interpreter = run_setup_interpreter(setup_sparse).unwrap();
+    let intrptr_square: Interpreter = run_setup_interpreter(setup_square).unwrap();
+
+    let out_normal: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_normal);
+    let out_sparse: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_sparse);
+    let out_square: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_square);
 
     let exp_normal: Vec<U256> = fp12_to_vec(f * g);
     let exp_sparse: Vec<U256> = fp12_to_vec(f * h);
@@ -90,32 +98,33 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-fn setup_frob_test(f: Fp12, label: &str) -> InterpreterSetup {
-    let ptr: usize = 100;
-    let stack = vec![U256::from(ptr)];
-    let memory = vec![(ptr, fp12_to_vec(f))];
-
+fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
     InterpreterSetup {
         offset: label.to_string(),
-        stack,
-        memory,
-        output: ptr..ptr + 12,
+        stack: vec![U256::from(ptr)],
+        memory: vec![(ptr, fp12_to_vec(f))],
     }
 }
 
 #[test]
 fn test_frob_fp12() -> Result<()> {
+    let ptr: usize = 100;
     let f: Fp12 = gen_fp12();
 
-    let setup_frob_1 = setup_frob_test(f, "test_frob_fp12_1");
-    let setup_frob_2 = setup_frob_test(f, "test_frob_fp12_2");
-    let setup_frob_3 = setup_frob_test(f, "test_frob_fp12_3");
-    let setup_frob_6 = setup_frob_test(f, "test_frob_fp12_6");
+    let setup_frob_1 = setup_frob_test(ptr, f, "test_frob_fp12_1");
+    let setup_frob_2 = setup_frob_test(ptr, f, "test_frob_fp12_2");
+    let setup_frob_3 = setup_frob_test(ptr, f, "test_frob_fp12_3");
+    let setup_frob_6 = setup_frob_test(ptr, f, "test_frob_fp12_6");
 
-    let out_frob_1: Vec<U256> = get_interpreter_output(setup_frob_1).unwrap();
-    let out_frob_2: Vec<U256> = get_interpreter_output(setup_frob_2).unwrap();
-    let out_frob_3: Vec<U256> = get_interpreter_output(setup_frob_3).unwrap();
-    let out_frob_6: Vec<U256> = get_interpreter_output(setup_frob_6).unwrap();
+    let intrptr_frob_1: Interpreter = run_setup_interpreter(setup_frob_1).unwrap();
+    let intrptr_frob_2: Interpreter = run_setup_interpreter(setup_frob_2).unwrap();
+    let intrptr_frob_3: Interpreter = run_setup_interpreter(setup_frob_3).unwrap();
+    let intrptr_frob_6: Interpreter = run_setup_interpreter(setup_frob_6).unwrap();
+
+    let out_frob_1: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_1);
+    let out_frob_2: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_2);
+    let out_frob_3: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_3);
+    let out_frob_6: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_6);
 
     let exp_frob_1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
     let exp_frob_2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
@@ -130,26 +139,19 @@ fn test_frob_fp12() -> Result<()> {
     Ok(())
 }
 
-fn setup_inv_test(f: Fp12) -> InterpreterSetup {
-    let ptr: usize = 100;
-    let inv: usize = 112;
-    let stack = vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)];
-    let memory = vec![(ptr, fp12_to_vec(f))];
-
-    InterpreterSetup {
-        offset: "inv_fp12".to_string(),
-        stack,
-        memory,
-        output: inv..inv + 12,
-    }
-}
-
 #[test]
 fn test_inv_fp12() -> Result<()> {
+    let ptr: usize = 100;
+    let inv: usize = 112;
     let f: Fp12 = gen_fp12();
 
-    let setup = setup_inv_test(f);
-    let output: Vec<U256> = get_interpreter_output(setup).unwrap();
+    let setup = InterpreterSetup {
+        offset: "inv_fp12".to_string(),
+        stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],
+        memory: vec![(ptr, fp12_to_vec(f))],
+    };
+    let interpreter: Interpreter = run_setup_interpreter(setup).unwrap();
+    let output: Vec<U256> = extract_kernel_output(inv..inv + 12, interpreter);
     let expected: Vec<U256> = fp12_to_vec(inv_fp12(f));
 
     assert_eq!(output, expected);
@@ -173,7 +175,7 @@ fn test_inv_fp12() -> Result<()> {
 //         out,
 //     ]);
 
-//     let output: Vec<U256> = get_interpreter_output("test_pow", stack);
+//     let output: Vec<U256> = run_setup_interpreter("test_pow", stack);
 //     let expected: Vec<U256> = fp12_to_vec(power(f));
 
 //     assert_eq!(output, expected);
@@ -206,7 +208,7 @@ fn test_inv_fp12() -> Result<()> {
 //     let q: TwistedCurve = twisted_curve_generator();
 
 //     let stack = make_tate_stack(p, q);
-//     let output = get_interpreter_output("test_miller", stack);
+//     let output = run_setup_interpreter("test_miller", stack);
 //     let expected = fp12_to_vec(miller_loop(p, q));
 
 //     assert_eq!(output, expected);
@@ -220,7 +222,7 @@ fn test_inv_fp12() -> Result<()> {
 //     let q: TwistedCurve = twisted_curve_generator();
 
 //     let stack = make_tate_stack(p, q);
-//     let output = get_interpreter_output("test_tate", stack);
+//     let output = run_setup_interpreter("test_tate", stack);
 //     let expected = fp12_to_vec(tate(p, q));
 
 //     assert_eq!(output, expected);
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 9f305e41..4dff42c7 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -1,16 +1,17 @@
+use std::mem::transmute;
 use std::str::FromStr;
 
 use anyhow::{bail, Error};
 use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;
 
-use crate::bn254_arithmetic::{fp12_to_array, inv_fp12, vec_to_fp12};
+use crate::bn254_arithmetic::{inv_fp12, Fp12};
 use crate::generation::prover_input::EvmField::{
     Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
 use crate::generation::prover_input::FieldOp::{Inverse, Sqrt};
 use crate::generation::state::GenerationState;
-use crate::witness::util::{stack_peek, stack_peeks};
+use crate::witness::util::{kernel_general_peek, stack_peek};
 
 /// Prover input function represented as a scoped function name.
 /// Example: `PROVER_INPUT(ff::bn254_base::inverse)` is represented as `ProverInputFn([ff, bn254_base, inverse])`.
@@ -57,10 +58,7 @@ impl<F: Field> GenerationState<F> {
     /// Finite field extension operations.
     fn run_ffe(&self, input_fn: &ProverInputFn) -> U256 {
         let field = EvmField::from_str(input_fn.0[1].as_str()).unwrap();
-        let component = input_fn.0[2].as_str();
-        let xs = stack_peeks(self).expect("Empty stack");
-        // TODO: This sucks... come back later
-        let n = match component {
+        let n = match input_fn.0[2].as_str() {
             "component_0" => 0,
             "component_1" => 1,
             "component_2" => 2,
@@ -75,7 +73,12 @@ impl<F: Field> GenerationState<F> {
             "component_11" => 11,
             _ => panic!("out of bounds"),
         };
-        field.inverse_fp12(n, xs)
+        let ptr = stack_peek(self, 11 - n).expect("Empty stack").as_usize();
+        let mut f: [U256; 12] = [U256::zero(); 12];
+        for i in 0..12 {
+            f[i] = kernel_general_peek(self, ptr + i);
+        }
+        field.inverse_fp12(n, f)
     }
 
     /// MPT data.
@@ -196,11 +199,10 @@ impl EvmField {
         modexp(x, q, n)
     }
 
-    fn inverse_fp12(&self, n: usize, xs: Vec<U256>) -> U256 {
-        let offset = 12 - n;
-        let vec: Vec<U256> = xs[offset..].to_vec();
-        let f = fp12_to_array(inv_fp12(vec_to_fp12(vec)));
-        f[n]
+    fn inverse_fp12(&self, n: usize, f: [U256; 12]) -> U256 {
+        let f: Fp12 = unsafe { transmute(f) };
+        let f_inv: [U256; 12] = unsafe { transmute(inv_fp12(f)) };
+        f_inv[n]
     }
 }
 
diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs
index 9aa0cb03..d47365f0 100644
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@@ -27,7 +27,7 @@ fn to_bits_le<F: Field>(n: u8) -> [F; 8] {
     res
 }
 
-/// Peak at the stack item `i`th from the top. If `i=0` this gives the tip.
+/// Peek at the stack item `i`th from the top. If `i=0` this gives the tip.
 pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Option<U256> {
     if i >= state.registers.stack_len {
         return None;
@@ -39,18 +39,13 @@ pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Opti
     )))
 }
 
-/// Peek at the entire stack.
-pub(crate) fn stack_peeks<F: Field>(state: &GenerationState<F>) -> Option<Vec<U256>> {
-    let n = state.registers.stack_len;
-    let mut stack: Vec<U256> = vec![];
-    for i in 0..n {
-        stack.extend(vec![state.memory.get(MemoryAddress::new(
-            state.registers.code_context(),
-            Segment::Stack,
-            n - 1 - i,
-        ))])
-    }
-    Some(stack)
+/// Peek at the kernel general item at address `i`
+pub(crate) fn kernel_general_peek<F: Field>(state: &GenerationState<F>, i: usize) -> U256 {
+    state.memory.get(MemoryAddress::new(
+        state.registers.context,
+        Segment::KernelGeneral,
+        i,
+    ))
 }
 
 pub(crate) fn mem_read_with_log<F: Field>(

From 155e973d06b85855e9703253f8418d54a142d3e3 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 13:24:45 +0700
Subject: [PATCH 141/201] slight refactor

---
 evm/src/bn254_arithmetic.rs       |  6 -----
 evm/src/cpu/kernel/tests/bn254.rs | 38 ++++++++++++++++++-------------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index c5c7961d..72307d78 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,4 +1,3 @@
-use std::mem::transmute;
 use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use ethereum_types::U256;
@@ -828,11 +827,6 @@ const FROB_Z: [Fp2; 12] = [
     },
 ];
 
-pub fn fp12_to_vec(f: Fp12) -> Vec<U256> {
-    let f: [U256; 12] = unsafe { transmute(f) };
-    f.into_iter().collect()
-}
-
 fn gen_fp() -> Fp {
     let mut rng = thread_rng();
     let x64 = rng.gen::<u64>();
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 23f9d531..f6eafc85 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -1,9 +1,10 @@
+use std::mem::transmute;
 use std::ops::Range;
 
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{fp12_to_vec, frob_fp12, gen_fp12, gen_fp12_sparse, inv_fp12, Fp12};
+use crate::bn254_arithmetic::{frob_fp12, gen_fp12, gen_fp12_sparse, inv_fp12, Fp12};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
@@ -45,6 +46,11 @@ fn extract_kernel_output(range: Range<usize>, interpreter: Interpreter<'static>)
     output
 }
 
+fn fp12_on_stack(f: Fp12) -> Vec<U256> {
+    let f: [U256; 12] = unsafe { transmute(f) };
+    f.into_iter().collect()
+}
+
 fn setup_mul_test(
     in0: usize,
     in1: usize,
@@ -61,7 +67,7 @@ fn setup_mul_test(
             U256::from(out),
             U256::from(0xdeadbeefu32),
         ],
-        memory: vec![(in0, fp12_to_vec(f)), (in1, fp12_to_vec(g))],
+        memory: vec![(in0, fp12_on_stack(f)), (in1, fp12_on_stack(g))],
     }
 }
 
@@ -87,9 +93,9 @@ fn test_mul_fp12() -> Result<()> {
     let out_sparse: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_sparse);
     let out_square: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_square);
 
-    let exp_normal: Vec<U256> = fp12_to_vec(f * g);
-    let exp_sparse: Vec<U256> = fp12_to_vec(f * h);
-    let exp_square: Vec<U256> = fp12_to_vec(f * f);
+    let exp_normal: Vec<U256> = fp12_on_stack(f * g);
+    let exp_sparse: Vec<U256> = fp12_on_stack(f * h);
+    let exp_square: Vec<U256> = fp12_on_stack(f * f);
 
     assert_eq!(out_normal, exp_normal);
     assert_eq!(out_sparse, exp_sparse);
@@ -102,7 +108,7 @@ fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
     InterpreterSetup {
         offset: label.to_string(),
         stack: vec![U256::from(ptr)],
-        memory: vec![(ptr, fp12_to_vec(f))],
+        memory: vec![(ptr, fp12_on_stack(f))],
     }
 }
 
@@ -126,10 +132,10 @@ fn test_frob_fp12() -> Result<()> {
     let out_frob_3: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_3);
     let out_frob_6: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_6);
 
-    let exp_frob_1: Vec<U256> = fp12_to_vec(frob_fp12(1, f));
-    let exp_frob_2: Vec<U256> = fp12_to_vec(frob_fp12(2, f));
-    let exp_frob_3: Vec<U256> = fp12_to_vec(frob_fp12(3, f));
-    let exp_frob_6: Vec<U256> = fp12_to_vec(frob_fp12(6, f));
+    let exp_frob_1: Vec<U256> = fp12_on_stack(frob_fp12(1, f));
+    let exp_frob_2: Vec<U256> = fp12_on_stack(frob_fp12(2, f));
+    let exp_frob_3: Vec<U256> = fp12_on_stack(frob_fp12(3, f));
+    let exp_frob_6: Vec<U256> = fp12_on_stack(frob_fp12(6, f));
 
     assert_eq!(out_frob_1, exp_frob_1);
     assert_eq!(out_frob_2, exp_frob_2);
@@ -148,11 +154,11 @@ fn test_inv_fp12() -> Result<()> {
     let setup = InterpreterSetup {
         offset: "inv_fp12".to_string(),
         stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],
-        memory: vec![(ptr, fp12_to_vec(f))],
+        memory: vec![(ptr, fp12_on_stack(f))],
     };
     let interpreter: Interpreter = run_setup_interpreter(setup).unwrap();
     let output: Vec<U256> = extract_kernel_output(inv..inv + 12, interpreter);
-    let expected: Vec<U256> = fp12_to_vec(inv_fp12(f));
+    let expected: Vec<U256> = fp12_on_stack(inv_fp12(f));
 
     assert_eq!(output, expected);
 
@@ -167,7 +173,7 @@ fn test_inv_fp12() -> Result<()> {
 //     let f: Fp12 = gen_fp12();
 
 //     let mut stack = vec![ptr];
-//     stack.extend(fp12_to_vec(f));
+//     stack.extend(fp12_on_stack(f));
 //     stack.extend(vec![
 //         ptr,
 //         out,
@@ -176,7 +182,7 @@ fn test_inv_fp12() -> Result<()> {
 //     ]);
 
 //     let output: Vec<U256> = run_setup_interpreter("test_pow", stack);
-//     let expected: Vec<U256> = fp12_to_vec(power(f));
+//     let expected: Vec<U256> = fp12_on_stack(power(f));
 
 //     assert_eq!(output, expected);
 
@@ -209,7 +215,7 @@ fn test_inv_fp12() -> Result<()> {
 
 //     let stack = make_tate_stack(p, q);
 //     let output = run_setup_interpreter("test_miller", stack);
-//     let expected = fp12_to_vec(miller_loop(p, q));
+//     let expected = fp12_on_stack(miller_loop(p, q));
 
 //     assert_eq!(output, expected);
 
@@ -223,7 +229,7 @@ fn test_inv_fp12() -> Result<()> {
 
 //     let stack = make_tate_stack(p, q);
 //     let output = run_setup_interpreter("test_tate", stack);
-//     let expected = fp12_to_vec(tate(p, q));
+//     let expected = fp12_on_stack(tate(p, q));
 
 //     assert_eq!(output, expected);
 

From 530fb65b7ea3d4e08460245fc4db722e9d55bad6 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 13:52:26 +0700
Subject: [PATCH 142/201] cleanup

---
 evm/src/generation/prover_input.rs | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 4dff42c7..1e1e5674 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -58,21 +58,13 @@ impl<F: Field> GenerationState<F> {
     /// Finite field extension operations.
     fn run_ffe(&self, input_fn: &ProverInputFn) -> U256 {
         let field = EvmField::from_str(input_fn.0[1].as_str()).unwrap();
-        let n = match input_fn.0[2].as_str() {
-            "component_0" => 0,
-            "component_1" => 1,
-            "component_2" => 2,
-            "component_3" => 3,
-            "component_4" => 4,
-            "component_5" => 5,
-            "component_6" => 6,
-            "component_7" => 7,
-            "component_8" => 8,
-            "component_9" => 9,
-            "component_10" => 10,
-            "component_11" => 11,
-            _ => panic!("out of bounds"),
-        };
+        let n = input_fn.0[2]
+            .as_str()
+            .split('_')
+            .nth(1)
+            .unwrap()
+            .parse::<usize>()
+            .unwrap();
         let ptr = stack_peek(self, 11 - n).expect("Empty stack").as_usize();
         let mut f: [U256; 12] = [U256::zero(); 12];
         for i in 0..12 {

From 769c615cf1af3790b2be7e47c30925b9a8367bd4 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 14:17:01 +0700
Subject: [PATCH 143/201] cleanup

---
 evm/src/bn254_arithmetic.rs                   |   2 +-
 ...n254_curve_pairing.rs => bn254_pairing.rs} | 241 +++++++++---------
 evm/src/lib.rs                                |   2 +-
 3 files changed, 119 insertions(+), 126 deletions(-)
 rename evm/src/{bn254_curve_pairing.rs => bn254_pairing.rs} (80%)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 72307d78..b921d862 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -185,7 +185,7 @@ fn normalize_fp2(a: Fp2) -> Fp2 {
 /// The degree 3 field extension Fp6 over Fp2 is given by adjoining t, where t^3 = 9 + i
 /// We begin by defining a helper function which multiplies an Fp2 element by 9 + i
 fn i9(a: Fp2) -> Fp2 {
-    let nine = Fp { val: U256::from(9) };
+    let nine = make_fp(9);
     Fp2 {
         re: nine * a.re - a.im,
         im: a.re + nine * a.im,
diff --git a/evm/src/bn254_curve_pairing.rs b/evm/src/bn254_pairing.rs
similarity index 80%
rename from evm/src/bn254_curve_pairing.rs
rename to evm/src/bn254_pairing.rs
index db327e2f..0a5766b6 100644
--- a/evm/src/bn254_curve_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -4,52 +4,58 @@ use crate::bn254_arithmetic::{
     frob_fp12, inv_fp12, make_fp, mul_fp_fp2, sparse_embed, Fp, Fp12, Fp2, UNIT_FP12,
 };
 
-pub type Curve = [Fp; 2];
-pub type TwistedCurve = [Fp2; 2];
-
-pub fn curve_generator() -> Curve {
-    [make_fp(1), make_fp(2)]
+// The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 3
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct Curve {
+    x: Fp,
+    y: Fp,
 }
 
-pub fn twisted_curve_generator() -> TwistedCurve {
-    [
-        Fp2 {
-            re: Fp {
-                val: U256([
-                    0x46debd5cd992f6ed,
-                    0x674322d4f75edadd,
-                    0x426a00665e5c4479,
-                    0x1800deef121f1e76,
-                ]),
-            },
-            im: Fp {
-                val: U256([
-                    0x97e485b7aef312c2,
-                    0xf1aa493335a9e712,
-                    0x7260bfb731fb5d25,
-                    0x198e9393920d483a,
-                ]),
-            },
-        },
-        Fp2 {
-            re: Fp {
-                val: U256([
-                    0x4ce6cc0166fa7daa,
-                    0xe3d1e7690c43d37b,
-                    0x4aab71808dcb408f,
-                    0x12c85ea5db8c6deb,
-                ]),
-            },
-            im: Fp {
-                val: U256([
-                    0x55acdadcd122975b,
-                    0xbc4b313370b38ef3,
-                    0xec9e99ad690c3395,
-                    0x90689d0585ff075,
-                ]),
-            },
-        },
-    ]
+// The twisted curve consists of pairs (x, y): (Fp2, Fp2) | y^2 = x^3 + 3/(9 + i)
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub struct TwistedCurve {
+    x: Fp2,
+    y: Fp2,
+}
+
+// The Tate pairing takes one point each from the curve and its twist and outputs an Fp12
+pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
+    let miller_output = miller_loop(p, q);
+    let post_mul_1 = frob_fp12(6, miller_output) / miller_output;
+    let post_mul_2 = frob_fp12(2, post_mul_1) * post_mul_1;
+    let power_output = power(post_mul_2);
+    frob_fp12(3, post_mul_2) * power_output
+}
+
+pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
+    const EXP: [usize; 253] = [
+        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
+        1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
+        1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
+        1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
+        1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
+        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
+        0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
+        1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    ];
+
+    let mut o = p;
+    let mut acc = UNIT_FP12;
+    let mut line;
+
+    for i in EXP {
+        acc = acc * acc;
+        line = tangent(o, q);
+        acc = line * acc;
+        o = curve_double(o);
+        if i != 0 {
+            line = cord(p, o, q);
+            acc = line * acc;
+            o = curve_add(p, o);
+        }
+    }
+    acc
 }
 
 pub fn power(f: Fp12) -> Fp12 {
@@ -242,107 +248,94 @@ pub fn power(f: Fp12) -> Fp12 {
 }
 
 pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
-    let [px, py] = p;
-    let [qx, qy] = q;
-
-    let cx = -make_fp(3) * px * px;
-    let cy = make_fp(2) * py;
-
-    sparse_embed(py * py - make_fp(9), mul_fp_fp2(cx, qx), mul_fp_fp2(cy, qy))
-}
-
-pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
-    let [p1x, p1y] = p1;
-    let [p2x, p2y] = p2;
-    let [qx, qy] = q;
-
-    let cx = p2y - p1y;
-    let cy = p1x - p2x;
-
+    let cx = -make_fp(3) * p.x * p.x;
+    let cy = make_fp(2) * p.y;
     sparse_embed(
-        p1y * p2x - p2y * p1x,
-        mul_fp_fp2(cx, qx),
-        mul_fp_fp2(cy, qy),
+        p.y * p.y - make_fp(9),
+        mul_fp_fp2(cx, q.x),
+        mul_fp_fp2(cy, q.y),
     )
 }
 
-fn tangent_slope(p: Curve) -> Fp {
-    let [px, py] = p;
-    let num = px * px * make_fp(3);
-    let denom = py * make_fp(2);
-    num / denom
-}
+pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
+    let cx = p2.y - p1.y;
+    let cy = p1.x - p2.x;
 
-fn cord_slope(p: Curve, q: Curve) -> Fp {
-    let [px, py] = p;
-    let [qx, qy] = q;
-    let num = qy - py;
-    let denom = qx - px;
-    num / denom
+    sparse_embed(
+        p1.y * p2.x - p2.y * p1.x,
+        mul_fp_fp2(cx, q.x),
+        mul_fp_fp2(cy, q.y),
+    )
 }
 
 fn third_point(m: Fp, p: Curve, q: Curve) -> Curve {
-    let [px, py] = p;
-    let [qx, _] = q;
-    let ox = m * m - (px + qx);
-    let oy = m * (px - ox) - py;
-    [ox, oy]
+    let x = m * m - (p.x + q.x);
+    Curve {
+        x,
+        y: m * (p.x - x) - p.y,
+    }
 }
 
 fn curve_add(p: Curve, q: Curve) -> Curve {
     if p == q {
         curve_double(p)
     } else {
-        third_point(cord_slope(p, q), p, q)
+        let slope = (q.y - p.y) / (q.x - p.x);
+        third_point(slope, p, q)
     }
 }
 
 fn curve_double(p: Curve) -> Curve {
-    third_point(tangent_slope(p), p, p)
+    let slope = p.x * p.x * make_fp(3) / (p.y * make_fp(2));
+    third_point(slope, p, p)
 }
 
-pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
-    const EXP: [usize; 253] = [
-        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
-        1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
-        1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
-        1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
-        1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
-        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
-        0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
-        1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    ];
-
-    let mut o = p;
-    let mut acc = UNIT_FP12;
-    let mut line;
-
-    for i in EXP {
-        acc = acc * acc;
-        line = tangent(o, q);
-        acc = line * acc;
-        o = curve_double(o);
-        if i != 0 {
-            line = cord(p, o, q);
-            acc = line * acc;
-            o = curve_add(p, o);
-        }
+// This curve is cyclic with generator (1, 2)
+pub fn curve_generator() -> Curve {
+    Curve {
+        x: make_fp(1),
+        y: make_fp(2),
     }
-    acc
 }
 
-pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
-    let mut out = miller_loop(p, q);
-
-    let inv = inv_fp12(out);
-    out = frob_fp12(6, out);
-    out = out * inv;
-
-    let acc = frob_fp12(2, out);
-    out = out * acc;
-
-    let pow = power(out);
-    out = frob_fp12(3, out);
-    out * pow
+// This curve is cyclic with generator (x, y) as follows
+pub fn twisted_curve_generator() -> TwistedCurve {
+    TwistedCurve {
+        x: Fp2 {
+            re: Fp {
+                val: U256([
+                    0x46debd5cd992f6ed,
+                    0x674322d4f75edadd,
+                    0x426a00665e5c4479,
+                    0x1800deef121f1e76,
+                ]),
+            },
+            im: Fp {
+                val: U256([
+                    0x97e485b7aef312c2,
+                    0xf1aa493335a9e712,
+                    0x7260bfb731fb5d25,
+                    0x198e9393920d483a,
+                ]),
+            },
+        },
+        y: Fp2 {
+            re: Fp {
+                val: U256([
+                    0x4ce6cc0166fa7daa,
+                    0xe3d1e7690c43d37b,
+                    0x4aab71808dcb408f,
+                    0x12c85ea5db8c6deb,
+                ]),
+            },
+            im: Fp {
+                val: U256([
+                    0x55acdadcd122975b,
+                    0xbc4b313370b38ef3,
+                    0xec9e99ad690c3395,
+                    0x090689d0585ff075,
+                ]),
+            },
+        },
+    }
 }
diff --git a/evm/src/lib.rs b/evm/src/lib.rs
index 5f2e9ca5..b6bb6130 100644
--- a/evm/src/lib.rs
+++ b/evm/src/lib.rs
@@ -9,7 +9,7 @@
 pub mod all_stark;
 pub mod arithmetic;
 pub mod bn254_arithmetic;
-pub mod bn254_curve_pairing;
+pub mod bn254_pairing;
 pub mod config;
 pub mod constraint_consumer;
 pub mod cpu;

From 8b670d54d1676493a7ac7105a7af5ee6778a35cc Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 14:26:38 +0700
Subject: [PATCH 144/201] meh

---
 evm/src/bn254_arithmetic.rs | 48 ++++++++++++++++++-------------------
 evm/src/bn254_pairing.rs    |  2 +-
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index b921d862..5d52c452 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -65,8 +65,8 @@ impl Div for Fp {
     }
 }
 
-const ZERO_FP: Fp = Fp { val: U256::zero() };
-const UNIT_FP: Fp = Fp { val: U256::one() };
+pub const ZERO_FP: Fp = Fp { val: U256::zero() };
+pub const UNIT_FP: Fp = Fp { val: U256::one() };
 
 fn exp_fp(x: Fp, e: U256) -> Fp {
     let mut current = x;
@@ -148,12 +148,12 @@ impl Div for Fp2 {
     }
 }
 
-const ZERO_FP2: Fp2 = Fp2 {
+pub const ZERO_FP2: Fp2 = Fp2 {
     re: ZERO_FP,
     im: ZERO_FP,
 };
 
-const UNIT_FP2: Fp2 = Fp2 {
+pub const UNIT_FP2: Fp2 = Fp2 {
     re: UNIT_FP,
     im: ZERO_FP,
 };
@@ -376,25 +376,6 @@ pub fn inv_fp12(f: Fp12) -> Fp12 {
     UNIT_FP12 / f
 }
 
-pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
-    let g0 = Fp6 {
-        t0: Fp2 {
-            re: g000,
-            im: ZERO_FP,
-        },
-        t1: g01,
-        t2: ZERO_FP2,
-    };
-
-    let g1 = Fp6 {
-        t0: ZERO_FP2,
-        t1: g11,
-        t2: ZERO_FP2,
-    };
-
-    Fp12 { z0: g0, z1: g1 }
-}
-
 /// The nth frobenius endomorphism of a finite field F of order p^q is given by sending x: F to x^(p^n)
 /// since any element x: F satisfies x^(p^q) = x = x^(p^0), these endomorphisms cycle modulo q
 ///
@@ -827,7 +808,7 @@ const FROB_Z: [Fp2; 12] = [
     },
 ];
 
-fn gen_fp() -> Fp {
+pub fn gen_fp() -> Fp {
     let mut rng = thread_rng();
     let x64 = rng.gen::<u64>();
     let x256 = U256([x64, x64, x64, x64]) % BN_BASE;
@@ -859,3 +840,22 @@ pub fn gen_fp12() -> Fp12 {
 pub fn gen_fp12_sparse() -> Fp12 {
     sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
 }
+
+pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
+    let g0 = Fp6 {
+        t0: Fp2 {
+            re: g000,
+            im: ZERO_FP,
+        },
+        t1: g01,
+        t2: ZERO_FP2,
+    };
+
+    let g1 = Fp6 {
+        t0: ZERO_FP2,
+        t1: g11,
+        t2: ZERO_FP2,
+    };
+
+    Fp12 { z0: g0, z1: g1 }
+}
diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 0a5766b6..71da964c 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -1,7 +1,7 @@
 use ethereum_types::U256;
 
 use crate::bn254_arithmetic::{
-    frob_fp12, inv_fp12, make_fp, mul_fp_fp2, sparse_embed, Fp, Fp12, Fp2, UNIT_FP12,
+    frob_fp12, inv_fp12, make_fp, mul_fp_fp2, Fp, Fp12, Fp2, UNIT_FP12, sparse_embed
 };
 
 // The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2

From 94d99cca9e36c1e51aa831e9cec5199cc96c5851 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 15:55:45 +0700
Subject: [PATCH 145/201] extra comments

---
 evm/src/bn254_pairing.rs | 52 +++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 71da964c..53449c16 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -1,7 +1,9 @@
+use std::ops::Add;
+
 use ethereum_types::U256;
 
 use crate::bn254_arithmetic::{
-    frob_fp12, inv_fp12, make_fp, mul_fp_fp2, Fp, Fp12, Fp2, UNIT_FP12, sparse_embed
+    frob_fp12, inv_fp12, make_fp, mul_fp_fp2, sparse_embed, Fp, Fp12, Fp2, UNIT_FP12,
 };
 
 // The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2
@@ -11,7 +13,26 @@ pub struct Curve {
     y: Fp,
 }
 
-// The twisted consists of pairs (x, y): (Fp2, Fp2) |
+/// Standard addition formula for elliptic curves, source:
+/// https://en.wikipedia.org/wiki/Elliptic_curve#Algebraic_interpretation
+impl Add for Curve {
+    type Output = Self;
+
+    fn add(self, other: Self) -> Self {
+        let m = if self == other {
+            make_fp(3) * self.x * self.x / (make_fp(2) * self.y)
+        } else {
+            (other.y - self.y) / (other.x - self.x)
+        };
+        let x = m * m - (self.x + other.x);
+        Curve {
+            x,
+            y: m * (self.x - x) - self.y,
+        }
+    }
+}
+
+// The twisted curve consists of pairs (x, y): (Fp2, Fp2) | y^2 = x^3 + 3/(9 + i)
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct TwistedCurve {
     x: Fp2,
@@ -48,11 +69,11 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
         acc = acc * acc;
         line = tangent(o, q);
         acc = line * acc;
-        o = curve_double(o);
+        o = o + o;
         if i != 0 {
             line = cord(p, o, q);
             acc = line * acc;
-            o = curve_add(p, o);
+            o = o + p;
         }
     }
     acc
@@ -260,7 +281,6 @@ pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
 pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     let cx = p2.y - p1.y;
     let cy = p1.x - p2.x;
-
     sparse_embed(
         p1.y * p2.x - p2.y * p1.x,
         mul_fp_fp2(cx, q.x),
@@ -268,28 +288,6 @@ pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     )
 }
 
-fn third_point(m: Fp, p: Curve, q: Curve) -> Curve {
-    let x = m * m - (p.x + q.x);
-    Curve {
-        x,
-        y: m * (p.x - x) - p.y,
-    }
-}
-
-fn curve_add(p: Curve, q: Curve) -> Curve {
-    if p == q {
-        curve_double(p)
-    } else {
-        let slope = (q.y - p.y) / (q.x - p.x);
-        third_point(slope, p, q)
-    }
-}
-
-fn curve_double(p: Curve) -> Curve {
-    let slope = p.x * p.x * make_fp(3) / (p.y * make_fp(2));
-    third_point(slope, p, p)
-}
-
 // This curve is cyclic with generator (1, 2)
 pub fn curve_generator() -> Curve {
     Curve {

From f34b35eda2e48a495920f30670fc9b1282dc07d6 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 16:11:32 +0700
Subject: [PATCH 146/201] extra comments

---
 evm/src/bn254_arithmetic.rs | 14 +++++++++-----
 evm/src/bn254_pairing.rs    | 28 +++++++++++++++-------------
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 5d52c452..526e5f9a 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -15,6 +15,14 @@ pub struct Fp {
     pub val: U256,
 }
 
+impl Fp {
+    pub fn new(val: usize) -> Fp {
+        Fp {
+            val: U256::from(val),
+        }
+    }
+}
+
 impl Add for Fp {
     type Output = Self;
 
@@ -81,10 +89,6 @@ fn exp_fp(x: Fp, e: U256) -> Fp {
     product
 }
 
-pub fn make_fp(n: i32) -> Fp {
-    Fp { val: U256::from(n) }
-}
-
 /// The degree 2 field extension Fp2 is given by adjoining i, the square root of -1, to Fp
 /// The arithmetic in this extension is standard complex arithmetic
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -185,7 +189,7 @@ fn normalize_fp2(a: Fp2) -> Fp2 {
 /// The degree 3 field extension Fp6 over Fp2 is given by adjoining t, where t^3 = 9 + i
 /// We begin by defining a helper function which multiplies an Fp2 element by 9 + i
 fn i9(a: Fp2) -> Fp2 {
-    let nine = make_fp(9);
+    let nine = Fp::new(9);
     Fp2 {
         re: nine * a.re - a.im,
         im: a.re + nine * a.im,
diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 53449c16..8c0b501a 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -3,7 +3,7 @@ use std::ops::Add;
 use ethereum_types::U256;
 
 use crate::bn254_arithmetic::{
-    frob_fp12, inv_fp12, make_fp, mul_fp_fp2, sparse_embed, Fp, Fp12, Fp2, UNIT_FP12,
+    frob_fp12, inv_fp12, mul_fp_fp2, sparse_embed, Fp, Fp12, Fp2, UNIT_FP12,
 };
 
 // The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2
@@ -20,7 +20,7 @@ impl Add for Curve {
 
     fn add(self, other: Self) -> Self {
         let m = if self == other {
-            make_fp(3) * self.x * self.x / (make_fp(2) * self.y)
+            Fp::new(3) * self.x * self.x / (Fp::new(2) * self.y)
         } else {
             (other.y - self.y) / (other.x - self.x)
         };
@@ -269,10 +269,10 @@ pub fn power(f: Fp12) -> Fp12 {
 }
 
 pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
-    let cx = -make_fp(3) * p.x * p.x;
-    let cy = make_fp(2) * p.y;
+    let cx = -Fp::new(3) * p.x * p.x;
+    let cy = Fp::new(2) * p.y;
     sparse_embed(
-        p.y * p.y - make_fp(9),
+        p.y * p.y - Fp::new(9),
         mul_fp_fp2(cx, q.x),
         mul_fp_fp2(cy, q.y),
     )
@@ -288,16 +288,18 @@ pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     )
 }
 
-// This curve is cyclic with generator (1, 2)
-pub fn curve_generator() -> Curve {
+// The curve is cyclic with generator (1, 2)
+pub const CURVE_GENERATOR: Curve = {
     Curve {
-        x: make_fp(1),
-        y: make_fp(2),
+        x: Fp { val: U256::one() },
+        y: Fp {
+            val: U256([2, 0, 0, 0]),
+        },
     }
-}
+};
 
-// This curve is cyclic with generator (x, y) as follows
-pub fn twisted_curve_generator() -> TwistedCurve {
+// The twisted curve is cyclic with generator (x, y) as follows
+pub const TWISTED_GENERATOR: TwistedCurve = {
     TwistedCurve {
         x: Fp2 {
             re: Fp {
@@ -336,4 +338,4 @@ pub fn twisted_curve_generator() -> TwistedCurve {
             },
         },
     }
-}
+};

From 17cfae66550de1bd3ff2620e728d8400f322d8e0 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 16:15:27 +0700
Subject: [PATCH 147/201] reorg

---
 evm/src/bn254_pairing.rs | 43 ++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 8c0b501a..d368162e 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -13,7 +13,8 @@ pub struct Curve {
     y: Fp,
 }
 
-/// Standard addition formula for elliptic curves, source:
+/// Standard addition formula for elliptic curves, restricted to the cases
+/// where neither inputs nor output would ever be the identity O. Source:
 /// https://en.wikipedia.org/wiki/Elliptic_curve#Algebraic_interpretation
 impl Add for Curve {
     type Output = Self;
@@ -79,6 +80,26 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
     acc
 }
 
+pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
+    let cx = -Fp::new(3) * p.x * p.x;
+    let cy = Fp::new(2) * p.y;
+    sparse_embed(
+        p.y * p.y - Fp::new(9),
+        mul_fp_fp2(cx, q.x),
+        mul_fp_fp2(cy, q.y),
+    )
+}
+
+pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
+    let cx = p2.y - p1.y;
+    let cy = p1.x - p2.x;
+    sparse_embed(
+        p1.y * p2.x - p2.y * p1.x,
+        mul_fp_fp2(cx, q.x),
+        mul_fp_fp2(cy, q.y),
+    )
+}
+
 pub fn power(f: Fp12) -> Fp12 {
     const EXPS4: [(usize, usize, usize); 64] = [
         (1, 1, 0),
@@ -268,26 +289,6 @@ pub fn power(f: Fp12) -> Fp12 {
     y4 * y2 * y0
 }
 
-pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
-    let cx = -Fp::new(3) * p.x * p.x;
-    let cy = Fp::new(2) * p.y;
-    sparse_embed(
-        p.y * p.y - Fp::new(9),
-        mul_fp_fp2(cx, q.x),
-        mul_fp_fp2(cy, q.y),
-    )
-}
-
-pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
-    let cx = p2.y - p1.y;
-    let cy = p1.x - p2.x;
-    sparse_embed(
-        p1.y * p2.x - p2.y * p1.x,
-        mul_fp_fp2(cx, q.x),
-        mul_fp_fp2(cy, q.y),
-    )
-}
-
 // The curve is cyclic with generator (1, 2)
 pub const CURVE_GENERATOR: Curve = {
     Curve {

From 7b524381731801aa1cbb671712d587d0be4b810d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 21 Jan 2023 16:38:43 +0700
Subject: [PATCH 148/201] en route to ownership

---
 evm/src/generation/prover_input.rs | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index 1e1e5674..b606afe5 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -66,11 +66,18 @@ impl<F: Field> GenerationState<F> {
             .parse::<usize>()
             .unwrap();
         let ptr = stack_peek(self, 11 - n).expect("Empty stack").as_usize();
-        let mut f: [U256; 12] = [U256::zero(); 12];
-        for i in 0..12 {
-            f[i] = kernel_general_peek(self, ptr + i);
-        }
-        field.inverse_fp12(n, f)
+
+        let f: [U256; 12] = match field {
+            Bn254Base => {
+                let mut f: [U256; 12] = [U256::zero(); 12];
+                for i in 0..12 {
+                    f[i] = kernel_general_peek(self, ptr + i);
+                }
+                f
+            }
+            _ => todo!(),
+        };
+        field.field_extension_inverse(n, f)
     }
 
     /// MPT data.
@@ -191,7 +198,7 @@ impl EvmField {
         modexp(x, q, n)
     }
 
-    fn inverse_fp12(&self, n: usize, f: [U256; 12]) -> U256 {
+    fn field_extension_inverse(&self, n: usize, f: [U256; 12]) -> U256 {
         let f: Fp12 = unsafe { transmute(f) };
         let f_inv: [U256; 12] = unsafe { transmute(inv_fp12(f)) };
         f_inv[n]

From ec4cddb7c9ab74fd11aa1e2c7cbf29f8a2488667 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 23 Jan 2023 14:59:08 +0700
Subject: [PATCH 149/201] inv as method

---
 evm/src/bn254_arithmetic.rs                   | 59 +++++----------
 evm/src/bn254_pairing.rs                      | 75 +++++++++++++------
 .../bn254/curve_arithmetic/tate_pairing.asm   |  2 +-
 .../curve/bn254/field_arithmetic/power.asm    |  2 +-
 evm/src/cpu/kernel/tests/bn254.rs             |  9 ++-
 evm/src/generation/prover_input.rs            |  4 +-
 6 files changed, 82 insertions(+), 69 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 526e5f9a..1cdd41bc 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -147,7 +147,7 @@ impl Div for Fp2 {
 
     fn div(self, rhs: Self) -> Self::Output {
         let norm = rhs.re * rhs.re + rhs.im * rhs.im;
-        let inv = mul_fp_fp2(norm, conj_fp2(rhs));
+        let inv = scalar_mul_fp2(norm, conj_fp2(rhs));
         self * inv
     }
 }
@@ -162,7 +162,7 @@ pub const UNIT_FP2: Fp2 = Fp2 {
     im: ZERO_FP,
 };
 
-pub fn mul_fp_fp2(x: Fp, a: Fp2) -> Fp2 {
+pub fn scalar_mul_fp2(x: Fp, a: Fp2) -> Fp2 {
     Fp2 {
         re: x * a.re,
         im: x * a.im,
@@ -199,9 +199,9 @@ fn i9(a: Fp2) -> Fp2 {
 // Fp6 has basis 1, t, t^2 over Fp2
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp6 {
-    t0: Fp2,
-    t1: Fp2,
-    t2: Fp2,
+    pub t0: Fp2,
+    pub t1: Fp2,
+    pub t2: Fp2,
 }
 
 impl Add for Fp6 {
@@ -273,7 +273,7 @@ impl Div for Fp6 {
         let prod_135 = (prod_13 * frob_fp6(5, rhs)).t0;
         let prod_odds_over_phi = normalize_fp2(prod_135);
         let prod_24 = frob_fp6(1, prod_13);
-        let inv = mul_fp2_fp6(prod_odds_over_phi, prod_24);
+        let inv = scalar_mul_fp6(prod_odds_over_phi, prod_24);
         self * inv
     }
 }
@@ -290,7 +290,7 @@ pub const UNIT_FP6: Fp6 = Fp6 {
     t2: ZERO_FP2,
 };
 
-fn mul_fp2_fp6(x: Fp2, f: Fp6) -> Fp6 {
+fn scalar_mul_fp6(x: Fp2, f: Fp6) -> Fp6 {
     Fp6 {
         t0: x * f.t0,
         t1: x * f.t1,
@@ -312,8 +312,8 @@ fn sh(c: Fp6) -> Fp6 {
 /// It thus has basis 1, z over Fp6
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp12 {
-    z0: Fp6,
-    z1: Fp6,
+    pub z0: Fp6,
+    pub z1: Fp6,
 }
 
 impl Mul for Fp12 {
@@ -351,8 +351,8 @@ impl Div for Fp12 {
         let prod_odds = (prod_1379 * frob_fp6(4, prod_17)).t0;
         let prod_odds_over_phi = normalize_fp2(prod_odds);
         let prod_evens_except_six = frob_fp6(1, prod_1379);
-        let prod_penultimate = mul_fp2_fp6(prod_odds_over_phi, prod_evens_except_six);
-        let inv = mul_fp6_fp12(prod_penultimate, conj_fp12(rhs));
+        let prod_penultimate = scalar_mul_fp6(prod_odds_over_phi, prod_evens_except_six);
+        let inv = scalar_mul_fp12(prod_penultimate, conj_fp12(rhs));
         self * inv
     }
 }
@@ -369,15 +369,17 @@ fn conj_fp12(f: Fp12) -> Fp12 {
     }
 }
 
-fn mul_fp6_fp12(c: Fp6, f: Fp12) -> Fp12 {
+fn scalar_mul_fp12(c: Fp6, f: Fp12) -> Fp12 {
     Fp12 {
         z0: c * f.z0,
         z1: c * f.z1,
     }
 }
 
-pub fn inv_fp12(f: Fp12) -> Fp12 {
-    UNIT_FP12 / f
+impl Fp12 {
+    pub fn inv(self) -> Fp12 {
+        UNIT_FP12 / self
+    }
 }
 
 /// The nth frobenius endomorphism of a finite field F of order p^q is given by sending x: F to x^(p^n)
@@ -422,7 +424,7 @@ pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
     let n = n % 12;
     Fp12 {
         z0: frob_fp6(n, f.z0),
-        z1: mul_fp2_fp6(FROB_Z[n], frob_fp6(n, f.z1)),
+        z1: scalar_mul_fp6(FROB_Z[n], frob_fp6(n, f.z1)),
     }
 }
 
@@ -819,14 +821,14 @@ pub fn gen_fp() -> Fp {
     Fp { val: x256 }
 }
 
-fn gen_fp2() -> Fp2 {
+pub fn gen_fp2() -> Fp2 {
     Fp2 {
         re: gen_fp(),
         im: gen_fp(),
     }
 }
 
-fn gen_fp6() -> Fp6 {
+pub fn gen_fp6() -> Fp6 {
     Fp6 {
         t0: gen_fp2(),
         t1: gen_fp2(),
@@ -840,26 +842,3 @@ pub fn gen_fp12() -> Fp12 {
         z1: gen_fp6(),
     }
 }
-
-pub fn gen_fp12_sparse() -> Fp12 {
-    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
-}
-
-pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
-    let g0 = Fp6 {
-        t0: Fp2 {
-            re: g000,
-            im: ZERO_FP,
-        },
-        t1: g01,
-        t2: ZERO_FP2,
-    };
-
-    let g1 = Fp6 {
-        t0: ZERO_FP2,
-        t1: g11,
-        t2: ZERO_FP2,
-    };
-
-    Fp12 { z0: g0, z1: g1 }
-}
diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index d368162e..c75fcb16 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -3,7 +3,7 @@ use std::ops::Add;
 use ethereum_types::U256;
 
 use crate::bn254_arithmetic::{
-    frob_fp12, inv_fp12, mul_fp_fp2, sparse_embed, Fp, Fp12, Fp2, UNIT_FP12,
+    frob_fp12, scalar_mul_fp2, Fp, Fp6, Fp12, Fp2, UNIT_FP12, ZERO_FP, ZERO_FP2, gen_fp, gen_fp2
 };
 
 // The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2
@@ -40,13 +40,10 @@ pub struct TwistedCurve {
     y: Fp2,
 }
 
-// The tate pairing takes point each from the curve and its twist and outputs an Fp12
+// The tate pairing takes a point each from the curve and its twist and outputs an Fp12 element
 pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
     let miller_output = miller_loop(p, q);
-    let post_mul_1 = frob_fp12(6, miller_output) / miller_output;
-    let post_mul_2 = frob_fp12(2, post_mul_1) * post_mul_1;
-    let power_output = power(post_mul_2);
-    frob_fp12(3, post_mul_2) * power_output
+    invariance_inducing_power(miller_output)
 }
 
 pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
@@ -80,13 +77,36 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
     acc
 }
 
+pub fn gen_fp12_sparse() -> Fp12 {
+    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
+}
+
+pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
+    let g0 = Fp6 {
+        t0: Fp2 {
+            re: g000,
+            im: ZERO_FP,
+        },
+        t1: g01,
+        t2: ZERO_FP2,
+    };
+
+    let g1 = Fp6 {
+        t0: ZERO_FP2,
+        t1: g11,
+        t2: ZERO_FP2,
+    };
+
+    Fp12 { z0: g0, z1: g1 }
+}
+
 pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     let cx = -Fp::new(3) * p.x * p.x;
     let cy = Fp::new(2) * p.y;
     sparse_embed(
         p.y * p.y - Fp::new(9),
-        mul_fp_fp2(cx, q.x),
-        mul_fp_fp2(cy, q.y),
+        scalar_mul_fp2(cx, q.x),
+        scalar_mul_fp2(cy, q.y),
     )
 }
 
@@ -95,12 +115,34 @@ pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     let cy = p1.x - p2.x;
     sparse_embed(
         p1.y * p2.x - p2.y * p1.x,
-        mul_fp_fp2(cx, q.x),
-        mul_fp_fp2(cy, q.y),
+        scalar_mul_fp2(cx, q.x),
+        scalar_mul_fp2(cy, q.y),
     )
 }
 
-pub fn power(f: Fp12) -> Fp12 {
+/// The output T of the Miller loop is not an invariant,
+/// but one gets an invariant by raising T to the power
+///     (p^12 - 1)/N = (p^6 - 1)(p^2 + 1)(p^4 - p^2 + 1)/N
+/// where N is the cyclic group order of the curve.
+/// To achieve this, we first exponentiate T by p^6 - 1 via
+///     T = T_6 / T
+/// and then exponentiate the result by p^2 + 1 via
+///     T = T_2 * T
+/// We then note that (p^4 - p^2 + 1)/N can be rewritten as
+///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
+/// where 0 < a0, a1, a2 < p. Then the final power is given by
+///     T = T_3 * (T^a2)_2 * (T^-a1)_1 * (T^-a0)
+pub fn invariance_inducing_power(f: Fp12) -> Fp12 {
+    let mut t = frob_fp12(6, f) / f;
+    t = frob_fp12(2, t) * t;
+    let (t_a2, t_a1, t_a0) = get_powers(t);
+    frob_fp12(3, t) * frob_fp12(2, t_a2) * frob_fp12(1, t_a1) * t_a0
+}
+
+/// Given an f: Fp12, this function computes the triple
+///     f^a2, f^(-a1), f^(-a0)
+///
+fn get_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     const EXPS4: [(usize, usize, usize); 64] = [
         (1, 1, 0),
         (1, 1, 1),
@@ -277,16 +319,7 @@ pub fn power(f: Fp12) -> Fp12 {
     }
     y0 = y0 * sq;
 
-    y0 = inv_fp12(y0);
-
-    y4 = y4 * y2;
-    y4 = y4 * y2;
-    y4 = y4 * y0;
-
-    y4 = frob_fp12(1, y4);
-    y2 = frob_fp12(2, y2);
-
-    y4 * y2 * y0
+    (y2, y4 * y2 * y2 * y0, y0.inv())
 }
 
 // The curve is cyclic with generator (1, 2)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 78ce9085..2557d882 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -8,7 +8,7 @@
 ///     acc = frob_fp12(2, out)
 ///     out = mul_fp12(out, acc)
 ///
-///     pow = power(out)
+///     pow = invariance_inducing_power(out)
 ///     out = frob_fp12(3, out) 
 ///     out = mul_fp12(out, pow)
 ///
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
index d8478357..51a122c0 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
@@ -1,4 +1,4 @@
-/// def power(acc):
+/// def invariance_inducing_power(acc):
 ///     power_init()
 ///     power_loop_4()
 ///     power_loop_2()
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index f6eafc85..0bea6b4c 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,7 +4,8 @@ use std::ops::Range;
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{frob_fp12, gen_fp12, gen_fp12_sparse, inv_fp12, Fp12};
+use crate::bn254_arithmetic::{frob_fp12, gen_fp12, Fp12};
+use crate::bn254_pairing::{gen_fp12_sparse};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
@@ -158,7 +159,7 @@ fn test_inv_fp12() -> Result<()> {
     };
     let interpreter: Interpreter = run_setup_interpreter(setup).unwrap();
     let output: Vec<U256> = extract_kernel_output(inv..inv + 12, interpreter);
-    let expected: Vec<U256> = fp12_on_stack(inv_fp12(f));
+    let expected: Vec<U256> = fp12_on_stack(f.inv());
 
     assert_eq!(output, expected);
 
@@ -166,7 +167,7 @@ fn test_inv_fp12() -> Result<()> {
 }
 
 // #[test]
-// fn test_power() -> Result<()> {
+// fn test_invariance_inducing_power() -> Result<()> {
 //     let ptr = U256::from(300);
 //     let out = U256::from(400);
 
@@ -182,7 +183,7 @@ fn test_inv_fp12() -> Result<()> {
 //     ]);
 
 //     let output: Vec<U256> = run_setup_interpreter("test_pow", stack);
-//     let expected: Vec<U256> = fp12_on_stack(power(f));
+//     let expected: Vec<U256> = fp12_on_stack(invariance_inducing_power(f));
 
 //     assert_eq!(output, expected);
 
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index b606afe5..dacf0423 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -5,7 +5,7 @@ use anyhow::{bail, Error};
 use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;
 
-use crate::bn254_arithmetic::{inv_fp12, Fp12};
+use crate::bn254_arithmetic::Fp12;
 use crate::generation::prover_input::EvmField::{
     Bn254Base, Bn254Scalar, Secp256k1Base, Secp256k1Scalar,
 };
@@ -200,7 +200,7 @@ impl EvmField {
 
     fn field_extension_inverse(&self, n: usize, f: [U256; 12]) -> U256 {
         let f: Fp12 = unsafe { transmute(f) };
-        let f_inv: [U256; 12] = unsafe { transmute(inv_fp12(f)) };
+        let f_inv: [U256; 12] = unsafe { transmute(f.inv()) };
         f_inv[n]
     }
 }

From 75c5938c4998da69fd3aa9962d1a29e497db6fc7 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 24 Jan 2023 00:01:47 +0700
Subject: [PATCH 150/201] rewrite w methods

---
 evm/src/bn254_arithmetic.rs       | 403 ++++++++++++++++--------------
 evm/src/bn254_pairing.rs          |  25 +-
 evm/src/cpu/kernel/tests/bn254.rs |  22 +-
 3 files changed, 227 insertions(+), 223 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 1cdd41bc..fb8277eb 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -64,12 +64,18 @@ impl Mul for Fp {
     }
 }
 
+impl Fp {
+    pub fn inv(self) -> Fp {
+        exp_fp(self, BN_BASE - 2)
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
 impl Div for Fp {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
-        let inv = exp_fp(rhs, BN_BASE - 2);
-        self * inv
+        self * rhs.inv()
     }
 }
 
@@ -97,6 +103,16 @@ pub struct Fp2 {
     pub im: Fp,
 }
 
+pub const ZERO_FP2: Fp2 = Fp2 {
+    re: ZERO_FP,
+    im: ZERO_FP,
+};
+
+pub const UNIT_FP2: Fp2 = Fp2 {
+    re: UNIT_FP,
+    im: ZERO_FP,
+};
+
 impl Add for Fp2 {
     type Output = Self;
 
@@ -141,61 +157,58 @@ impl Mul for Fp2 {
     }
 }
 
-/// The inverse of z is given by z'/||z|| since ||z|| = zz'
+impl Fp2 {
+    /// We preemptively define a helper function which multiplies an Fp2 element by 9 + i
+    fn i9(self) -> Fp2 {
+        let nine = Fp::new(9);
+        Fp2 {
+            re: nine * self.re - self.im,
+            im: self.re + nine * self.im,
+        }
+    }
+
+    pub fn scale(self, x: Fp) -> Fp2 {
+        Fp2 {
+            re: x * self.re,
+            im: x * self.im,
+        }
+    }
+
+    // This function takes the complex conjugate
+    fn conj(self) -> Fp2 {
+        Fp2 {
+            re: self.re,
+            im: -self.im,
+        }
+    }
+
+    // Return the field norm zz' = re^2 + im^2 (the squared magnitude) of the complex number
+    fn norm(self) -> Fp {
+        self.re * self.re + self.im * self.im
+    }
+
+    // This function divides the input by its norm zz', i.e. it returns z/(zz') = 1/z'
+    fn normalize(self) -> Fp2 {
+        let norm = self.norm();
+        self.scale(UNIT_FP / norm)
+    }
+    /// The inverse of z is given by z'/(zz'), where the norm zz' = re^2 + im^2 lies in Fp
+    pub fn inv(self) -> Fp2 {
+        let norm = self.re * self.re + self.im * self.im;
+        self.conj().scale(norm.inv())
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
 impl Div for Fp2 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
-        let norm = rhs.re * rhs.re + rhs.im * rhs.im;
-        let inv = scalar_mul_fp2(norm, conj_fp2(rhs));
-        self * inv
-    }
-}
-
-pub const ZERO_FP2: Fp2 = Fp2 {
-    re: ZERO_FP,
-    im: ZERO_FP,
-};
-
-pub const UNIT_FP2: Fp2 = Fp2 {
-    re: UNIT_FP,
-    im: ZERO_FP,
-};
-
-pub fn scalar_mul_fp2(x: Fp, a: Fp2) -> Fp2 {
-    Fp2 {
-        re: x * a.re,
-        im: x * a.im,
-    }
-}
-
-// This function takes the complex conjugate
-fn conj_fp2(a: Fp2) -> Fp2 {
-    Fp2 {
-        re: a.re,
-        im: -a.im,
-    }
-}
-
-// This function normalizes the input to the complex unit circle
-fn normalize_fp2(a: Fp2) -> Fp2 {
-    let norm = a.re * a.re + a.im * a.im;
-    Fp2 {
-        re: a.re / norm,
-        im: a.im / norm,
+        self * rhs.inv()
     }
 }
 
 /// The degree 3 field extension Fp6 over Fp2 is given by adjoining t, where t^3 = 9 + i
-/// We begin by defining a helper function which multiplies an Fp2 element by 9 + i
-fn i9(a: Fp2) -> Fp2 {
-    let nine = Fp::new(9);
-    Fp2 {
-        re: nine * a.re - a.im,
-        im: a.re + nine * a.im,
-    }
-}
-
 // Fp6 has basis 1, t, t^2 over Fp2
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Fp6 {
@@ -204,6 +217,18 @@ pub struct Fp6 {
     pub t2: Fp2,
 }
 
+pub const ZERO_FP6: Fp6 = Fp6 {
+    t0: ZERO_FP2,
+    t1: ZERO_FP2,
+    t2: ZERO_FP2,
+};
+
+pub const UNIT_FP6: Fp6 = Fp6 {
+    t0: UNIT_FP2,
+    t1: ZERO_FP2,
+    t2: ZERO_FP2,
+};
+
 impl Add for Fp6 {
     type Output = Self;
 
@@ -245,66 +270,89 @@ impl Mul for Fp6 {
 
     fn mul(self, other: Self) -> Self {
         Fp6 {
-            t0: self.t0 * other.t0 + i9(self.t1 * other.t2 + self.t2 * other.t1),
-            t1: self.t0 * other.t1 + self.t1 * other.t0 + i9(self.t2 * other.t2),
+            t0: self.t0 * other.t0 + (self.t1 * other.t2 + self.t2 * other.t1).i9(),
+            t1: self.t0 * other.t1 + self.t1 * other.t0 + (self.t2 * other.t2).i9(),
             t2: self.t0 * other.t2 + self.t1 * other.t1 + self.t2 * other.t0,
         }
     }
 }
 
-/// Let x_n = x^(p^n) and note that
-///     x_0 = x^(p^0) = x^1 = x
-///     (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m}
-/// By Galois Theory, given x: Fp6, the product
-///     phi = x_0 * x_1 * x_2 * x_3 * x_4 * x_5
-/// lands in Fp, and hence the inverse of x is given by
-///     (x_1 * x_2 * x_3 * x_4 * x_5) / phi
-/// We can save compute by rearranging the numerator:
-///     (x_1 * x_3) * x_5 * (x_1 * x_3)_1
-/// By Galois theory, the following are in Fp2 and are complex conjugates
-///     x_1 * x_3 * x_5,  x_0 * x_2 * x_4
-/// Thus phi = norm(x_1 * x_3 * x_5), and hence the inverse is given by
-///     normalize([x_1 * x_3] * x_5) * [x_1 * x_3]_1
+impl Fp6 {
+    fn scale(self, x: Fp2) -> Fp6 {
+        Fp6 {
+            t0: x * self.t0,
+            t1: x * self.t1,
+            t2: x * self.t2,
+        }
+    }
+
+    /// This function multiplies an Fp6 element by t, and hence shifts the bases,
+    /// where the t^2 coefficient picks up a factor of 9+i as the 1 coefficient of the output
+    fn sh(self) -> Fp6 {
+        Fp6 {
+            t0: self.t2.i9(),
+            t1: self.t0,
+            t2: self.t1,
+        }
+    }
+
+    /// The nth frobenius endomorphism of a p^q field is given by mapping
+    ///     x to x^(p^n)
+    /// which sends a + bt + ct^2: Fp6 to
+    ///     a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n)
+    /// Note that p == 3 mod 4, and i^3 = -i, so x + yi gets mapped to
+    ///     (x + yi)^(p^n) = x^(p^n) + y^(p^n) i^(p^n) = x + y i^(p^n mod 4)
+    /// which reduces to x + yi for n even and x - yi for n odd
+    /// The values of t^(p^n) and t^(2p^n) are precomputed in
+    /// the constant arrays FROB_T1 and FROB_T2
+    fn frob(self, n: usize) -> Fp6 {
+        let n = n % 6;
+        let frob_t1 = FROB_T1[n];
+        let frob_t2 = FROB_T2[n];
+
+        if n % 2 != 0 {
+            Fp6 {
+                t0: self.t0.conj(),
+                t1: frob_t1 * self.t1.conj(),
+                t2: frob_t2 * self.t2.conj(),
+            }
+        } else {
+            Fp6 {
+                t0: self.t0,
+                t1: frob_t1 * self.t1,
+                t2: frob_t2 * self.t2,
+            }
+        }
+    }
+
+    /// Let x_n = x^(p^n) and note that
+    ///     x_0 = x^(p^0) = x^1 = x
+    ///     (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m}
+    /// By Galois Theory, given x: Fp6, the product
+    ///     phi = x_0 * x_1 * x_2 * x_3 * x_4 * x_5
+    /// lands in Fp, and hence the inverse of x is given by
+    ///     (x_1 * x_2 * x_3 * x_4 * x_5) / phi
+    /// We can save compute by rearranging the numerator:
+    ///     (x_1 * x_3) * x_5 * (x_1 * x_3)_1
+    /// By Galois theory, the following are in Fp2 and are complex conjugates
+    ///     x_1 * x_3 * x_5,  x_0 * x_2 * x_4
+    /// Thus phi = norm(x_1 * x_3 * x_5), and hence the inverse is given by
+    ///     normalize([x_1 * x_3] * x_5) * [x_1 * x_3]_1
+    pub fn inv(self) -> Fp6 {
+        let prod_13 = self.frob(1) * self.frob(3);
+        let prod_135 = (prod_13 * self.frob(5)).t0;
+        let prod_odds_over_phi = prod_135.normalize();
+        let prod_24 = prod_13.frob(1);
+        prod_24.scale(prod_odds_over_phi)
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
 impl Div for Fp6 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
-        let prod_13 = frob_fp6(1, rhs) * frob_fp6(3, rhs);
-        let prod_135 = (prod_13 * frob_fp6(5, rhs)).t0;
-        let prod_odds_over_phi = normalize_fp2(prod_135);
-        let prod_24 = frob_fp6(1, prod_13);
-        let inv = scalar_mul_fp6(prod_odds_over_phi, prod_24);
-        self * inv
-    }
-}
-
-pub const ZERO_FP6: Fp6 = Fp6 {
-    t0: ZERO_FP2,
-    t1: ZERO_FP2,
-    t2: ZERO_FP2,
-};
-
-pub const UNIT_FP6: Fp6 = Fp6 {
-    t0: UNIT_FP2,
-    t1: ZERO_FP2,
-    t2: ZERO_FP2,
-};
-
-fn scalar_mul_fp6(x: Fp2, f: Fp6) -> Fp6 {
-    Fp6 {
-        t0: x * f.t0,
-        t1: x * f.t1,
-        t2: x * f.t2,
-    }
-}
-
-/// This function multiplies an Fp6 element by t, and hence shifts the bases,
-/// where the t^2 coefficient picks up a factor of 9+i as the 1 coefficient of the output
-fn sh(c: Fp6) -> Fp6 {
-    Fp6 {
-        t0: i9(c.t2),
-        t1: c.t0,
-        t2: c.t1,
+        self * rhs.inv()
     }
 }
 
@@ -316,6 +364,11 @@ pub struct Fp12 {
     pub z1: Fp6,
 }
 
+pub const UNIT_FP12: Fp12 = Fp12 {
+    z0: UNIT_FP6,
+    z1: ZERO_FP6,
+};
+
 impl Mul for Fp12 {
     type Output = Self;
 
@@ -324,107 +377,69 @@ impl Mul for Fp12 {
         let h1 = self.z1 * other.z1;
         let h01 = (self.z0 + self.z1) * (other.z0 + other.z1);
         Fp12 {
-            z0: h0 + sh(h1),
+            z0: h0 + h1.sh(),
             z1: h01 - (h0 + h1),
         }
     }
 }
 
-/// By Galois Theory, given x: Fp12, the product
-///     phi = Prod_{i=0}^11 x_i
-/// lands in Fp, and hence the inverse of x is given by
-///     (Prod_{i=1}^11 x_i) / phi
-/// The 6th Frob map is nontrivial but leaves Fp6 fixed and hence must be the conjugate:
-///     x_6 = (a + bz)_6 = a - bz = conj_fp12(x)
-/// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expresed as:
-///     [(prod_17) * (prod_17)_2] * (prod_17)_4 * [(prod_17) * (prod_17)_2]_1
-/// By Galois theory, both the following are in Fp2 and are complex conjugates
-///     prod_odds,  prod_evens
-/// Thus phi = norm(prod_odds), and hence the inverse is given by
-///    normalize(prod_odds) * prod_evens_except_six * conj_fp12(x)
+impl Fp12 {
+    fn conj(self) -> Fp12 {
+        Fp12 {
+            z0: self.z0,
+            z1: -self.z1,
+        }
+    }
+
+    fn scale(self, x: Fp6) -> Fp12 {
+        Fp12 {
+            z0: x * self.z0,
+            z1: x * self.z1,
+        }
+    }
+
+    /// The nth frobenius endomorphism of a p^q field is given by mapping
+    ///     x to x^(p^n)
+    /// which sends a + bz: Fp12 to
+    ///     a^(p^n) + b^(p^n) * z^(p^n)
+    /// where the values of z^(p^n) are precomputed in the constant array FROB_Z
+    pub fn frob(self, n: usize) -> Fp12 {
+        let n = n % 12;
+        Fp12 {
+            z0: self.z0.frob(n),
+            z1: self.z1.frob(n).scale(FROB_Z[n]),
+        }
+    }
+
+    /// By Galois Theory, given x: Fp12, the product
+    ///     phi = Prod_{i=0}^11 x_i
+    /// lands in Fp, and hence the inverse of x is given by
+    ///     (Prod_{i=1}^11 x_i) / phi
+    /// The 6th Frob map is nontrivial but leaves Fp6 fixed and hence must be the conjugate:
+    ///     x_6 = (a + bz)_6 = a - bz = x.conj()
+    /// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expressed as:
+    ///     [(prod_17) * (prod_17)_2] * (prod_17)_4 * [(prod_17) * (prod_17)_2]_1
+    /// By Galois theory, both the following are in Fp2 and are complex conjugates
+    ///     prod_odds,  prod_evens
+    /// Thus phi = norm(prod_odds), and hence the inverse is given by
+    ///    normalize(prod_odds) * prod_evens_except_six * x.conj()
+    pub fn inv(self) -> Fp12 {
+        let prod_17 = (self.frob(1) * self.frob(7)).z0;
+        let prod_1379 = prod_17 * prod_17.frob(2);
+        let prod_odds = (prod_1379 * prod_17.frob(4)).t0;
+        let prod_odds_over_phi = prod_odds.normalize();
+        let prod_evens_except_six = prod_1379.frob(1);
+        let prod_penultimate = prod_evens_except_six.scale(prod_odds_over_phi);
+        self.conj().scale(prod_penultimate)
+    }
+}
+
+#[allow(clippy::suspicious_arithmetic_impl)]
 impl Div for Fp12 {
     type Output = Self;
 
     fn div(self, rhs: Self) -> Self::Output {
-        let prod_17 = (frob_fp12(1, rhs) * frob_fp12(7, rhs)).z0;
-        let prod_1379 = prod_17 * frob_fp6(2, prod_17);
-        let prod_odds = (prod_1379 * frob_fp6(4, prod_17)).t0;
-        let prod_odds_over_phi = normalize_fp2(prod_odds);
-        let prod_evens_except_six = frob_fp6(1, prod_1379);
-        let prod_penultimate = scalar_mul_fp6(prod_odds_over_phi, prod_evens_except_six);
-        let inv = scalar_mul_fp12(prod_penultimate, conj_fp12(rhs));
-        self * inv
-    }
-}
-
-pub const UNIT_FP12: Fp12 = Fp12 {
-    z0: UNIT_FP6,
-    z1: ZERO_FP6,
-};
-
-fn conj_fp12(f: Fp12) -> Fp12 {
-    Fp12 {
-        z0: f.z0,
-        z1: -f.z1,
-    }
-}
-
-fn scalar_mul_fp12(c: Fp6, f: Fp12) -> Fp12 {
-    Fp12 {
-        z0: c * f.z0,
-        z1: c * f.z1,
-    }
-}
-
-impl Fp12 {
-    pub fn inv(self) -> Fp12 {
-        UNIT_FP12 / self
-    }
-}
-
-/// The nth frobenius endomorphism of a finite field F of order p^q is given by sending x: F to x^(p^n)
-/// since any element x: F satisfies x^(p^q) = x = x^(p^0), these endomorphisms cycle modulo q
-///
-/// Thus in the case of Fp, there are no nontrivial such endomorphisms since x^p = x.
-///
-/// In the case of Fp2, the first and only nontrivial frobenius map sends a + bi to its complex conjugate:
-///     a^p + b^p(i^p) = a - bi
-/// since p == 3 mod 4, and i^3 = -i
-///
-/// An Fp6 element a + bt + ct^2 is sent to
-///     a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n)
-/// where the values of t^(p^n) and t^(2p^n) are precomputed in the constant arrays FROB_T1 and FROB_T2
-///
-///
-/// An Fp12 element a + bz is sent to
-///     a^(p^n) + b^(p^n) * z^(p^n)
-/// where the values of z^(p^n) are precomputed in the constant array FROB_Z
-
-fn frob_fp6(n: usize, c: Fp6) -> Fp6 {
-    let n = n % 6;
-    let frob_t1 = FROB_T1[n];
-    let frob_t2 = FROB_T2[n];
-
-    if n % 2 != 0 {
-        Fp6 {
-            t0: conj_fp2(c.t0),
-            t1: frob_t1 * conj_fp2(c.t1),
-            t2: frob_t2 * conj_fp2(c.t2),
-        }
-    } else {
-        Fp6 {
-            t0: c.t0,
-            t1: frob_t1 * c.t1,
-            t2: frob_t2 * c.t2,
-        }
-    }
-}
-
-pub fn frob_fp12(n: usize, f: Fp12) -> Fp12 {
-    let n = n % 12;
-    Fp12 {
-        z0: frob_fp6(n, f.z0),
-        z1: scalar_mul_fp6(FROB_Z[n], frob_fp6(n, f.z1)),
+        self * rhs.inv()
     }
 }
 
@@ -523,7 +538,7 @@ const FROB_T2: [Fp2; 6] = [
                     0x848a1f55921ea762,
                     0xd33365f7be94ec72,
                     0x80f3c0b75a181e84,
-                    0x5b54f5e64eea801,
+                    0x05b54f5e64eea801,
                 ]),
             }
         },
@@ -737,7 +752,7 @@ const FROB_Z: [Fp2; 12] = [
                     0x71c39bb757899a9b,
                     0x2307d819d98302a7,
                     0x121dc8b86f6c4ccf,
-                    0xbfab77f2c36b843,
+                    0x0bfab77f2c36b843,
                 ]),
             }
         },
diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index c75fcb16..828865a3 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -2,9 +2,7 @@ use std::ops::Add;
 
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{
-    frob_fp12, scalar_mul_fp2, Fp, Fp6, Fp12, Fp2, UNIT_FP12, ZERO_FP, ZERO_FP2, gen_fp, gen_fp2
-};
+use crate::bn254_arithmetic::{gen_fp, gen_fp2, Fp, Fp12, Fp2, Fp6, UNIT_FP12, ZERO_FP, ZERO_FP2};
 
 // The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -103,24 +101,16 @@ pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
 pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     let cx = -Fp::new(3) * p.x * p.x;
     let cy = Fp::new(2) * p.y;
-    sparse_embed(
-        p.y * p.y - Fp::new(9),
-        scalar_mul_fp2(cx, q.x),
-        scalar_mul_fp2(cy, q.y),
-    )
+    sparse_embed(p.y * p.y - Fp::new(9), q.x.scale(cx), q.y.scale(cy))
 }
 
 pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     let cx = p2.y - p1.y;
     let cy = p1.x - p2.x;
-    sparse_embed(
-        p1.y * p2.x - p2.y * p1.x,
-        scalar_mul_fp2(cx, q.x),
-        scalar_mul_fp2(cy, q.y),
-    )
+    sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x.scale(cx), q.y.scale(cy))
 }
 
-/// The output T of the miller loop is not an invariant, 
+/// The output T of the miller loop is not an invariant,
 /// but one gets an invariant by raising T to the power
 ///     (p^12 - 1)/N = (p^6 - 1)(p^2 + 1)(p^4 - p^2 + 1)/N
 /// where N is the cyclic group order of the curve.
@@ -133,15 +123,14 @@ pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
 /// where 0 < a0, a1, a2 < p. Then the final power is given by
 ///     T = T_3 * (T^a2)_2 * (T^-a1)_1 * (T^-a0)
 pub fn invariance_inducing_power(f: Fp12) -> Fp12 {
-    let mut t = frob_fp12(6, f) / f;
-    t = frob_fp12(2, t) * t;
+    let mut t = f.frob(6) / f;
+    t = t.frob(2) * t;
     let (t_a2, t_a1, t_a0) = get_powers(t);
-    frob_fp12(3, t) * frob_fp12(2, t_a2) * frob_fp12(1, t_a1) * t_a0
+    t.frob(3) * t_a2.frob(2) * t_a1.frob(1) * t_a0
 }
 
 /// Given an f: Fp12, this function computes the triple
 ///     T^a2, T^(-a1), T^(-a0)
-/// 
 fn get_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     const EXPS4: [(usize, usize, usize); 64] = [
         (1, 1, 0),
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 0bea6b4c..62a2a8d3 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,21 +4,21 @@ use std::ops::Range;
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{frob_fp12, gen_fp12, Fp12};
-use crate::bn254_pairing::{gen_fp12_sparse};
+use crate::bn254_arithmetic::{gen_fp12, Fp12};
+use crate::bn254_pairing::gen_fp12_sparse;
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
 use crate::witness::memory::MemoryAddress;
 
 struct InterpreterSetup {
-    offset: String,
+    label: String,
     stack: Vec<U256>,
     memory: Vec<(usize, Vec<U256>)>,
 }
 
 fn run_setup_interpreter(setup: InterpreterSetup) -> Result<Interpreter<'static>> {
-    let label = KERNEL.global_labels[&setup.offset];
+    let label = KERNEL.global_labels[&setup.label];
     let mut stack = setup.stack;
     stack.reverse();
     let mut interpreter = Interpreter::new_with_kernel(label, stack);
@@ -61,7 +61,7 @@ fn setup_mul_test(
     label: &str,
 ) -> InterpreterSetup {
     InterpreterSetup {
-        offset: label.to_string(),
+        label: label.to_string(),
         stack: vec![
             U256::from(in0),
             U256::from(in1),
@@ -107,7 +107,7 @@ fn test_mul_fp12() -> Result<()> {
 
 fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
     InterpreterSetup {
-        offset: label.to_string(),
+        label: label.to_string(),
         stack: vec![U256::from(ptr)],
         memory: vec![(ptr, fp12_on_stack(f))],
     }
@@ -133,10 +133,10 @@ fn test_frob_fp12() -> Result<()> {
     let out_frob_3: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_3);
     let out_frob_6: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_6);
 
-    let exp_frob_1: Vec<U256> = fp12_on_stack(frob_fp12(1, f));
-    let exp_frob_2: Vec<U256> = fp12_on_stack(frob_fp12(2, f));
-    let exp_frob_3: Vec<U256> = fp12_on_stack(frob_fp12(3, f));
-    let exp_frob_6: Vec<U256> = fp12_on_stack(frob_fp12(6, f));
+    let exp_frob_1: Vec<U256> = fp12_on_stack(f.frob(1));
+    let exp_frob_2: Vec<U256> = fp12_on_stack(f.frob(2));
+    let exp_frob_3: Vec<U256> = fp12_on_stack(f.frob(3));
+    let exp_frob_6: Vec<U256> = fp12_on_stack(f.frob(6));
 
     assert_eq!(out_frob_1, exp_frob_1);
     assert_eq!(out_frob_2, exp_frob_2);
@@ -153,7 +153,7 @@ fn test_inv_fp12() -> Result<()> {
     let f: Fp12 = gen_fp12();
 
     let setup = InterpreterSetup {
-        offset: "inv_fp12".to_string(),
+        label: "inv_fp12".to_string(),
         stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],
         memory: vec![(ptr, fp12_on_stack(f))],
     };

From c13cf972372978b73d3f9afe7bda099b9f11ee0c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 24 Jan 2023 09:42:42 +0700
Subject: [PATCH 151/201] tate test

---
 evm/src/bn254_pairing.rs          | 34 +++++++++---------
 evm/src/cpu/kernel/tests/bn254.rs | 58 +++++++++++++++----------------
 2 files changed, 45 insertions(+), 47 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 828865a3..fc470e9b 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -7,8 +7,8 @@ use crate::bn254_arithmetic::{gen_fp, gen_fp2, Fp, Fp12, Fp2, Fp6, UNIT_FP12, ZE
 // The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct Curve {
-    x: Fp,
-    y: Fp,
+    pub x: Fp,
+    pub y: Fp,
 }
 
 /// Standard addition formula for elliptic curves, restricted to the cases  
@@ -34,8 +34,8 @@ impl Add for Curve {
 // The twisted curve consists of pairs (x, y): (Fp2, Fp2) | y^2 = x^3 + 3/(9 + i)
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub struct TwistedCurve {
-    x: Fp2,
-    y: Fp2,
+    pub x: Fp2,
+    pub y: Fp2,
 }
 
 // The tate pairing takes a point each from the curve and its twist and outputs an Fp12 element
@@ -75,8 +75,16 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
     acc
 }
 
-pub fn gen_fp12_sparse() -> Fp12 {
-    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
+pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
+    let cx = -Fp::new(3) * p.x * p.x;
+    let cy = Fp::new(2) * p.y;
+    sparse_embed(p.y * p.y - Fp::new(9), q.x.scale(cx), q.y.scale(cy))
+}
+
+pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
+    let cx = p2.y - p1.y;
+    let cy = p1.x - p2.x;
+    sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x.scale(cx), q.y.scale(cy))
 }
 
 pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
@@ -98,16 +106,8 @@ pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     Fp12 { z0: g0, z1: g1 }
 }
 
-pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
-    let cx = -Fp::new(3) * p.x * p.x;
-    let cy = Fp::new(2) * p.y;
-    sparse_embed(p.y * p.y - Fp::new(9), q.x.scale(cx), q.y.scale(cy))
-}
-
-pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
-    let cx = p2.y - p1.y;
-    let cy = p1.x - p2.x;
-    sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x.scale(cx), q.y.scale(cy))
+pub fn gen_fp12_sparse() -> Fp12 {
+    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
 }
 
 /// The output T of the miller loop is not an invariant,
@@ -308,7 +308,7 @@ fn get_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     }
     y0 = y0 * sq;
 
-    (y2, y4 * y2 * y2 * y0, y0.inv())
+    (y2, y4 * y2 * y2 / y0, y0.inv())
 }
 
 // The curve is cyclic with generator (1, 2)
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 62a2a8d3..761dca0a 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -5,7 +5,9 @@ use anyhow::Result;
 use ethereum_types::U256;
 
 use crate::bn254_arithmetic::{gen_fp12, Fp12};
-use crate::bn254_pairing::gen_fp12_sparse;
+use crate::bn254_pairing::{
+    gen_fp12_sparse, tate, CURVE_GENERATOR, TWISTED_GENERATOR,
+};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
@@ -190,25 +192,6 @@ fn test_inv_fp12() -> Result<()> {
 //     Ok(())
 // }
 
-// fn make_tate_stack(p: Curve, q: TwistedCurve) -> Vec<U256> {
-//     let ptr = U256::from(300);
-//     let out = U256::from(400);
-
-//     let p_: Vec<U256> = p.into_iter().collect();
-//     let q_: Vec<U256> = q.into_iter().flatten().collect();
-
-//     let mut stack = vec![ptr];
-//     stack.extend(p_);
-//     stack.extend(q_);
-//     stack.extend(vec![
-//         ptr,
-//         out,
-//         get_address_from_label("return_fp12_on_stack"),
-//         out,
-//     ]);
-//     stack
-// }
-
 // #[test]
 // fn test_miller() -> Result<()> {
 //     let p: Curve = curve_generator();
@@ -223,16 +206,31 @@ fn test_inv_fp12() -> Result<()> {
 //     Ok(())
 // }
 
-// #[test]
-// fn test_tate() -> Result<()> {
-//     let p: Curve = curve_generator();
-//     let q: TwistedCurve = twisted_curve_generator();
+#[test]
+fn test_tate() -> Result<()> {
+    let ptr: usize = 300;
+    let out: usize = 400;
 
-//     let stack = make_tate_stack(p, q);
-//     let output = run_setup_interpreter("test_tate", stack);
-//     let expected = fp12_on_stack(tate(p, q));
+    let setup = InterpreterSetup {
+        label: "tate".to_string(),
+        stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
+        memory: vec![(
+            ptr,
+            vec![
+                CURVE_GENERATOR.x.val,
+                CURVE_GENERATOR.y.val,
+                TWISTED_GENERATOR.x.re.val,
+                TWISTED_GENERATOR.x.im.val,
+                TWISTED_GENERATOR.y.re.val,
+                TWISTED_GENERATOR.y.im.val,
+            ],
+        )],
+    };
+    let interpreter = run_setup_interpreter(setup).unwrap();
+    let output: Vec<U256> = extract_kernel_output(out..out + 12, interpreter);
+    let expected = fp12_on_stack(tate(CURVE_GENERATOR, TWISTED_GENERATOR));
 
-//     assert_eq!(output, expected);
+    assert_eq!(output, expected);
 
-//     Ok(())
-// }
+    Ok(())
+}

From 8ca6ba7bde6ae341c8277d0084a7e103cfc2e909 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 24 Jan 2023 09:43:47 +0700
Subject: [PATCH 152/201] clean

---
 evm/src/cpu/kernel/tests/bn254.rs | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 761dca0a..5985b404 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -5,9 +5,7 @@ use anyhow::Result;
 use ethereum_types::U256;
 
 use crate::bn254_arithmetic::{gen_fp12, Fp12};
-use crate::bn254_pairing::{
-    gen_fp12_sparse, tate, CURVE_GENERATOR, TWISTED_GENERATOR,
-};
+use crate::bn254_pairing::{gen_fp12_sparse, tate, CURVE_GENERATOR, TWISTED_GENERATOR};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
@@ -211,20 +209,19 @@ fn test_tate() -> Result<()> {
     let ptr: usize = 300;
     let out: usize = 400;
 
+    let inputs: Vec<U256> = vec![
+        CURVE_GENERATOR.x.val,
+        CURVE_GENERATOR.y.val,
+        TWISTED_GENERATOR.x.re.val,
+        TWISTED_GENERATOR.x.im.val,
+        TWISTED_GENERATOR.y.re.val,
+        TWISTED_GENERATOR.y.im.val,
+    ];
+
     let setup = InterpreterSetup {
         label: "tate".to_string(),
         stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
-        memory: vec![(
-            ptr,
-            vec![
-                CURVE_GENERATOR.x.val,
-                CURVE_GENERATOR.y.val,
-                TWISTED_GENERATOR.x.re.val,
-                TWISTED_GENERATOR.x.im.val,
-                TWISTED_GENERATOR.y.re.val,
-                TWISTED_GENERATOR.y.im.val,
-            ],
-        )],
+        memory: vec![(ptr, inputs)],
     };
     let interpreter = run_setup_interpreter(setup).unwrap();
     let output: Vec<U256> = extract_kernel_output(out..out + 12, interpreter);

From 60cbdde879a4b77ed242ce86c0c6e671d617921e Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 24 Jan 2023 14:56:15 +0700
Subject: [PATCH 153/201] clean

---
 evm/src/bn254_pairing.rs                         | 11 +++++++++--
 .../bn254/curve_arithmetic/tate_pairing.asm      | 16 +++-------------
 evm/src/cpu/kernel/tests/bn254.rs                |  1 -
 3 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index fc470e9b..9c4956c2 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -129,8 +129,14 @@ pub fn invariance_inducing_power(f: Fp12) -> Fp12 {
     t.frob(3) * t_a2.frob(2) * t_a1.frob(1) * t_a0
 }
 
-/// Given an f: Fp12, this function computes the triple
+/// Given an f: Fp12, this function computes
 ///     T^a2, T^(-a1), T^(-a0)
+/// by first computing
+///     T^a4, T^a2, T^a0
+/// where a1 is given by
+///     a1 = a4 + 2a2 - a0
+/// thus what remains is inverting T^a0 and returning
+///     T^a2, T^a4 * T^a2 * T^a2 * T^(-a0), T^(-a0)
 fn get_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     const EXPS4: [(usize, usize, usize); 64] = [
         (1, 1, 0),
@@ -308,7 +314,8 @@ fn get_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     }
     y0 = y0 * sq;
 
-    (y2, y4 * y2 * y2 / y0, y0.inv())
+    let y0_inv = y0.inv();
+    (y2, y4 * y2 * y2 * y0_inv, y0_inv)
 }
 
 // The curve is cyclic with generator (1, 2)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 2557d882..fe46a9e7 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -1,18 +1,8 @@
 /// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
 ///     out = miller_loop(P, Q)
-///
-///     inv = inv_fp12(out)
-///     out = frob_fp12(6, out)
-///     out = mul_fp12(out, inv)
-///
-///     acc = frob_fp12(2, out)
-///     out = mul_fp12(out, acc)
-///
-///     pow = invariance_inducing_power(out)
-///     out = frob_fp12(3, out) 
-///     out = mul_fp12(out, pow)
-///
-///     return out
+///     out = out.frob(6) / out
+///     out = out.frob(2) * out
+///     return final_power(out)
 
 global tate:
     // stack:                        ptr, out, retdest
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 5985b404..18a7eb4a 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -208,7 +208,6 @@ fn test_inv_fp12() -> Result<()> {
 fn test_tate() -> Result<()> {
     let ptr: usize = 300;
     let out: usize = 400;
-
     let inputs: Vec<U256> = vec![
         CURVE_GENERATOR.x.val,
         CURVE_GENERATOR.y.val,

From 5deb16486542bc409e26a6229bc1c47a0b1157ac Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 24 Jan 2023 16:35:49 +0700
Subject: [PATCH 154/201] refactor power

---
 evm/src/bn254_pairing.rs                      |  30 +--
 evm/src/cpu/kernel/aggregator.rs              |   2 +-
 .../bn254/curve_arithmetic/final_power.asm    | 229 ++++++++++++++++++
 .../bn254/curve_arithmetic/miller_loop.asm    |   2 +-
 .../bn254/curve_arithmetic/tate_pairing.asm   |  96 +++-----
 .../curve/bn254/field_arithmetic/power.asm    | 223 -----------------
 6 files changed, 276 insertions(+), 306 deletions(-)
 create mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
 delete mode 100644 evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 9c4956c2..2df002c1 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -110,34 +110,34 @@ pub fn gen_fp12_sparse() -> Fp12 {
     sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
 }
 
-/// The output T of the miller loop is not an invariant,
-/// but one gets an invariant by raising T to the power
+/// The output y of the miller loop is not an invariant,
+/// but one gets an invariant by raising y to the power
 ///     (p^12 - 1)/N = (p^6 - 1)(p^2 + 1)(p^4 - p^2 + 1)/N
 /// where N is the cyclic group order of the curve.
-/// To achieve this, we first exponentiate T by p^6 - 1 via
-///     T = T_6 / T
+/// To achieve this, we first exponentiate y by p^6 - 1 via
+///     y = y_6 / y
 /// and then exponentiate the result by p^2 + 1 via
-///     T = T_2 * T
+///     y = y_2 * y
 /// We then note that (p^4 - p^2 + 1)/N can be rewritten as
 ///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
 /// where 0 < a0, a1, a2 < p. Then the final power is given by
-///     T = T_3 * (T^a2)_2 * (T^-a1)_1 * (T^-a0)
+///     y = y_3 * (y^a2)_2 * (y^-a1)_1 * (y^-a0)
 pub fn invariance_inducing_power(f: Fp12) -> Fp12 {
-    let mut t = f.frob(6) / f;
-    t = t.frob(2) * t;
-    let (t_a2, t_a1, t_a0) = get_powers(t);
-    t.frob(3) * t_a2.frob(2) * t_a1.frob(1) * t_a0
+    let mut y = f.frob(6) / f;
+    y = y.frob(2) * y;
+    let (y_a2, y_a1, y_a0) = get_custom_powers(y);
+    y.frob(3) * y_a2.frob(2) * y_a1.frob(1) * y_a0
 }
 
 /// Given an f: Fp12, this function computes
-///     T^a2, T^(-a1), T^(-a0)
+///     y^a2, y^(-a1), y^(-a0)
 /// by first computing
-///     T^a4, T^a2, T^a0
+///     y^a4, y^a2, y^a0
 /// where a1 is given by
 ///     a1 = a4 + 2a2 - a0
-/// thus what remains is inverting T^a0 and returning
-///     T^a2, T^a4 * T^a2 * T^a2 * T^(-a0), T^(-a0)
-fn get_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
+/// thus what remains is inverting y^a0 and returning
+///     y^a2, y^a4 * y^a2 * y^a2 * y^(-a0), y^(-a0)
+fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     const EXPS4: [(usize, usize, usize); 64] = [
         (1, 1, 0),
         (1, 1, 1),
diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index c74baa65..440ee49c 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -27,13 +27,13 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/final_power.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/power.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/util.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_mul.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
new file mode 100644
index 00000000..e790a261
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
@@ -0,0 +1,229 @@
+/// def final_exp(y):
+///     y0, y4, y2 = 1, 1, 1
+///     power_loop_4()
+///     power_loop_2()
+///     power_loop_0()
+///     custom_powers()
+///     final_power()
+///
+/// def custom_powers()
+///     y0 = y0^{-1}
+///     y1 = y4 * y2^2 * y0
+///     return y2, y1, y0
+///
+/// def final_power()
+///     y  = y.frob(3)
+///     y2 = y2.frob(2)
+///     y1 = y1.frob(1)
+///     return y * y2 * y4 * y0
+
+global final_exp:
+    // stack:             val, retdest
+    %stack () -> (1, 1, 1)
+    // stack:    1, 1, 1, val, retdest
+    %mstore_kernel_general(200)  
+    %mstore_kernel_general(224)  
+    %mstore_kernel_general(212)
+    // stack:             val, retdest  {200: y0, 212: y2, 224: y4}
+    stack () -> 64, 62, 65
+    // stack: 64, 62, 65, val, retdest  {200: y0, 212: y2, 224: y4}
+    %jump(power_loop_4)
+
+custom_powers:
+    // stack:                             val, retdest  {200: y0, 212: y2, 224: y4}
+    %stack () -> (200, 236, make_y1_1)
+    // stack:        200, 236, make_y1_1, val, retdest  {200: y0, 212: y2, 224: y4}
+    %jump(inv_fp12)
+make_y1_1:
+    // stack:                             val, retdest  {236: y0^-1, 212: y2, 224: y4}
+    %stack () -> (212, 224, 224, make_y1_2)
+    // stack:   212, 224, 224, make_y1_2, val, retdest  {236: y0^-1, 212: y2, 224: y4}
+    %jump(mul_fp12)
+make_y1_2:
+    // stack:                             val, retdest  {236: y0^-1, 212: y2, 224: y4 * y2}
+    %stack () -> (212, 224, 224, make_y1_3)
+    // stack:   212, 224, 224, make_y1_3, val, retdest  {236: y0^-1, 212: y2, 224: y4 * y2}
+    %jump(mul_fp12)
+make_y1_3:
+    // stack:                             val, retdest  {236: y0^-1, 212: y2, 224: y4 * y2^2}
+    %stack () -> (236, 224, 224, final_power)
+    // stack: 236, 224, 224, final_power, val, retdest  {236: y0^-1, 212: y2, 224: y4 * y2^2}
+    %jump(mul_fp12)
+
+final_power:
+    // stack:                                val, retdest  {val: y  , 212:  y^a2   , 224:  y^a1   , 236: y^a0}
+    %frob_fp12_3
+    // stack:                                val, retdest  {val: y_3, 212:  y^a2   , 224:  y^a1   , 236: y^a0}
+    %stack () -> (212, 212)
+    %frob_fp12_2_
+    POP
+    // stack:                                val, retdest  {val: y_3, 212: (y^a2)_2, 224:  y^a1   , 236: y^a0}
+    PUSH 224
+    %frob_fp12_1
+    POP
+    // stack:                                val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    %stack (val) -> (212, val, val, penult_mul, val)
+    // stack:     212, val, val, penult_mul, val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    %jump(mul_fp12)
+penult_mul:
+    // stack:                                val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    %stack (val) -> (224, val, val, final_mul, val)
+    // stack:      224, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    %jump(mul_fp12)
+final_mul: 
+    // stack:                                val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
+    %stack (val) -> (236, val, val)
+    // stack:                      236, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
+    %jump(mul_fp12)
+
+
+/// def power_loop_4():
+///     for i in range(64):
+///         abc = load(i, power_data_4)
+///         if a:
+///             y4 *= acc
+///         if b:
+///             y2 *= acc
+///         if c:
+///             y0 *= acc
+///         acc = square_fp12(acc)
+///     y4 *= acc
+///
+/// def power_loop_2():
+///     for i in range(62):
+///        ab = load(i, power_data_2)
+///        if a:
+///            y2 *= acc
+///        if b:
+///            y0 *= acc
+///        acc = square_fp12(acc)
+///     y2 *= acc
+///
+/// def power_loop_0():
+///     for i in range(65):
+///         a = load(i, power_data_0)
+///         if a:
+///             y0 *= acc
+///         acc = square_fp12(acc)
+///     y0 *= acc
+
+power_loop_4:
+    // stack:                                     i  , j, k, val  {200: y0, 212: y2, 224: y4}
+    DUP1  ISZERO
+    // stack:                             break?, i  , j, k, val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_4_end)
+    // stack:                                     i  , j, k, val  {200: y0, 212: y2, 224: y4}
+    %sub_const(1)
+    // stack:                                     i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    DUP1  %mload_kernel_code(power_data_4)
+    // stack:                                abc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    DUP1  %lt_const(100)
+    // stack:                         skip?, abc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_4_b)
+    // stack:                                abc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    %sub_const(100)
+    // stack:                                 bc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_4_b  PUSH 224  DUP1  DUP8
+    // stack: val, 224, 224, power_loop_4_b,  bc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+power_loop_4_b:
+    // stack:                               bc, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    DUP1  %lt_const(10)
+    // stack:                        skip?, bc, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_4_c)
+    // stack:                               bc, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    %sub_const(10)
+    // stack:                                c, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_4_c  PUSH 212  DUP1  DUP8
+    // stack: val, 212, 212, power_loop_4_c, c, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+power_loop_4_c:
+    // stack:                              c, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    ISZERO
+    // stack:                          skip?, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_4_sq)
+    // stack:                                 i, j, k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_4_sq  PUSH 200  DUP1  DUP7
+    // stack: val, 200, 200, power_loop_4_sq, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+power_loop_4_sq:
+    // stack:                         i, j, k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_4  DUP5  DUP1
+    // stack: val, val, power_loop_4, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(square_fp12)
+power_loop_4_end:
+    // stack:                           0, j, k, val  {200: y0, 212: y2, 224: y4}
+    POP  
+    // stack:                              j, k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_2  PUSH 224  DUP1  DUP6
+    // stack: val, 224, 224, power_loop_2, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+
+power_loop_2:
+    // stack:                                   j  , k, val  {200: y0, 212: y2, 224: y4}
+    DUP1  ISZERO
+    // stack:                           break?, j  , k, val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_2_end)
+    // stack:                                   j  , k, val  {200: y0, 212: y2, 224: y4}
+    %sub_const(1)
+    // stack:                                   j-1, k, val  {200: y0, 212: y2, 224: y4}
+    DUP1  %mload_kernel_code(power_data_2)
+    // stack:                               ab, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    DUP1  %lt_const(10)
+    // stack:                        skip?, ab, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_2_b)
+    // stack:                               ab, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    %sub_const(10)
+    // stack:                                b, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_2_b  PUSH 212  DUP1  DUP7
+    // stack: val, 212, 212, power_loop_2_b, b, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+power_loop_2_b:
+    // stack:                              b, j, k, val  {200: y0, 212: y2, 224: y4}
+    ISZERO
+    // stack:                          skip?, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_2_sq)
+    // stack:                                 j, k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_2_sq  PUSH 200  DUP1  DUP6
+    // stack: val, 200, 200, power_loop_2_sq, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+power_loop_2_sq:
+    // stack:                         j, k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_2  DUP4  DUP1
+    // stack: val, val, power_loop_2, j, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(square_fp12)
+power_loop_2_end:
+    // stack:                           0, k, val  {200: y0, 212: y2, 224: y4}
+    POP  
+    // stack:                              k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_0  PUSH 212  DUP1  DUP5
+    // stack: val, 212, 212, power_loop_0, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+
+power_loop_0:
+    // stack:                                 k  , val  {200: y0, 212: y2, 224: y4}
+    DUP1  ISZERO
+    // stack:                         break?, k  , val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_0_end)
+    // stack:                                 k  , val  {200: y0, 212: y2, 224: y4}
+    %sub_const(1)
+    // stack:                                 k-1, val  {200: y0, 212: y2, 224: y4}
+    DUP1  %mload_kernel_code(power_data_0)
+    // stack:                              a, k-1, val  {200: y0, 212: y2, 224: y4}
+    ISZERO
+    // stack:                          skip?, k-1, val  {200: y0, 212: y2, 224: y4}
+    %jumpi(power_loop_0_sq)
+    // stack:                                 k-1, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_0_sq  PUSH 200  DUP1  DUP5
+    // stack: val, 200, 200, power_loop_0_sq, k-1, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)
+power_loop_0_sq:
+    // stack:                         k, val  {200: y0, 212: y2, 224: y4}
+    PUSH power_loop_0  DUP3  DUP1
+    // stack: val, val, power_loop_0, k, val  {200: y0, 212: y2, 224: y4}
+    %jump(square_fp12)
+power_loop_0_end:
+    // stack:                        0, val  {200: y0, 212: y2, 224: y4}
+    %stack (i, val) -> (200, val, 200, custom_powers, val)
+    // stack: 200, val, 200, custom_powers, val  {200: y0, 212: y2, 224: y4}
+    %jump(mul_fp12)    
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index aa341288..0c92143b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -25,7 +25,7 @@
 ///     0xnm -= 1
 ///     mul_tangent()
 
-global miller_init:
+global miller:
     // stack:         ptr, out, retdest
     PUSH 1
     // stack:      1, ptr, out, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index fe46a9e7..198d7b0e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -1,74 +1,38 @@
 /// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
 ///     out = miller_loop(P, Q)
-///     out = out.frob(6) / out
-///     out = out.frob(2) * out
-///     return final_power(out)
-
+///     return make_invariant(out)
 global tate:
-    // stack:                        ptr, out, retdest
-    DUP2
-    // stack:                   out, ptr, out, retdest
-    PUSH post_miller
-    // stack:      post_miller, out, ptr, out, retdest
-    SWAP2
-    // stack:      ptr, out, post_miller, out, retdest
-    %jump(miller_init)
-global post_miller:    
-    // stack:                             out, retdest
-    PUSH tate_mul_1
-    // stack:                 tate_mul_1, out, retdest
-    PUSH 100 
-    // stack:            100, tate_mul_1, out, retdest
-    DUP3 
-    // stack:       out, 100, tate_mul_1, out, retdest
+    // stack:                      inp, out, retdest
+    %stack (inp, out) -> (inp, out, make_invariant, out)
+    // stack: inp, out, make_invariant, out, retdest
+    %jump(miller)
+
+
+/// def make_invariant(y: Fp12):
+///     y = first_exp(y)
+///     y = second_exp(y)
+///     return final_exponentiation(y)
+global make_invariant:
+/// def first_exp(t):
+///     return t.frob(6) / t
+    // stack:                      out, retdest  {out: y}
+    %stack (out) -> (out, 100, first_exp, out)         
+    // stack: out, 100, first_exp, out, retdest  {out: y}
     %jump(inv_fp12)
-tate_mul_1:
-    // stack:                             out, retdest  {100: inv}
+global first_exp:
+    // stack:                             out, retdest  {out: y  , 100: y^-1}
     %frob_fp12_6
-    // stack:                             out, retdest  {100: inv}
-    PUSH tate_mul_2
-    // stack:                 tate_mul_2, out, retdest  {100: inv}
-    DUP2
-    // stack:            out, tate_mul_2, out, retdest  {100: inv}
-    PUSH 100 
-    // stack:       100, out, tate_mul_2, out, retdest  {100: inv}
-    DUP2
-    // stack:  out, 100, out, tate_mul_2, out, retdest  {100: inv}
+    // stack:                             out, retdest  {out: y_6, 100: y^-1}
+    %stack (out) -> (out, 100, out, second_exp, out)
+    // stack:  out, 100, out, second_exp, out, retdest  {out: y_6, 100: y^-1}
     %jump(mul_fp12)
-tate_mul_2:
-    // stack:                             out, retdest  {100: inv}
-    PUSH tate_power
-    // stack:                 tate_power, out, retdest  {100: inv}
-    DUP2
-    // stack:            out, tate_power, out, retdest  {100: inv}
-    PUSH 100
-    // stack:       100, out, tate_power, out, retdest  {100: inv}       
-    DUP2 
-    // stack:  out, 100, out, tate_power, out, retdest  {100: inv}
+
+/// def second_exp(t):
+///     return t.frob(2) * t
+global second_exp:
+    // stack:                           out, retdest  {out: y}
+    %stack (out) -> (out, 100, out, final_exp, out)
+    // stack: out, 100, out, final_exp, out, retdest  {out: y}
     %frob_fp12_2_
-    // stack:       100, out, tate_power, out, retdest  {100: acc} 
-    DUP2
-    // stack:  out, 100, out, tate_power, out, retdest  {100: acc}
-    %jump(mul_fp12)
-tate_power: 
-    // stack:                             out, retdest  {100: acc}
-    PUSH tate_return
-    // stack:                tate_return, out, retdest  {100: acc}
-    PUSH 100
-    // stack:           100, tate_return, out, retdest  {100: acc}
-    PUSH 300
-    // stack:      300, 100, tate_return, out, retdest  {100: acc}
-    DUP4
-    // stack: out, 300, 100, tate_return, out, retdest  {100: acc}
-    %move_fp12
-    // stack:      300, 100, tate_return, out, retdest  {100: acc, 300: out}
-    %jump(power)
-tate_return: 
-    // stack:                             out, retdest  {100: pow}
-    PUSH 100
-    // stack:                        100, out, retdest  {100: pow}
-    DUP2
-    // stack:                   out, 100, out, retdest  {100: pow}
-    %frob_fp12_3
-    // stack:                   out, 100, out, retdest  {100: pow}
+    // stack:      100, out, final_exp, out, retdest  {out: y, 100: y_2}
     %jump(mul_fp12)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
deleted file mode 100644
index 51a122c0..00000000
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/power.asm
+++ /dev/null
@@ -1,223 +0,0 @@
-/// def invariance_inducing_power(acc):
-///     power_init()
-///     power_loop_4()
-///     power_loop_2()
-///     power_loop_0()
-///     power_return()
-///
-/// def power_init()
-///     y0, y4, y2 = 1, 1, 1
-///
-/// def power_return()
-///     y0  = y0^{-1}
-///     y4 *= y0 * (y2**2)
-///     y4  = frob_fp12_1(y4)
-///     y2  = frob_fp12_2_(y2)
-///     return y2 * y4 * y0 
-
-global power:
-    // stack:             ptr, out, retdest
-    PUSH 1  DUP1  DUP1
-    // stack:    1, 1, 1, ptr, out, retdest
-    %mstore_kernel_general(200)  %mstore_kernel_general(224)  %mstore_kernel_general(212)
-    // stack:             ptr, out, retdest  {200: y0, 212: y2, 224: y4}
-    PUSH 65  PUSH 62  PUSH 64
-    // stack: 64, 62, 65, ptr, out, retdest  {200: y0, 212: y2, 224: y4}
-    %jump(power_loop_4)
-
-power_return:
-    // stack:                                out, retdest  {200: y0, 212: y2, 224: y4}
-    PUSH power_return_1  PUSH 236  PUSH 200
-    // stack:      200, 236, power_return_1, out, retdest  {200: y0, 212: y2, 224: y4}
-    %jump(inv_fp12)
-power_return_1:
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
-    PUSH power_return_2  PUSH 224  DUP1  PUSH 212
-    // stack: 212, 224, 224, power_return_2, out, retdest  {236: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_return_2: 
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
-    PUSH power_return_3  PUSH 224  DUP1  PUSH 212
-    // stack: 212, 224, 224, power_return_3, out, retdest  {236: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_return_3:
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
-    PUSH power_return_4  PUSH 224  DUP1  PUSH 236
-    // stack: 236, 224, 224, power_return_4, out, retdest  {236: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_return_4:
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
-    PUSH 224
-    // stack:                           224, out, retdest  {236: y0, 212: y2, 224: y4}
-    %frob_fp12_1
-    // stack:                           224, out, retdest  {236: y0, 212: y2, 224: y4}
-    POP
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
-    PUSH 212  DUP1
-    // stack:                      212, 212, out, retdest  {236: y0, 212: y2, 224: y4}
-    %frob_fp12_2_
-    // stack:                           212, out, retdest  {236: y0, 212: y2, 224: y4}
-    POP
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
-    PUSH power_return_5  DUP2  PUSH 236  PUSH 224
-    // stack: 224, 236, out, power_return_5, out, retdest  {236: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_return_5:
-    // stack:                                out, retdest  {236: y0, 212: y2, 224: y4}
-    PUSH 212  DUP2
-    // stack:                      out, 212, out, retdest  {236: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-
-
-/// def power_loop_4():
-///     for i in range(64):
-///         abc = load(i, power_data_4)
-///         if a:
-///             y4 *= acc
-///         if b:
-///             y2 *= acc
-///         if c:
-///             y0 *= acc
-///         acc = square_fp12(acc)
-///     y4 *= acc
-///
-/// def power_loop_2():
-///     for i in range(62):
-///        ab = load(i, power_data_2)
-///        if a:
-///            y2 *= acc
-///        if b:
-///            y0 *= acc
-///        acc = square_fp12(acc)
-///     y2 *= acc
-///
-/// def power_loop_0():
-///     for i in range(65):
-///         a = load(i, power_data_0)
-///         if a:
-///             y0 *= acc
-///         acc = square_fp12(acc)
-///     y0 *= acc
-
-power_loop_4:
-    // stack:                                     i  , j, k, ptr  {200: y0, 212: y2, 224: y4}
-    DUP1  ISZERO
-    // stack:                             break?, i  , j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jumpi(power_loop_4_end)
-    // stack:                                     i  , j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %sub_const(1)
-    // stack:                                     i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    DUP1  %mload_kernel_code(power_data_4)
-    // stack:                                abc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    DUP1  %lt_const(100)
-    // stack:                         skip?, abc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jumpi(power_loop_4_b)
-    // stack:                                abc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %sub_const(100)
-    // stack:                                 bc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_4_b  PUSH 224  DUP1  DUP8
-    // stack: ptr, 224, 224, power_loop_4_b,  bc, i-1, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_loop_4_b:
-    // stack:                               bc, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    DUP1  %lt_const(10)
-    // stack:                        skip?, bc, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jumpi(power_loop_4_c)
-    // stack:                               bc, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %sub_const(10)
-    // stack:                                c, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_4_c  PUSH 212  DUP1  DUP8
-    // stack: ptr, 212, 212, power_loop_4_c, c, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_loop_4_c:
-    // stack:                              c, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    ISZERO
-    // stack:                          skip?, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jumpi(power_loop_4_sq)
-    // stack:                                 i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_4_sq  PUSH 200  DUP1  DUP7
-    // stack: ptr, 200, 200, power_loop_4_sq, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_loop_4_sq:
-    // stack:                         i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_4  DUP5  DUP1
-    // stack: ptr, ptr, power_loop_4, i, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(square_fp12)
-power_loop_4_end:
-    // stack:                           0, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    POP  
-    // stack:                              j, k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_2  PUSH 224  DUP1  DUP6
-    // stack: ptr, 224, 224, power_loop_2, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-
-power_loop_2:
-    // stack:                                   j  , k, ptr  {200: y0, 212: y2, 224: y4}
-    DUP1  ISZERO
-    // stack:                           break?, j  , k, ptr  {200: y0, 212: y2, 224: y4}
-    %jumpi(power_loop_2_end)
-    // stack:                                   j  , k, ptr  {200: y0, 212: y2, 224: y4}
-    %sub_const(1)
-    // stack:                                   j-1, k, ptr  {200: y0, 212: y2, 224: y4}
-    DUP1  %mload_kernel_code(power_data_2)
-    // stack:                               ab, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
-    DUP1  %lt_const(10)
-    // stack:                        skip?, ab, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jumpi(power_loop_2_b)
-    // stack:                               ab, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
-    %sub_const(10)
-    // stack:                                b, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_2_b  PUSH 212  DUP1  DUP7
-    // stack: ptr, 212, 212, power_loop_2_b, b, j-1, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_loop_2_b:
-    // stack:                              b, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    ISZERO
-    // stack:                          skip?, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jumpi(power_loop_2_sq)
-    // stack:                                 j, k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_2_sq  PUSH 200  DUP1  DUP6
-    // stack: ptr, 200, 200, power_loop_2_sq, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-power_loop_2_sq:
-    // stack:                         j, k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_2  DUP4  DUP1
-    // stack: ptr, ptr, power_loop_2, j, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(square_fp12)
-power_loop_2_end:
-    // stack:                           0, k, ptr  {200: y0, 212: y2, 224: y4}
-    POP  
-    // stack:                              k, ptr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_0  PUSH 212  DUP1  DUP5
-    // stack: ptr, 212, 212, power_loop_0, k, ptr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
-
-power_loop_0:
-    // stack:                                 k  , ptr
-    DUP1  ISZERO
-    // stack:                         break?, k  , ptr
-    %jumpi(power_loop_0_end)
-    // stack:                                 k  , ptr
-    %sub_const(1)
-    // stack:                                 k-1, ptr
-    DUP1  %mload_kernel_code(power_data_0)
-    // stack:                              a, k-1, ptr
-    ISZERO
-    // stack:                          skip?, k-1, ptr
-    %jumpi(power_loop_0_sq)
-    // stack:                                 k-1, ptr
-    PUSH power_loop_0_sq  PUSH 200  DUP1  DUP5
-    // stack: ptr, 200, 200, power_loop_0_sq, k-1, ptr
-    %jump(mul_fp12)
-power_loop_0_sq:
-    // stack:                         k, ptr
-    PUSH power_loop_0  DUP3  DUP1
-    // stack: ptr, ptr, power_loop_0, k, ptr
-    %jump(square_fp12)
-power_loop_0_end:
-    // stack:                      0, ptr
-    POP  
-    // stack:                         ptr
-    PUSH 200  PUSH power_return  SWAP2  DUP2 
-    // stack: 200, ptr, 200, power_return
-    %jump(mul_fp12)

From c9b005d22e0d19e3c58d69db336117feaacf1780 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 24 Jan 2023 17:08:29 +0700
Subject: [PATCH 155/201] new power works

---
 .../bn254/curve_arithmetic/final_power.asm    | 166 +++++++++---------
 .../bn254/curve_arithmetic/tate_pairing.asm   |  19 +-
 evm/src/cpu/kernel/tests/bn254.rs             |   1 +
 3 files changed, 97 insertions(+), 89 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
index e790a261..4d126954 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
@@ -18,62 +18,66 @@
 ///     return y * y2 * y4 * y0
 
 global final_exp:
-    // stack:             val, retdest
+    // stack:                  val, retdest
+    %stack (val) -> (val, 300, val)
+    // stack:        val, 300, val, retdest
+    %move_fp12
+    // stack:             300, val, retdest
     %stack () -> (1, 1, 1)
-    // stack:    1, 1, 1, val, retdest
+    // stack:    1, 1, 1, 300, val, retdest
     %mstore_kernel_general(200)  
     %mstore_kernel_general(224)  
     %mstore_kernel_general(212)
-    // stack:             val, retdest  {200: y0, 212: y2, 224: y4}
-    stack () -> 64, 62, 65
-    // stack: 64, 62, 65, val, retdest  {200: y0, 212: y2, 224: y4}
+    // stack:             300, val, retdest  {200: y0, 212: y2, 224: y4}
+    %stack () -> (64, 62, 65)
+    // stack: 64, 62, 65, 300, val, retdest  {200: y0, 212: y2, 224: y4}
     %jump(power_loop_4)
 
 custom_powers:
     // stack:                             val, retdest  {200: y0, 212: y2, 224: y4}
-    %stack () -> (200, 236, make_y1_1)
-    // stack:        200, 236, make_y1_1, val, retdest  {200: y0, 212: y2, 224: y4}
+    %stack () -> (200, 236, make_term_1)
+    // stack:      200, 236, make_term_1, val, retdest  {200: y0, 212: y2, 224: y4}
     %jump(inv_fp12)
-make_y1_1:
-    // stack:                             val, retdest  {236: y0^-1, 212: y2, 224: y4}
-    %stack () -> (212, 224, 224, make_y1_2)
-    // stack:   212, 224, 224, make_y1_2, val, retdest  {236: y0^-1, 212: y2, 224: y4}
+make_term_1:
+    // stack:                             val, retdest  {212: y2, 224: y4, 236: y0^-1}
+    %stack () -> (212, 224, 224, make_term_2)
+    // stack: 212, 224, 224, make_term_2, val, retdest  {212: y2, 224: y4, 236: y0^-1}
     %jump(mul_fp12)
-make_y1_2:
-    // stack:                             val, retdest  {236: y0^-1, 212: y2, 224: y4 * y2}
-    %stack () -> (212, 224, 224, make_y1_3)
-    // stack:   212, 224, 224, make_y1_3, val, retdest  {236: y0^-1, 212: y2, 224: y4 * y2}
+make_term_2:
+    // stack:                             val, retdest  {212: y2, 224: y4 * y2, 236: y0^-1}
+    %stack () -> (212, 224, 224, make_term_3)
+    // stack: 212, 224, 224, make_term_3, val, retdest  {212: y2, 224: y4 * y2, 236: y0^-1}
     %jump(mul_fp12)
-make_y1_3:
-    // stack:                             val, retdest  {236: y0^-1, 212: y2, 224: y4 * y2^2}
+make_term_3:
+    // stack:                             val, retdest  {212: y2, 224: y4 * y2^2, 236: y0^-1}
     %stack () -> (236, 224, 224, final_power)
-    // stack: 236, 224, 224, final_power, val, retdest  {236: y0^-1, 212: y2, 224: y4 * y2^2}
+    // stack: 236, 224, 224, final_power, val, retdest  {212: y2, 224: y4 * y2^2, 236: y0^-1}
     %jump(mul_fp12)
 
 final_power:
-    // stack:                                val, retdest  {val: y  , 212:  y^a2   , 224:  y^a1   , 236: y^a0}
+    // stack:                            val, retdest  {val: y  , 212:  y^a2   , 224:  y^a1   , 236: y^a0}
     %frob_fp12_3
-    // stack:                                val, retdest  {val: y_3, 212:  y^a2   , 224:  y^a1   , 236: y^a0}
+    // stack:                            val, retdest  {val: y_3, 212:  y^a2   , 224:  y^a1   , 236: y^a0}
     %stack () -> (212, 212)
     %frob_fp12_2_
     POP
-    // stack:                                val, retdest  {val: y_3, 212: (y^a2)_2, 224:  y^a1   , 236: y^a0}
+    // stack:                            val, retdest  {val: y_3, 212: (y^a2)_2, 224:  y^a1   , 236: y^a0}
     PUSH 224
     %frob_fp12_1
     POP
-    // stack:                                val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    // stack:                            val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
     %stack (val) -> (212, val, val, penult_mul, val)
-    // stack:     212, val, val, penult_mul, val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    // stack: 212, val, val, penult_mul, val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
     %jump(mul_fp12)
 penult_mul:
-    // stack:                                val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    // stack:                            val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
     %stack (val) -> (224, val, val, final_mul, val)
-    // stack:      224, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    // stack:  224, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
     %jump(mul_fp12)
 final_mul: 
-    // stack:                                val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
+    // stack:                            val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
     %stack (val) -> (236, val, val)
-    // stack:                      236, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
+    // stack:                  236, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
     %jump(mul_fp12)
 
 
@@ -108,122 +112,122 @@ final_mul:
 ///     y0 *= acc
 
 power_loop_4:
-    // stack:                                     i  , j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                     i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  ISZERO
-    // stack:                             break?, i  , j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                             break?, i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_4_end)
-    // stack:                                     i  , j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                     i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(1)
-    // stack:                                     i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                     i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  %mload_kernel_code(power_data_4)
-    // stack:                                abc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  %lt_const(100)
-    // stack:                         skip?, abc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                         skip?, abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_4_b)
-    // stack:                                abc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(100)
-    // stack:                                 bc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                 bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_4_b  PUSH 224  DUP1  DUP8
-    // stack: val, 224, 224, power_loop_4_b,  bc, i-1, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 224, 224, power_loop_4_b,  bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_4_b:
-    // stack:                               bc, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                               bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  %lt_const(10)
-    // stack:                        skip?, bc, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                        skip?, bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_4_c)
-    // stack:                               bc, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                               bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(10)
-    // stack:                                c, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_4_c  PUSH 212  DUP1  DUP8
-    // stack: val, 212, 212, power_loop_4_c, c, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 212, 212, power_loop_4_c, c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_4_c:
-    // stack:                              c, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                              c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     ISZERO
-    // stack:                          skip?, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                          skip?, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_4_sq)
-    // stack:                                 i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                 i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_4_sq  PUSH 200  DUP1  DUP7
-    // stack: val, 200, 200, power_loop_4_sq, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 200, 200, power_loop_4_sq, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_4_sq:
-    // stack:                         i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                         i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_4  DUP5  DUP1
-    // stack: val, val, power_loop_4, i, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(square_fp12)
 power_loop_4_end:
-    // stack:                           0, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                           0, j, k, sqr  {200: y0, 212: y2, 224: y4}
     POP  
-    // stack:                              j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                              j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_2  PUSH 224  DUP1  DUP6
-    // stack: val, 224, 224, power_loop_2, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 224, 224, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 
 power_loop_2:
-    // stack:                                   j  , k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                   j  , k, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  ISZERO
-    // stack:                           break?, j  , k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                           break?, j  , k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_2_end)
-    // stack:                                   j  , k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                   j  , k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(1)
-    // stack:                                   j-1, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                   j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  %mload_kernel_code(power_data_2)
-    // stack:                               ab, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                               ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  %lt_const(10)
-    // stack:                        skip?, ab, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                        skip?, ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_2_b)
-    // stack:                               ab, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                               ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(10)
-    // stack:                                b, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_2_b  PUSH 212  DUP1  DUP7
-    // stack: val, 212, 212, power_loop_2_b, b, j-1, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 212, 212, power_loop_2_b, b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_2_b:
-    // stack:                              b, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                              b, j, k, sqr  {200: y0, 212: y2, 224: y4}
     ISZERO
-    // stack:                          skip?, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                          skip?, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_2_sq)
-    // stack:                                 j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                 j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_2_sq  PUSH 200  DUP1  DUP6
-    // stack: val, 200, 200, power_loop_2_sq, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 200, 200, power_loop_2_sq, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_2_sq:
-    // stack:                         j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                         j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_2  DUP4  DUP1
-    // stack: val, val, power_loop_2, j, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, sqr, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(square_fp12)
 power_loop_2_end:
-    // stack:                           0, k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                           0, k, sqr  {200: y0, 212: y2, 224: y4}
     POP  
-    // stack:                              k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                              k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_0  PUSH 212  DUP1  DUP5
-    // stack: val, 212, 212, power_loop_0, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 212, 212, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 
 power_loop_0:
-    // stack:                                 k  , val  {200: y0, 212: y2, 224: y4}
+    // stack:                                 k  , sqr  {200: y0, 212: y2, 224: y4}
     DUP1  ISZERO
-    // stack:                         break?, k  , val  {200: y0, 212: y2, 224: y4}
+    // stack:                         break?, k  , sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_0_end)
-    // stack:                                 k  , val  {200: y0, 212: y2, 224: y4}
+    // stack:                                 k  , sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(1)
-    // stack:                                 k-1, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                 k-1, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  %mload_kernel_code(power_data_0)
-    // stack:                              a, k-1, val  {200: y0, 212: y2, 224: y4}
+    // stack:                              a, k-1, sqr  {200: y0, 212: y2, 224: y4}
     ISZERO
-    // stack:                          skip?, k-1, val  {200: y0, 212: y2, 224: y4}
+    // stack:                          skip?, k-1, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_0_sq)
-    // stack:                                 k-1, val  {200: y0, 212: y2, 224: y4}
+    // stack:                                 k-1, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_0_sq  PUSH 200  DUP1  DUP5
-    // stack: val, 200, 200, power_loop_0_sq, k-1, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 200, 200, power_loop_0_sq, k-1, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_0_sq:
-    // stack:                         k, val  {200: y0, 212: y2, 224: y4}
+    // stack:                         k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_0  DUP3  DUP1
-    // stack: val, val, power_loop_0, k, val  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, sqr, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(square_fp12)
 power_loop_0_end:
-    // stack:                        0, val  {200: y0, 212: y2, 224: y4}
-    %stack (i, val) -> (200, val, 200, custom_powers, val)
-    // stack: 200, val, 200, custom_powers, val  {200: y0, 212: y2, 224: y4}
+    // stack:                         0, sqr  {200: y0, 212: y2, 224: y4}
+    %stack (i, sqr) -> (200, sqr, 200, custom_powers)
+    // stack:   200, sqr, 200, custom_powers  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)    
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 198d7b0e..b4931ac8 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -13,8 +13,10 @@ global tate:
 ///     y = second_exp(y)
 ///     return final_exponentiation(y)
 global make_invariant:
-/// def first_exp(t):
-///     return t.frob(6) / t
+
+/// map t to t^(p^6 - 1) via 
+///     def first_exp(t):
+///         return t.frob(6) / t
     // stack:                      out, retdest  {out: y}
     %stack (out) -> (out, 100, first_exp, out)         
     // stack: out, 100, first_exp, out, retdest  {out: y}
@@ -27,12 +29,13 @@ global first_exp:
     // stack:  out, 100, out, second_exp, out, retdest  {out: y_6, 100: y^-1}
     %jump(mul_fp12)
 
-/// def second_exp(t):
-///     return t.frob(2) * t
+/// map t to t^(p^2 + 1) via 
+///     def second_exp(t):
+///         return t.frob(2) * t
 global second_exp:
-    // stack:                           out, retdest  {out: y}
-    %stack (out) -> (out, 100, out, final_exp, out)
-    // stack: out, 100, out, final_exp, out, retdest  {out: y}
+    // stack:                                out, retdest  {out: y}
+    %stack (out) -> (out, 100, out, out, final_exp, out)
+    // stack: out, 100, out, out, final_exp, out, retdest  {out: y}
     %frob_fp12_2_
-    // stack:      100, out, final_exp, out, retdest  {out: y, 100: y_2}
+    // stack:      100, out, out, final_exp, out, retdest  {out: y, 100: y_2}
     %jump(mul_fp12)
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 18a7eb4a..a801300c 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -224,6 +224,7 @@ fn test_tate() -> Result<()> {
     };
     let interpreter = run_setup_interpreter(setup).unwrap();
     let output: Vec<U256> = extract_kernel_output(out..out + 12, interpreter);
+    // let output: Vec<U256> = interpreter.stack().to_vec();
     let expected = fp12_on_stack(tate(CURVE_GENERATOR, TWISTED_GENERATOR));
 
     assert_eq!(output, expected);

From 0b81258af3cd44312178c8a8c0413573148bfe3c Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 24 Jan 2023 17:18:13 +0700
Subject: [PATCH 156/201] stack macros

---
 .../bn254/curve_arithmetic/final_power.asm    | 74 ++++++++++++++-----
 1 file changed, 54 insertions(+), 20 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
index 4d126954..515fab1b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
@@ -113,32 +113,40 @@ final_mul:
 
 power_loop_4:
     // stack:                                     i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  ISZERO
+    DUP1  
+    ISZERO
     // stack:                             break?, i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_4_end)
     // stack:                                     i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(1)
     // stack:                                     i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  %mload_kernel_code(power_data_4)
+    DUP1  
+    %mload_kernel_code(power_data_4)
     // stack:                                abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  %lt_const(100)
+    DUP1  
+    %lt_const(100)
     // stack:                         skip?, abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_4_b)
     // stack:                                abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(100)
     // stack:                                 bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_4_b  PUSH 224  DUP1  DUP8
+    %stack () -> (224, 224, power_loop_4_b)
+    // stack:      224, 224, power_loop_4_b,  bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP8
     // stack: sqr, 224, 224, power_loop_4_b,  bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_4_b:
     // stack:                               bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  %lt_const(10)
+    DUP1  
+    %lt_const(10)
     // stack:                        skip?, bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_4_c)
     // stack:                               bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(10)
     // stack:                                c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_4_c  PUSH 212  DUP1  DUP8
+    %stack () -> (212, 212, power_loop_4_c)
+    // stack:      212, 212, power_loop_4_c, c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP8
     // stack: sqr, 212, 212, power_loop_4_c, c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_4_c:
@@ -147,39 +155,51 @@ power_loop_4_c:
     // stack:                          skip?, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_4_sq)
     // stack:                                 i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_4_sq  PUSH 200  DUP1  DUP7
+    %stack () -> (200, 200, power_loop_4_sq)
+    // stack:      200, 200, power_loop_4_sq, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP7
     // stack: sqr, 200, 200, power_loop_4_sq, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_4_sq:
     // stack:                         i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_4  DUP5  DUP1
+    PUSH power_loop_4  
+    // stack:           power_loop_4, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP5  
+    DUP1
     // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(square_fp12)
 power_loop_4_end:
     // stack:                           0, j, k, sqr  {200: y0, 212: y2, 224: y4}
     POP  
     // stack:                              j, k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_2  PUSH 224  DUP1  DUP6
+    %stack () -> (224, 224, power_loop_2) 
+    // stack:      224, 224, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP6
     // stack: sqr, 224, 224, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 
 power_loop_2:
     // stack:                                   j  , k, sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  ISZERO
+    DUP1  
+    ISZERO
     // stack:                           break?, j  , k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_2_end)
     // stack:                                   j  , k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(1)
     // stack:                                   j-1, k, sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  %mload_kernel_code(power_data_2)
+    DUP1  
+    %mload_kernel_code(power_data_2)
     // stack:                               ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  %lt_const(10)
+    DUP1  
+    %lt_const(10)
     // stack:                        skip?, ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_2_b)
     // stack:                               ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(10)
     // stack:                                b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_2_b  PUSH 212  DUP1  DUP7
+    %stack () -> (212, 212, power_loop_2_b) 
+    // stack:      212, 212, power_loop_2_b, b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP7
     // stack: sqr, 212, 212, power_loop_2_b, b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_2_b:
@@ -188,42 +208,56 @@ power_loop_2_b:
     // stack:                          skip?, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_2_sq)
     // stack:                                 j, k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_2_sq  PUSH 200  DUP1  DUP6
+    %stack () -> (200, 200, power_loop_2_sq) 
+    // stack:      200, 200, power_loop_2_sq, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP6
     // stack: sqr, 200, 200, power_loop_2_sq, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_2_sq:
     // stack:                         j, k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_2  DUP4  DUP1
+    PUSH power_loop_2  
+    // stack:           power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP4  
+    DUP1
     // stack: sqr, sqr, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(square_fp12)
 power_loop_2_end:
     // stack:                           0, k, sqr  {200: y0, 212: y2, 224: y4}
     POP  
     // stack:                              k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_0  PUSH 212  DUP1  DUP5
+    %stack () -> (212, 212, power_loop_0)
+    // stack:      212, 212, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP5
     // stack: sqr, 212, 212, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 
 power_loop_0:
     // stack:                                 k  , sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  ISZERO
+    DUP1  
+    ISZERO
     // stack:                         break?, k  , sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_0_end)
     // stack:                                 k  , sqr  {200: y0, 212: y2, 224: y4}
     %sub_const(1)
     // stack:                                 k-1, sqr  {200: y0, 212: y2, 224: y4}
-    DUP1  %mload_kernel_code(power_data_0)
+    DUP1  
+    %mload_kernel_code(power_data_0)
     // stack:                              a, k-1, sqr  {200: y0, 212: y2, 224: y4}
     ISZERO
     // stack:                          skip?, k-1, sqr  {200: y0, 212: y2, 224: y4}
     %jumpi(power_loop_0_sq)
     // stack:                                 k-1, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_0_sq  PUSH 200  DUP1  DUP5
+    %stack () -> (200, 200, power_loop_0_sq)  
+    // stack:      200, 200, power_loop_0_sq, k-1, sqr  {200: y0, 212: y2, 224: y4}
+    DUP5
     // stack: sqr, 200, 200, power_loop_0_sq, k-1, sqr  {200: y0, 212: y2, 224: y4}
     %jump(mul_fp12)
 power_loop_0_sq:
     // stack:                         k, sqr  {200: y0, 212: y2, 224: y4}
-    PUSH power_loop_0  DUP3  DUP1
+    PUSH power_loop_0  
+    // stack:           power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
+    DUP3  
+    DUP1
     // stack: sqr, sqr, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
     %jump(square_fp12)
 power_loop_0_end:

From d98c69f0bc909fee68fd51e1f3ed8df4eda3dd47 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 14:12:29 +0700
Subject: [PATCH 157/201] better comments

---
 evm/src/bn254_arithmetic.rs                   | 47 ++++++++++---------
 .../bn254/curve_arithmetic/final_power.asm    |  2 +-
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index fb8277eb..26445f06 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -158,7 +158,7 @@ impl Mul for Fp2 {
 }
 
 impl Fp2 {
-    /// We preemptively define a helper function which multiplies an Fp2 element by 9 + i
+    // We preemptively define a helper function which multiplies an Fp2 element by 9 + i
     fn i9(self) -> Fp2 {
         let nine = Fp::new(9);
         Fp2 {
@@ -167,6 +167,7 @@ impl Fp2 {
         }
     }
 
+    // This function scalar multiplies an Fp2 by an Fp
     pub fn scale(self, x: Fp) -> Fp2 {
         Fp2 {
             re: x * self.re,
@@ -174,7 +175,11 @@ impl Fp2 {
         }
     }
 
-    // This function takes the complex conjugate
+    /// Return the complex conjugate z' of z: Fp2
+    /// This also happens to be the frobenius map 
+    ///     z -> z^p
+    /// since x^p = x for every x in Fp, and p == 3 mod 4 gives i^p = i^3 = -i, so
+    ///     (x + yi)^p = x^p + y^p * i^p = x - yi
     fn conj(self) -> Fp2 {
         Fp2 {
             re: self.re,
@@ -182,20 +187,15 @@ impl Fp2 {
         }
     }
 
-    // Return the magnitude of the complex number
-    fn norm(self) -> Fp {
+    // Return the magnitude squared of a complex number
+    fn norm_sq(self) -> Fp {
         self.re * self.re + self.im * self.im
     }
 
-    // This function normalizes the input to the complex unit circle
-    fn normalize(self) -> Fp2 {
-        let norm = self.norm();
-        self.scale(UNIT_FP / norm)
-    }
-    /// The inverse of z is given by z'/||z|| since ||z|| = zz'
+    /// The inverse of z is given by z'/||z||^2 since ||z||^2 = zz'
     pub fn inv(self) -> Fp2 {
         let norm = self.re * self.re + self.im * self.im;
-        self.conj().scale(norm)
+        self.conj().scale(norm.inv())
     }
 }
 
@@ -300,11 +300,10 @@ impl Fp6 {
     ///     x to x^(p^n)
     /// which sends a + bt + ct^2: Fp6 to
     ///     a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n)
-    /// Note that p == 3 mod 4, and i^3 = -i, so x + yi gets mapped to
-    ///     (x + yi)^(p^n) = x^(p^n) + y^(p^n) i^(p^n) = x + y i^(p^n mod 4)
-    /// which reduces to x + yi for n even and x - yi for n odd
-    /// The values of t^(p^n) and t^(2p^n) are precomputed in
-    /// the constant arrays FROB_T1 and FROB_T2
+    /// The Fp2 coefficients a, b, c are conjugated for odd n and left unchanged
+    /// for even n (see the comment on Fp2::conj), while the values of
+    ///     t^(p^n) and t^(2p^n)
+    /// are precomputed in the constant arrays FROB_T1 and FROB_T2
     fn frob(self, n: usize) -> Fp6 {
         let n = n % 6;
         let frob_t1 = FROB_T1[n];
@@ -336,12 +335,15 @@ impl Fp6 {
     ///     (x_1 * x_3) * x_5 * (x_1 * x_3)_1
     /// By Galois theory, the following are in Fp2 and are complex conjugates
     ///     x_1 * x_3 * x_5,  x_0 * x_2 * x_4
-    /// Thus phi = norm(x_1 * x_3 * x_5), and hence the inverse is given by
-    ///     normalize([x_1 * x_3] * x_5) * [x_1 * x_3]_1
+    /// and therefore 
+    ///     phi = ||x_1 * x_3 * x_5||^2
+    /// and hence the inverse is given by
+    ///     ([x_1 * x_3] * x_5) * [x_1 * x_3]_1 / ||[x_1 * x_3] * x_5||^2
     pub fn inv(self) -> Fp6 {
         let prod_13 = self.frob(1) * self.frob(3);
         let prod_135 = (prod_13 * self.frob(5)).t0;
-        let prod_odds_over_phi = prod_135.normalize();
+        let phi = prod_135.norm_sq();
+        let prod_odds_over_phi = prod_135.scale(phi.inv());
         let prod_24 = prod_13.frob(1);
         prod_24.scale(prod_odds_over_phi)
     }
@@ -427,10 +429,11 @@ impl Fp12 {
         let prod_17 = (self.frob(1) * self.frob(7)).z0;
         let prod_1379 = prod_17 * prod_17.frob(2);
         let prod_odds = (prod_1379 * prod_17.frob(4)).t0;
-        let prod_odds_over_phi = prod_odds.normalize();
+        let phi = prod_odds.norm_sq();
+        let prod_odds_over_phi = prod_odds.scale(phi.inv());
         let prod_evens_except_six = prod_1379.frob(1);
-        let prod_penultimate = prod_evens_except_six.scale(prod_odds_over_phi);
-        self.conj().scale(prod_penultimate)
+        let prod_except_six = prod_evens_except_six.scale(prod_odds_over_phi);
+        self.conj().scale(prod_except_six)
     }
 }
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
index 515fab1b..942ba4fa 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
@@ -1,5 +1,5 @@
 /// def final_exp(y):
-///     y0, y4, y2 = 1, 1, 1
+///     y4, y2, y0 = 1, 1, 1
 ///     power_loop_4()
 ///     power_loop_2()
 ///     power_loop_0()

From 9c8f11666f8d694d2a039a4943ae12125cccaaf6 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 14:15:46 +0700
Subject: [PATCH 158/201] ocd

---
 evm/src/bn254_arithmetic.rs | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 26445f06..72f006a1 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -194,8 +194,8 @@ impl Fp2 {
 
     /// The inverse of z is given by z'/||z||^2 since ||z||^2 = zz'
     pub fn inv(self) -> Fp2 {
-        let norm = self.re * self.re + self.im * self.im;
-        self.conj().scale(norm.inv())
+        let norm_sq = self.norm_sq();
+        self.conj().scale(norm_sq.inv())
     }
 }
 
@@ -278,6 +278,7 @@ impl Mul for Fp6 {
 }
 
 impl Fp6 {
+    // This function scalar multiplies an Fp6 by an Fp2
     fn scale(self, x: Fp2) -> Fp6 {
         Fp6 {
             t0: x * self.t0,
@@ -386,13 +387,7 @@ impl Mul for Fp12 {
 }
 
 impl Fp12 {
-    fn conj(self) -> Fp12 {
-        Fp12 {
-            z0: self.z0,
-            z1: -self.z1,
-        }
-    }
-
+    // This function scalar multiplies an Fp12 by an Fp6
     fn scale(self, x: Fp6) -> Fp12 {
         Fp12 {
             z0: x * self.z0,
@@ -400,6 +395,13 @@ impl Fp12 {
         }
     }
 
+    fn conj(self) -> Fp12 {
+        Fp12 {
+            z0: self.z0,
+            z1: -self.z1,
+        }
+    }
+
     /// The nth frobenius endomorphism of a p^q field is given by mapping
     ///     x to x^(p^n)
     /// which sends a + bz: Fp12 to

From f0a6ec953522b688ef9c83aa33573c2da4f1cad2 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 14:42:30 +0700
Subject: [PATCH 159/201] clean asm

---
 .../bn254/curve_arithmetic/curve_add.asm      |  47 ++++--
 .../bn254/curve_arithmetic/miller_loop.asm    | 146 +++++++++++-------
 .../curve/bn254/field_arithmetic/fp12_mul.asm |  43 ++++--
 3 files changed, 148 insertions(+), 88 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index e090e4e9..0ac947da 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -4,11 +4,13 @@ global ec_add:
     // stack:                                    x0, y0, x1, y1, retdest
 
     // Check if points are valid BN254 points.
-    DUP2  DUP2    
+    DUP2
+    DUP2    
     // stack:                            x0, y0, x0, y0, x1, y1, retdest
     %ec_check
     // stack:                   isValid(x0, y0), x0, y0, x1, y1, retdest
-    DUP5  DUP5    
+    DUP5
+    DUP5    
     // stack:         x1, y1  , isValid(x0, y0), x0, y0, x1, y1, retdest
     %ec_check
     // stack: isValid(x1, y1) , isValid(x0, y0), x0, y0, x1, y1, retdest
@@ -28,7 +30,8 @@ global ec_add_valid_points:
     // stack:                   x0, y0, x1, y1, retdest
 
     // Check if the first point is the identity.
-    DUP2  DUP2
+    DUP2
+    DUP2
     // stack:           x0,y0 , x0, y0, x1, y1, retdest
     %ec_isidentity
     // stack:   (0,0)==(x0,y0), x0, y0, x1, y1, retdest
@@ -36,7 +39,8 @@ global ec_add_valid_points:
     // stack:                   x0, y0, x1, y1, retdest
 
     // Check if the second point is the identity.
-    DUP4  DUP4    
+    DUP4
+    DUP4    
     // stack:           x1,y1 , x0, y0, x1, y1, retdest
     %ec_isidentity
     // stack:   (0,0)==(x1,y1), x0, y0, x1, y1, retdest
@@ -44,7 +48,8 @@ global ec_add_valid_points:
     // stack:                   x0, y0, x1, y1, retdest
 
     // Check if both points have the same x-coordinate.
-    DUP3  DUP2    
+    DUP3
+    DUP2    
     // stack:         x0 ,  x1, x0, y0, x1, y1, retdest
     EQ
     // stack:         x0 == x1, x0, y0, x1, y1, retdest
@@ -54,11 +59,13 @@ global ec_add_valid_points:
     // stack:                   x0, y0, x1, y1, retdest
     // Otherwise, we can use the standard formula.
     // Compute lambda = (y0 - y1)/(x0 - x1)
-    DUP4  DUP3
+    DUP4
+    DUP3
     // stack:          y0 , y1, x0, y0, x1, y1, retdest
     SUBFP254
     // stack:          y0 - y1, x0, y0, x1, y1, retdest
-    DUP4  DUP3
+    DUP4
+    DUP3
     // stack: x0 , x1, y0 - y1, x0, y0, x1, y1, retdest
     SUBFP254
     // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest
@@ -88,11 +95,13 @@ ec_add_valid_points_with_lambda:
     // stack:                             lambda, x0, y0, x1, y1, retdest
 
     // Compute x2 = lambda^2 - x1 - x0
-    DUP2  DUP5
+    DUP2
+    DUP5
     // stack:                     x1, x0, lambda, x0, y0, x1, y1, retdest
     DUP3
     // stack:          lambda   , x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP1  MULFP254
+    DUP1
+    MULFP254
     // stack:          lambda^2 , x1, x0, lambda, x0, y0, x1, y1, retdest
     SUBFP254
     // stack:          lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
@@ -127,7 +136,8 @@ ec_add_equal_first_coord:
     // stack:           x0, y0, x1, y1, retdest with x0 == x1
 
     // Check if the points are equal
-    DUP2  DUP5
+    DUP2
+    DUP5
     // stack: y1  , y0, x0, y0, x1, y1, retdest
     EQ
     // stack: y1 == y0, x0, y0, x1, y1, retdest
@@ -153,7 +163,8 @@ ec_add_equal_points:
 
     DUP1
     // stack:           x0  , x0, y0, x1, y1, retdest
-    DUP1  MULFP254
+    DUP1
+    MULFP254
     // stack:           x0^2, x0, y0, x1, y1, retdest
     %bn_3_over_2
     // stack:     3/2 , x0^2, x0, y0, x1, y1, retdest
@@ -170,7 +181,8 @@ ec_add_equal_points:
 // Standard doubling formula.
 global ec_double:
     // stack:         x0, y0, retdest
-    DUP2  DUP2    
+    DUP2
+    DUP2    
     // stack: x0, y0, x0, y0, retdest
     %jump(ec_add_equal_points)
 
@@ -213,13 +225,18 @@ global ec_double:
     // stack:                y, x, range
     DUP2 
     // stack:           x  , y, x, range
-    DUP1  DUP1  MULFP254  MULFP254
+    DUP1 
+    DUP1
+    MULFP254
+    MULFP254
     // stack:           x^3, y, x, range
-    PUSH 3  ADDFP254
+    PUSH 3
+    ADDFP254
     // stack:       3 + x^3, y, x, range
     DUP2
     // stack:  y  , 3 + x^3, y, x, range
-    DUP1  MULFP254
+    DUP1
+    MULFP254
     // stack:  y^2, 3 + x^3, y, x, range
     EQ
     // stack:         curve, y, x, range
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 0c92143b..cd13f80e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -27,23 +27,20 @@
 
 global miller:
     // stack:         ptr, out, retdest
-    PUSH 1
-    // stack:      1, ptr, out, retdest
-    DUP3
+    %stack (ptr, out) -> (out, 1, ptr, out)
     // stack: out, 1, ptr, out, retdest
     %mstore_kernel_general
     // stack:         ptr, out, retdest
     %load_fp6
     // stack:        P, Q, out, retdest
-    DUP2  DUP2
-    // stack:     O, P, Q, out, retdest
-    PUSH 53
-    // stack: 53, O, P, Q, out, retdest
-    PUSH 0 // this placeholder lets miller_loop start with POP
+    %stack (P: 2) -> (0, 53, P, P)
+    // stack: 0, 53, O, P, Q, out, retdest
+    // the head 0 lets miller_loop start with POP
 global miller_loop:
     POP
     // stack:          times  , O, P, Q, out, retdest
-    DUP1  ISZERO
+    DUP1  
+    ISZERO
     // stack:  break?, times  , O, P, Q, out, retdest
     %jumpi(miller_return)
     // stack:          times  , O, P, Q, out, retdest
@@ -56,13 +53,14 @@ global miller_loop:
     %jump(miller_one)
 miller_return:
     // stack: times, O, P, Q, out, retdest
-    POP  %pop2  %pop2  %pop4  POP
+    %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest)
     // stack:                      retdest
     JUMP 
 
 miller_one:
     // stack:               0xnm, times, O, P, Q, out, retdest
-    DUP1  %lt_const(0x20) 
+    DUP1  
+    %lt_const(0x20) 
     // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
     %jumpi(miller_zero)
     // stack:               0xnm, times, O, P, Q, out, retdest
@@ -74,7 +72,8 @@ miller_one:
 
 miller_zero:
     // stack:              m  , times, O, P, Q, out, retdest
-    DUP1  ISZERO
+    DUP1  
+    ISZERO
     // stack:       skip?, m  , times, O, P, Q, out, retdest
     %jumpi(miller_loop)
     // stack:              m  , times, O, P, Q, out, retdest
@@ -93,32 +92,42 @@ miller_zero:
 
 mul_tangent:
     // stack:                                              retdest, 0xnm, times, O, P, Q, out
-    PUSH mul_tangent_2  DUP13  PUSH mul_tangent_1
+    PUSH mul_tangent_2  
+    DUP13  
+    PUSH mul_tangent_1
     // stack:           mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP2  DUP1
+    %stack (mul_tangent_1, out) -> (out, out, mul_tangent_1, out)
     // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %jump(square_fp12)
 mul_tangent_1:
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP13  DUP13  DUP13  DUP13
+    DUP13
+    DUP13
+    DUP13
+    DUP13
     // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP11  DUP11
+    DUP11  
+    DUP11
     // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %tangent
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
-    PUSH 100  DUP2
+    %stack (out) -> (out, 100, out)
     // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
     %jump(mul_fp12_sparse)
 mul_tangent_2:
     // stack:                  retdest, 0xnm, times,   O, P, Q, out  {100: line}
     PUSH after_double
     // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    DUP6  DUP6
+    DUP6  
+    DUP6
     // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
     %jump(ec_double)
 after_double:
     // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    SWAP5  POP  SWAP5  POP
+    SWAP5
+    POP
+    SWAP5
+    POP
     // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
     JUMP
 
@@ -131,31 +140,40 @@ mul_cord:
     // stack:                            0xnm, times, O, P, Q, out
     PUSH mul_cord_1
     // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP11  DUP11  DUP11  DUP11
+    DUP11  
+    DUP11  
+    DUP11  
+    DUP11
     // stack:             Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP9  DUP9
+    DUP9  
+    DUP9
     // stack:          O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP13  DUP13
+    DUP13  
+    DUP13
     // stack:       P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
     %cord 
     // stack:                mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     DUP12
     // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    PUSH 100
-    // stack:      100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    DUP2
+    %stack (out) -> (out, 100, out)
     // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     %jump(mul_fp12_sparse)
 mul_cord_1:
     // stack:                   0xnm, times, O  , P, Q, out
     PUSH after_add
     // stack:        after_add, 0xnm, times, O  , P, Q, out
-    DUP7  DUP7  DUP7  DUP7
+    DUP7  
+    DUP7  
+    DUP7  
+    DUP7
     // stack: O , P, after_add, 0xnm, times, O  , P, Q, out
     %jump(ec_add_valid_points)
 after_add:
     // stack:            O + P, 0xnm, times, O  , P, Q, out
-    SWAP4  POP  SWAP4  POP
+    SWAP4
+    POP
+    SWAP4
+    POP
     // stack:                   0xnm, times, O+P, P, Q, out
     %jump(miller_one)
 
@@ -169,38 +187,42 @@ after_add:
 
 %macro tangent
     // stack:                px, py, qx, qx_,  qy, qy_
-    PUSH 9
-    // stack:             9, px, py, qx, qx_,  qy, qy_
-    DUP3
-    // stack:        py , 9, px, py, qx, qx_,  qy, qy_
-    DUP1  MULFP254
-    // stack:     py**2 , 9, px, py, qx, qx_,  qy, qy_
+    %stack (px, py) -> (py, py , 9, px, py)
+    // stack:    py, py , 9, px, py, qx, qx_,  qy, qy_
+    MULFP254
+    // stack:      py^2 , 9, px, py, qx, qx_,  qy, qy_
     SUBFP254
-    // stack:     py**2 - 9, px, py, qx, qx_,  qy, qy_
+    // stack:      py^2 - 9, px, py, qx, qx_,  qy, qy_
     %mstore_kernel_general(100)
     // stack:                px, py, qx, qx_,  qy, qy_
-    DUP1  MULFP254
-    // stack:             px**2, py, qx, qx_,  qy, qy_
-    PUSH 3  MULFP254
-    // stack:           3*px**2, py, qx, qx_,  qy, qy_
-    PUSH 0  SUBFP254
-    // stack:          -3*px**2, py, qx, qx_,  qy, qy_
-    SWAP2
-    // stack:           qx, py, -3px**2, qx_,  qy, qy_
-    DUP3  MULFP254
-    // stack: (-3*px**2)qx, py, -3px**2, qx_,  qy, qy_ 
-    %mstore_kernel_general(102)
-    // stack:               py, -3px**2, qx_,  qy, qy_ 
-    PUSH 2  MULFP254
-    // stack:              2py, -3px**2, qx_,  qy, qy_ 
-    SWAP3 
-    // stack:               qy, -3px**2, qx_, 2py, qy_ 
-    DUP4  MULFP254
-    // stack:          (2py)qy, -3px**2, qx_, 2py, qy_ 
-    %mstore_kernel_general(108)
-    // stack:                   -3px**2, qx_, 2py, qy_ 
+    DUP1  
     MULFP254
-    // stack:                  (-3px**2)*qx_, 2py, qy_ 
+    // stack:              px^2, py, qx, qx_,  qy, qy_
+    PUSH 3  
+    MULFP254
+    // stack:            3*px^2, py, qx, qx_,  qy, qy_
+    PUSH 0  
+    SUBFP254
+    // stack:           -3*px^2, py, qx, qx_,  qy, qy_
+    SWAP2
+    // stack:            qx, py, -3px^2, qx_,  qy, qy_
+    DUP3  
+    MULFP254
+    // stack:   (-3*px^2)qx, py, -3px^2, qx_,  qy, qy_ 
+    %mstore_kernel_general(102)
+    // stack:                py, -3px^2, qx_,  qy, qy_ 
+    PUSH 2  
+    MULFP254
+    // stack:               2py, -3px^2, qx_,  qy, qy_ 
+    SWAP3 
+    // stack:                qy, -3px^2, qx_, 2py, qy_ 
+    DUP4  
+    MULFP254
+    // stack:           (2py)qy, -3px^2, qx_, 2py, qy_ 
+    %mstore_kernel_general(108)
+    // stack:                    -3px^2, qx_, 2py, qy_ 
+    MULFP254
+    // stack:                   (-3px^2)*qx_, 2py, qy_ 
     %mstore_kernel_general(103)
     // stack:                                 2py, qy_ 
     MULFP254
@@ -217,9 +239,13 @@ after_add:
 
 %macro cord
     // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    DUP1  DUP5  MULFP254
+    DUP1  
+    DUP5  
+    MULFP254
     // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    DUP3  DUP5  MULFP254
+    DUP3  
+    DUP5  
+    MULFP254
     // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SUBFP254
     // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
@@ -235,13 +261,15 @@ after_add:
     // stack:                    p1x - p2x, p2y - p1y, qx, qx_, qy, qy_
     SWAP4
     // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    DUP5  MULFP254
+    DUP5
+    MULFP254
     // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
     %mstore_kernel_general(108)
     // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
     SWAP1
     // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
-    DUP2  MULFP254
+    DUP2
+    MULFP254
     // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
     %mstore_kernel_general(102)
     // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index ee8804c7..3069107f 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -45,11 +45,13 @@
 
 global mul_fp12:
     // stack:                                inA, inB, out 
-    DUP1  %offset_fp6 
+    DUP1  
+    %offset_fp6 
     // stack:                          inA', inA, inB, out 
     %load_fp6
     // stack:                            f', inA, inB, out 
-    DUP8  %offset_fp6
+    DUP8  
+    %offset_fp6
     // stack:                      inB', f', inA, inB, out 
     %load_fp6
     // stack:                        g', f', inA, inB, out 
@@ -118,7 +120,8 @@ mul_fp12_3:
     // stack:          f'g'+fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %subr_fp6
     // stack:       (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
-    DUP14  %offset_fp6 
+    DUP14  
+    %offset_fp6 
     // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     %store_fp6
     // stack:                                 fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
@@ -130,7 +133,8 @@ mul_fp12_3:
     // stack:                 out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %store_fp6
     // stack:                                     inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %pop2  JUMP
+    %pop2  
+    JUMP
 
 
 //////////////////////////////////////
@@ -178,7 +182,8 @@ mul_fp12_3:
 
 global mul_fp12_sparse:
     // stack:                                                                    inA, inB, out
-    DUP1  %offset_fp6
+    DUP1  
+    %offset_fp6
     // stack:                                                              inA', inA, inB, out
     %load_fp6
     // stack:                                                                f', inA, inB, out
@@ -210,7 +215,8 @@ global mul_fp12_sparse:
     // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %swap_fp6
     // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    DUP13  %add_const(8)
+    DUP13
+    %add_const(8)
     // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     %load_fp2
     // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
@@ -238,7 +244,8 @@ global mul_fp12_sparse:
     // stack:                                          g0 * f', out, f, inB, f', inA, inB, out
     %swap_fp6_hole
     // stack:                                        f  , out, g0 * f', inB, f', inA, inB, out
-    DUP14  %add_const(8)
+    DUP14
+    %add_const(8)
     // stack:                               inB2,    f  , out, g0 * f', inB, f', inA, inB, out
     %load_fp2
     // stack:                                G2 ,    f  , out, g0 * f', inB, f', inA, inB, out
@@ -248,7 +255,8 @@ global mul_fp12_sparse:
     // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
     %swap_fp6_hole
     // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    DUP7  %add_const(2)
+    DUP7
+    %add_const(2)
     // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
     %load_fp2
     // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
@@ -256,11 +264,13 @@ global mul_fp12_sparse:
     // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
     %add_fp6_hole
     // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
-    DUP9  %offset_fp6
+    DUP9
+    %offset_fp6
     // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     %store_fp6
     // stack:                                                                    inA, inB, out
-    %pop3  JUMP
+    %pop3
+    JUMP
 
 
 /////////////////////////
@@ -324,11 +334,13 @@ global square_fp12:
     // stack:                                  square_fp12_2, inp, f, square_fp12_3, out 
     %dup_fp6_2
     // stack:                              f , square_fp12_2, inp, f, square_fp12_3, out
-    DUP16  %offset_fp6
+    DUP16
+    %offset_fp6
     // stack:                        out', f , square_fp12_2, inp, f, square_fp12_3, out
     PUSH square_fp12_1
     // stack:         square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
-    DUP10  %offset_fp6
+    DUP10
+    %offset_fp6
     // stack:   inp', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
     %load_fp6
     // stack:     f', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
@@ -352,7 +364,9 @@ square_fp12_2:
     // stack:                                       sh(f'f'), inp, f, square_fp12_3, out
     %swap_fp6_hole
     // stack:                                       f, inp, sh(f'f'), square_fp12_3, out
-    SWAP6  SWAP13  SWAP6
+    SWAP6
+    SWAP13
+    SWAP6
     // stack:                                       f, square_fp12_3, sh(f'f'), inp, out
     %jump(square_fp6)
 square_fp12_3:
@@ -363,4 +377,5 @@ square_fp12_3:
     // stack:                                               out, ff + sh(f'f'), inp, out
     %store_fp6
     // stack:                                                                   inp, out
-    %pop2  JUMP
+    %pop2
+    JUMP

From f70243e70c0fdfd5532e15fe1d4e257e958f75ba Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 15:31:32 +0700
Subject: [PATCH 160/201] better comments

---
 evm/src/bn254_arithmetic.rs |  10 +-
 evm/src/bn254_pairing.rs    | 302 +++++++++++++++++++-----------------
 2 files changed, 164 insertions(+), 148 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 72f006a1..3db48591 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -176,7 +176,7 @@ impl Fp2 {
     }
 
     /// Return the complex conjugate z' of z: Fp2
-    /// This also happens to be the frobenius map 
+    /// This also happens to be the frobenius map
     ///     z -> z^p
     /// since p == 3 mod 4 and hence
     ///     i^p = i^3 = -i
@@ -301,9 +301,9 @@ impl Fp6 {
     ///     x to x^(p^n)
     /// which sends a + bt + ct^2: Fp6 to
     ///     a^(p^n) + b^(p^n) * t^(p^n) + c^(p^n) * t^(2p^n)
-    /// The Fp2 coefficients are determined by the comment in the conj method, 
-    /// while the values of 
-    ///     t^(p^n) and t^(2p^n) 
+    /// The Fp2 coefficients are determined by the comment in the conj method,
+    /// while the values of
+    ///     t^(p^n) and t^(2p^n)
     /// are precomputed in the constant arrays FROB_T1 and FROB_T2
     fn frob(self, n: usize) -> Fp6 {
         let n = n % 6;
@@ -336,7 +336,7 @@ impl Fp6 {
     ///     (x_1 * x_3) * x_5 * (x_1 * x_3)_1
     /// By Galois theory, the following are in Fp2 and are complex conjugates
     ///     x_1 * x_3 * x_5,  x_0 * x_2 * x_4
-    /// and therefore 
+    /// and therefore
     ///     phi = ||x_1 * x_3 * x_5||^2
     /// and hence the inverse is given by
     ///     ([x_1 * x_3] * x_5) * [x_1 * x_3]_1 / ||[x_1 * x_3] * x_5||^2
diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 2df002c1..79c9b43e 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -129,158 +129,26 @@ pub fn invariance_inducing_power(f: Fp12) -> Fp12 {
     y.frob(3) * y_a2.frob(2) * y_a1.frob(1) * y_a0
 }
 
-/// Given an f: Fp12, this function computes
-///     y^a2, y^(-a1), y^(-a0)
-/// by first computing
+/// To avoid repeated steps, we first compute the following powers together:
 ///     y^a4, y^a2, y^a0
 /// where a1 is given by
 ///     a1 = a4 + 2a2 - a0
-/// thus what remains is inverting y^a0 and returning
-///     y^a2, y^a4 * y^a2 * y^a2 * y^(-a0), y^(-a0)
+/// we then invert y^a0 and return
+///     y^a2, y^a1 = y^a4 * y^a2 * y^a2 * y^(-a0), y^(-a0)
+///
+/// Represent a4, a2, a0 in *little endian* binary, define
+///     EXPS4 = [(a4[i], a2[i], a0[i]) for i in       0..len(a4)]
+///     EXPS2 = [       (a2[i], a0[i]) for i in len(a4)..len(a2)]
+///     EXPS0 = [               a0[i]  for i in len(a2)..len(a0)]
 fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
-    const EXPS4: [(usize, usize, usize); 64] = [
-        (1, 1, 0),
-        (1, 1, 1),
-        (1, 1, 1),
-        (0, 0, 0),
-        (0, 0, 1),
-        (1, 0, 1),
-        (0, 1, 0),
-        (1, 0, 1),
-        (1, 1, 0),
-        (1, 0, 1),
-        (0, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (0, 1, 0),
-        (0, 1, 0),
-        (0, 0, 1),
-        (1, 0, 1),
-        (1, 1, 0),
-        (0, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (0, 0, 1),
-        (0, 0, 1),
-        (1, 0, 1),
-        (1, 0, 1),
-        (1, 1, 0),
-        (1, 0, 0),
-        (1, 1, 0),
-        (0, 1, 0),
-        (1, 1, 0),
-        (1, 0, 0),
-        (0, 1, 0),
-        (0, 0, 0),
-        (1, 0, 0),
-        (1, 0, 0),
-        (1, 0, 1),
-        (0, 0, 1),
-        (0, 1, 1),
-        (0, 0, 1),
-        (0, 1, 1),
-        (0, 1, 1),
-        (0, 0, 0),
-        (1, 1, 1),
-        (1, 0, 1),
-        (1, 0, 1),
-        (0, 1, 1),
-        (1, 0, 1),
-        (0, 1, 1),
-        (0, 1, 1),
-        (1, 1, 0),
-        (1, 1, 0),
-        (1, 1, 0),
-        (1, 0, 0),
-        (0, 0, 1),
-        (1, 0, 0),
-        (0, 0, 1),
-        (1, 0, 1),
-        (1, 1, 0),
-        (1, 1, 1),
-        (0, 1, 1),
-        (0, 1, 0),
-        (1, 1, 1),
-    ];
-
-    const EXPS2: [(usize, usize); 62] = [
-        (1, 0),
-        (1, 1),
-        (0, 0),
-        (1, 0),
-        (1, 0),
-        (1, 1),
-        (1, 0),
-        (1, 1),
-        (1, 0),
-        (0, 1),
-        (0, 1),
-        (1, 1),
-        (1, 1),
-        (0, 0),
-        (1, 1),
-        (0, 0),
-        (0, 0),
-        (0, 1),
-        (0, 1),
-        (1, 1),
-        (1, 1),
-        (1, 1),
-        (0, 1),
-        (1, 1),
-        (0, 0),
-        (1, 1),
-        (1, 0),
-        (1, 1),
-        (0, 0),
-        (1, 1),
-        (1, 1),
-        (1, 0),
-        (0, 0),
-        (0, 1),
-        (0, 0),
-        (1, 1),
-        (0, 1),
-        (0, 0),
-        (1, 0),
-        (0, 1),
-        (0, 1),
-        (1, 0),
-        (0, 1),
-        (0, 0),
-        (0, 0),
-        (0, 0),
-        (0, 1),
-        (1, 0),
-        (1, 1),
-        (0, 1),
-        (1, 1),
-        (1, 0),
-        (0, 1),
-        (0, 0),
-        (1, 0),
-        (0, 1),
-        (1, 0),
-        (1, 1),
-        (1, 0),
-        (1, 1),
-        (0, 1),
-        (1, 1),
-    ];
-
-    const EXPS0: [usize; 65] = [
-        0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,
-        0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1,
-        0, 0, 1, 1, 0,
-    ];
-
     let mut sq: Fp12 = f;
     let mut y0: Fp12 = UNIT_FP12;
     let mut y2: Fp12 = UNIT_FP12;
     let mut y4: Fp12 = UNIT_FP12;
 
+    // proceed via standard squaring algorithm for exponentiation
+
+    // must keep multiplying all three values: a4, a2, a0
     for (a, b, c) in EXPS4 {
         if a != 0 {
             y4 = y4 * sq;
@@ -293,8 +161,10 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
         }
         sq = sq * sq;
     }
+    // leading term of a4 is always 1
     y4 = y4 * sq;
 
+    // must keep multiplying remaining two values: a2, a0
     for (a, b) in EXPS2 {
         if a != 0 {
             y2 = y2 * sq;
@@ -304,17 +174,23 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
         }
         sq = sq * sq;
     }
+    // leading term of a2 is always 1
     y2 = y2 * sq;
 
+    // must keep multiplying remaining value: a0
     for a in EXPS0 {
         if a != 0 {
             y0 = y0 * sq;
         }
         sq = sq * sq;
     }
+    // leading term of a0 is always 1
     y0 = y0 * sq;
 
+    // invert y0 to compute y^(-a0)
     let y0_inv = y0.inv();
+
+    // return y2, y1 = y4 * y2^2 * y^(-a0), y^(-a0)
     (y2, y4 * y2 * y2 * y0_inv, y0_inv)
 }
 
@@ -369,3 +245,143 @@ pub const TWISTED_GENERATOR: TwistedCurve = {
         },
     }
 };
+
+/// The folowing constants are defined above get_custom_powers
+
+const EXPS4: [(usize, usize, usize); 64] = [
+    (1, 1, 0),
+    (1, 1, 1),
+    (1, 1, 1),
+    (0, 0, 0),
+    (0, 0, 1),
+    (1, 0, 1),
+    (0, 1, 0),
+    (1, 0, 1),
+    (1, 1, 0),
+    (1, 0, 1),
+    (0, 1, 0),
+    (1, 1, 0),
+    (1, 1, 0),
+    (1, 1, 0),
+    (0, 1, 0),
+    (0, 1, 0),
+    (0, 0, 1),
+    (1, 0, 1),
+    (1, 1, 0),
+    (0, 1, 0),
+    (1, 1, 0),
+    (1, 1, 0),
+    (1, 1, 0),
+    (0, 0, 1),
+    (0, 0, 1),
+    (1, 0, 1),
+    (1, 0, 1),
+    (1, 1, 0),
+    (1, 0, 0),
+    (1, 1, 0),
+    (0, 1, 0),
+    (1, 1, 0),
+    (1, 0, 0),
+    (0, 1, 0),
+    (0, 0, 0),
+    (1, 0, 0),
+    (1, 0, 0),
+    (1, 0, 1),
+    (0, 0, 1),
+    (0, 1, 1),
+    (0, 0, 1),
+    (0, 1, 1),
+    (0, 1, 1),
+    (0, 0, 0),
+    (1, 1, 1),
+    (1, 0, 1),
+    (1, 0, 1),
+    (0, 1, 1),
+    (1, 0, 1),
+    (0, 1, 1),
+    (0, 1, 1),
+    (1, 1, 0),
+    (1, 1, 0),
+    (1, 1, 0),
+    (1, 0, 0),
+    (0, 0, 1),
+    (1, 0, 0),
+    (0, 0, 1),
+    (1, 0, 1),
+    (1, 1, 0),
+    (1, 1, 1),
+    (0, 1, 1),
+    (0, 1, 0),
+    (1, 1, 1),
+];
+
+const EXPS2: [(usize, usize); 62] = [
+    (1, 0),
+    (1, 1),
+    (0, 0),
+    (1, 0),
+    (1, 0),
+    (1, 1),
+    (1, 0),
+    (1, 1),
+    (1, 0),
+    (0, 1),
+    (0, 1),
+    (1, 1),
+    (1, 1),
+    (0, 0),
+    (1, 1),
+    (0, 0),
+    (0, 0),
+    (0, 1),
+    (0, 1),
+    (1, 1),
+    (1, 1),
+    (1, 1),
+    (0, 1),
+    (1, 1),
+    (0, 0),
+    (1, 1),
+    (1, 0),
+    (1, 1),
+    (0, 0),
+    (1, 1),
+    (1, 1),
+    (1, 0),
+    (0, 0),
+    (0, 1),
+    (0, 0),
+    (1, 1),
+    (0, 1),
+    (0, 0),
+    (1, 0),
+    (0, 1),
+    (0, 1),
+    (1, 0),
+    (0, 1),
+    (0, 0),
+    (0, 0),
+    (0, 0),
+    (0, 1),
+    (1, 0),
+    (1, 1),
+    (0, 1),
+    (1, 1),
+    (1, 0),
+    (0, 1),
+    (0, 0),
+    (1, 0),
+    (0, 1),
+    (1, 0),
+    (1, 1),
+    (1, 0),
+    (1, 1),
+    (0, 1),
+    (1, 1),
+];
+
+const EXPS0: [usize; 65] = [
+    0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0,
+    1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
+    0,
+];

From b46af11f3a6fb79c0ed96bd7a6c03724f9ba0104 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 16:10:53 +0700
Subject: [PATCH 161/201] move consts

---
 evm/src/bn254_pairing.rs          | 54 +----------------------------
 evm/src/cpu/kernel/tests/bn254.rs | 56 +++++++++++++++++++++++++++++--
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 79c9b43e..f67cb020 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -194,59 +194,7 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     (y2, y4 * y2 * y2 * y0_inv, y0_inv)
 }
 
-// The curve is cyclic with generator (1, 2)
-pub const CURVE_GENERATOR: Curve = {
-    Curve {
-        x: Fp { val: U256::one() },
-        y: Fp {
-            val: U256([2, 0, 0, 0]),
-        },
-    }
-};
-
-// The twisted curve is cyclic with generator (x, y) as follows
-pub const TWISTED_GENERATOR: TwistedCurve = {
-    TwistedCurve {
-        x: Fp2 {
-            re: Fp {
-                val: U256([
-                    0x46debd5cd992f6ed,
-                    0x674322d4f75edadd,
-                    0x426a00665e5c4479,
-                    0x1800deef121f1e76,
-                ]),
-            },
-            im: Fp {
-                val: U256([
-                    0x97e485b7aef312c2,
-                    0xf1aa493335a9e712,
-                    0x7260bfb731fb5d25,
-                    0x198e9393920d483a,
-                ]),
-            },
-        },
-        y: Fp2 {
-            re: Fp {
-                val: U256([
-                    0x4ce6cc0166fa7daa,
-                    0xe3d1e7690c43d37b,
-                    0x4aab71808dcb408f,
-                    0x12c85ea5db8c6deb,
-                ]),
-            },
-            im: Fp {
-                val: U256([
-                    0x55acdadcd122975b,
-                    0xbc4b313370b38ef3,
-                    0xec9e99ad690c3395,
-                    0x090689d0585ff075,
-                ]),
-            },
-        },
-    }
-};
-
-/// The folowing constants are defined above get_custom_powers
+// The folowing constants are defined above get_custom_powers
 
 const EXPS4: [(usize, usize, usize); 64] = [
     (1, 1, 0),
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index a801300c..2d695f70 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,8 +4,8 @@ use std::ops::Range;
 use anyhow::Result;
 use ethereum_types::U256;
 
-use crate::bn254_arithmetic::{gen_fp12, Fp12};
-use crate::bn254_pairing::{gen_fp12_sparse, tate, CURVE_GENERATOR, TWISTED_GENERATOR};
+use crate::bn254_arithmetic::{gen_fp12, Fp, Fp12, Fp2};
+use crate::bn254_pairing::{gen_fp12_sparse, tate, Curve, TwistedCurve};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
@@ -204,6 +204,58 @@ fn test_inv_fp12() -> Result<()> {
 //     Ok(())
 // }
 
+// The curve is cyclic with generator (1, 2)
+pub const CURVE_GENERATOR: Curve = {
+    Curve {
+        x: Fp { val: U256::one() },
+        y: Fp {
+            val: U256([2, 0, 0, 0]),
+        },
+    }
+};
+
+// The twisted curve is cyclic with generator (x, y) as follows
+pub const TWISTED_GENERATOR: TwistedCurve = {
+    TwistedCurve {
+        x: Fp2 {
+            re: Fp {
+                val: U256([
+                    0x46debd5cd992f6ed,
+                    0x674322d4f75edadd,
+                    0x426a00665e5c4479,
+                    0x1800deef121f1e76,
+                ]),
+            },
+            im: Fp {
+                val: U256([
+                    0x97e485b7aef312c2,
+                    0xf1aa493335a9e712,
+                    0x7260bfb731fb5d25,
+                    0x198e9393920d483a,
+                ]),
+            },
+        },
+        y: Fp2 {
+            re: Fp {
+                val: U256([
+                    0x4ce6cc0166fa7daa,
+                    0xe3d1e7690c43d37b,
+                    0x4aab71808dcb408f,
+                    0x12c85ea5db8c6deb,
+                ]),
+            },
+            im: Fp {
+                val: U256([
+                    0x55acdadcd122975b,
+                    0xbc4b313370b38ef3,
+                    0xec9e99ad690c3395,
+                    0x090689d0585ff075,
+                ]),
+            },
+        },
+    }
+};
+
 #[test]
 fn test_tate() -> Result<()> {
     let ptr: usize = 300;

From 56be7317e0a826a46a99cd07fae786116a629cac Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 16:20:24 +0700
Subject: [PATCH 162/201] comments

---
 evm/src/bn254_pairing.rs                      | 27 ++++++++++---------
 .../bn254/curve_arithmetic/constants.asm      | 13 +++++++--
 2 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index f67cb020..be6c1348 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -45,18 +45,6 @@ pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
 }
 
 pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
-    const EXP: [usize; 253] = [
-        1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
-        1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
-        1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
-        1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
-        1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
-        1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
-        0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
-        1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    ];
-
     let mut o = p;
     let mut acc = UNIT_FP12;
     let mut line;
@@ -87,6 +75,8 @@ pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x.scale(cx), q.y.scale(cy))
 }
 
+/// The tangent and cord functions output sparse Fp12 elements.
+/// This map embeds the nonzero coefficients into an Fp12.
 pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     let g0 = Fp6 {
         t0: Fp2 {
@@ -194,6 +184,19 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     (y2, y4 * y2 * y2 * y0_inv, y0_inv)
 }
 
+
+const EXP: [usize; 253] = [
+    1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
+    1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
+    1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
+    1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
+    1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
+    1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
+    0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
+    1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+];
+
 // The folowing constants are defined above get_custom_powers
 
 const EXPS4: [(usize, usize, usize); 64] = [
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index 93878b57..ba0631fd 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -1,10 +1,10 @@
-/// the following is defined by
+/// miller_data is defined by
 /// (1) taking the binary expansion of N254, the order of the elliptic curve group
 /// (2) popping the first and last elements, then appending a 0:
 ///     exp = bin(N254)[1:-1] + [0]
 /// (3) counting the lengths of runs of 1s then 0s in exp, e.g.
 ///     if exp = 1100010011110, then EXP = [(2,3), (1,2), (4,1)]
-/// (4) encoding each pair (n,m) as 0xnm:
+/// (4) byte encoding each pair (n,m) as follows:
 ///     miller_data = [(0x20)n + m for (n,m) in EXP]
 
 global miller_data:
@@ -23,6 +23,15 @@ global miller_data:
     BYTES 0x62, 0x22, 0x23, 0x42
     BYTES 0x25
 
+
+/// final_exp first computes y^a4, y^a2, y^a0
+/// representing a4, a2, a0 in *little endian* binary, define
+///     EXPS4 = [(a4[i], a2[i], a0[i]) for i in       0..len(a4)]
+///     EXPS2 = [       (a2[i], a0[i]) for i in len(a4)..len(a2)]
+///     EXPS0 = [               a0[i]  for i in len(a2)..len(a0)]
+/// power_data_n is simply a reverse-order byte encoding of EXPSn
+///     where (i,j,k) is sent to (0b100)i + (0b10)j + k
+
 global power_data_4:
     BYTES 111, 010, 011, 111
     BYTES 110, 101, 001, 100

From 6958d46c56b3fffd192f9f0107c8e23b668a4469 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 16:26:41 +0700
Subject: [PATCH 163/201] names and comments

---
 evm/src/bn254_pairing.rs                      |  2 -
 .../bn254/curve_arithmetic/final_power.asm    |  2 +-
 .../bn254/curve_arithmetic/tate_pairing.asm   |  2 +-
 .../curve/bn254/field_arithmetic/inverse.asm  | 59 ++++---------------
 .../asm/curve/bn254/field_arithmetic/util.asm | 15 +++++
 .../kernel/asm/curve/secp256k1/ecrecover.asm  |  2 +-
 .../cpu/kernel/asm/curve/secp256k1/moddiv.asm |  2 +-
 evm/src/cpu/kernel/tests/bn254.rs             |  4 +-
 8 files changed, 33 insertions(+), 55 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index be6c1348..c5c01eba 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -1,7 +1,5 @@
 use std::ops::Add;
 
-use ethereum_types::U256;
-
 use crate::bn254_arithmetic::{gen_fp, gen_fp2, Fp, Fp12, Fp2, Fp6, UNIT_FP12, ZERO_FP, ZERO_FP2};
 
 // The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
index 942ba4fa..a38c8a65 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
@@ -37,7 +37,7 @@ custom_powers:
     // stack:                             val, retdest  {200: y0, 212: y2, 224: y4}
     %stack () -> (200, 236, make_term_1)
     // stack:      200, 236, make_term_1, val, retdest  {200: y0, 212: y2, 224: y4}
-    %jump(inv_fp12)
+    %jump(inv_fp254_12)
 make_term_1:
     // stack:                             val, retdest  {212: y2, 224: y4, 236: y0^-1}
     %stack () -> (212, 224, 224, make_term_2)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index b4931ac8..d4395aa8 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -20,7 +20,7 @@ global make_invariant:
     // stack:                      out, retdest  {out: y}
     %stack (out) -> (out, 100, first_exp, out)         
     // stack: out, 100, first_exp, out, retdest  {out: y}
-    %jump(inv_fp12)
+    %jump(inv_fp254_12)
 global first_exp:
     // stack:                             out, retdest  {out: y  , 100: y^-1}
     %frob_fp12_6
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 8f42e047..a813b98b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -3,13 +3,13 @@
 // Returns y * (x^-1) where the inverse is taken modulo N
 %macro divfp254
     // stack: x   , y
-    %inverse
+    %inv_fp254
     // stack: x^-1, y
     MULFP254
 %endmacro
 
 // Non-deterministically provide the inverse modulo N.
-%macro inverse
+%macro inv_fp254
     // stack:        x
     PROVER_INPUT(ff::bn254_base::inverse)
     // stack: x^-1 , x
@@ -22,38 +22,18 @@
 %endmacro
 
 
-global inv_fp12:
-    // stack:                ptr, inv, retdest
-    %prover_inv_fp12
-    // stack:          f^-1, ptr, inv, retdest
+global inv_fp254_12:
+    // stack:                         inp, out, retdest
+    %prover_inv_fp254_12
+    // stack:                   f^-1, inp, out, retdest
     DUP14
-    // stack:     inv, f^-1, ptr, inv, retdest
+    // stack:              out, f^-1, inp, out, retdest
     %store_fp12
-    // stack:                ptr, inv, retdest
-    %stack (ptr, inv) -> (ptr, inv, 50, check_inv)
-    // stack: ptr, inv, 50, check_inv, retdest 
+    // stack:                         inp, out, retdest
+    %stack (inp, out) -> (inp, out, 50, check_inv_fp254_12)
+    // stack: inp, out, 50, check_inv_fp254_12, retdest 
     %jump(mul_fp12)
-
-global inv_fp12_old:
-    // stack:                ptr, inv, retdest
-    DUP1  %load_fp12
-    // stack:             f, ptr, inv, retdest
-    DUP14
-    // stack:        inv, f, ptr, inv, retdest
-    %prover_inv_fp12
-    // stack:  f^-1, inv, f, ptr, inv, retdest
-    DUP13  %store_fp12
-    // stack:        inv, f, ptr, inv, retdest
-    POP  %pop4  %pop4  %pop4
-    // stack:                ptr, inv, retdest
-    PUSH 50  PUSH check_inv
-    // stack: check_inv, 50, ptr, inv, retdest 
-    %stack (check_inv, mem, ptr, inv) -> (ptr, inv, mem, check_inv)
-    // stack: ptr, inv, 50, check_inv, retdest 
-    %jump(mul_fp12)
-
-
-global check_inv:
+check_inv_fp254_12:
     // stack:        retdest
     PUSH 50  
     %load_fp12
@@ -62,7 +42,7 @@ global check_inv:
     // stack:        retdest
     JUMP
 
-%macro prover_inv_fp12
+%macro prover_inv_fp254_12
     PROVER_INPUT(ffe::bn254_base::component_11)
     PROVER_INPUT(ffe::bn254_base::component_10)
     PROVER_INPUT(ffe::bn254_base::component_9)
@@ -76,18 +56,3 @@ global check_inv:
     PROVER_INPUT(ffe::bn254_base::component_1)
     PROVER_INPUT(ffe::bn254_base::component_0)
 %endmacro
-
-%macro assert_eq_unit_fp12
-    %assert_eq_const(1)
-    %assert_zero
-    %assert_zero
-    %assert_zero
-    %assert_zero
-    %assert_zero
-    %assert_zero
-    %assert_zero
-    %assert_zero
-    %assert_zero
-    %assert_zero
-    %assert_zero
-%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 5d081a19..89915038 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -1070,3 +1070,18 @@
     // stack:      ind11', x11, ptr'
     %mstore_kernel_general
 %endmacro
+
+%macro assert_eq_unit_fp12
+    %assert_eq_const(1)
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+    %assert_zero
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm
index a1c2ff3c..6ebd89dc 100644
--- a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm
+++ b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm
@@ -53,7 +53,7 @@ ecrecover_valid_input:
     // stack: y, hash, r, s, retdest
     DUP3
     // stack: r, y, hash, x, s, retdest (r=x)
-    %inverse_secp_scalar
+    %inv_fp254_secp_scalar
     // stack: r^(-1), y, hash, x, s, retdest
     DUP1
     // stack: r^(-1), r^(-1), y, hash, x, s, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm
index d878dc14..17d5c623 100644
--- a/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm
@@ -4,7 +4,7 @@
 // Returns y * (x^-1) where the inverse is taken modulo N
 %macro moddiv_secp_base
     // stack: x, y
-    %inverse_secp_base
+    %inv_fp254_secp_base
     // stack: x^-1, y
     %mulmodn_secp_base
 %endmacro
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 2d695f70..19ff138c 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -147,13 +147,13 @@ fn test_frob_fp12() -> Result<()> {
 }
 
 #[test]
-fn test_inv_fp12() -> Result<()> {
+fn test_inv_fp254_12() -> Result<()> {
     let ptr: usize = 100;
     let inv: usize = 112;
     let f: Fp12 = gen_fp12();
 
     let setup = InterpreterSetup {
-        label: "inv_fp12".to_string(),
+        label: "inv_fp254_12".to_string(),
         stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],
         memory: vec![(ptr, fp12_on_stack(f))],
     };

From 3ea8ad9062f14b10034b7bbf0ac0a3b76236a679 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 16:26:50 +0700
Subject: [PATCH 164/201] fmt

---
 evm/src/bn254_pairing.rs | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index c5c01eba..c78edee6 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -182,17 +182,15 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     (y2, y4 * y2 * y2 * y0_inv, y0_inv)
 }
 
-
 const EXP: [usize; 253] = [
-    1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
-    1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
-    1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0,
-    1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,
-    1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
-    1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
-    0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
-    1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
+    0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
+    1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
+    0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
+    1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
+    0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 ];
 
 // The folowing constants are defined above get_custom_powers

From 0eef28af660b2ff82db03b5a5ce1554ebbc5e871 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 16:41:42 +0700
Subject: [PATCH 165/201] bools

---
 evm/src/bn254_pairing.rs                      | 309 ++++++++--------
 .../bn254/field_arithmetic/frobenius.asm      |  12 +-
 .../asm/curve/bn254/field_arithmetic/util.asm | 338 +++++++++++-------
 3 files changed, 377 insertions(+), 282 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index c78edee6..873f2b16 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -52,7 +52,7 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
         line = tangent(o, q);
         acc = line * acc;
         o = o + o;
-        if i != 0 {
+        if i {
             line = cord(p, o, q);
             acc = line * acc;
             o = o + p;
@@ -138,13 +138,13 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
 
     // must keep multiplying all three values: a4, a2, a0
     for (a, b, c) in EXPS4 {
-        if a != 0 {
+        if a {
             y4 = y4 * sq;
         }
-        if b != 0 {
+        if b {
             y2 = y2 * sq;
         }
-        if c != 0 {
+        if c {
             y0 = y0 * sq;
         }
         sq = sq * sq;
@@ -154,10 +154,10 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
 
     // must keep multiplying remaining two values: a2, a0
     for (a, b) in EXPS2 {
-        if a != 0 {
+        if a {
             y2 = y2 * sq;
         }
-        if b != 0 {
+        if b {
             y0 = y0 * sq;
         }
         sq = sq * sq;
@@ -167,7 +167,7 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
 
     // must keep multiplying remaining value: a0
     for a in EXPS0 {
-        if a != 0 {
+        if a {
             y0 = y0 * sq;
         }
         sq = sq * sq;
@@ -182,153 +182,166 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     (y2, y4 * y2 * y2 * y0_inv, y0_inv)
 }
 
-const EXP: [usize; 253] = [
-    1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1,
-    0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
-    1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
-    0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1,
-    0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
-    1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0,
-    0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
-    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+const EXP: [bool; 253] = [
+    true, false, false, false, false, false, true, true, false, false, true, false, false, false,
+    true, false, false, true, true, true, false, false, true, true, true, false, false, true,
+    false, true, true, true, false, false, false, false, true, false, false, true, true, false,
+    false, false, true, true, false, true, false, false, false, false, false, false, false, true,
+    false, true, false, false, true, true, false, true, true, true, false, false, false, false,
+    true, false, true, false, false, false, false, false, true, false, false, false, true, false,
+    true, true, false, true, true, false, true, true, false, true, false, false, false, false,
+    false, false, true, true, false, false, false, false, false, false, true, false, true, false,
+    true, true, false, false, false, false, true, false, true, true, true, false, true, false,
+    false, true, false, true, false, false, false, false, false, true, true, false, false, true,
+    true, true, true, true, false, true, false, false, false, false, true, false, false, true,
+    false, false, false, false, true, true, true, true, false, false, true, true, false, true,
+    true, true, false, false, true, false, true, true, true, false, false, false, false, true,
+    false, false, true, false, false, false, true, false, true, false, false, false, false, true,
+    true, true, true, true, false, false, false, false, true, true, true, true, true, false, true,
+    false, true, true, false, false, true, false, false, true, true, true, true, true, true, false,
+    false, false, false, false, false, false, false, false, false, false, false, false, false,
+    false, false, false, false, false, false, false, false, false, false, false, false, false,
+    false,
 ];
 
 // The folowing constants are defined above get_custom_powers
 
-const EXPS4: [(usize, usize, usize); 64] = [
-    (1, 1, 0),
-    (1, 1, 1),
-    (1, 1, 1),
-    (0, 0, 0),
-    (0, 0, 1),
-    (1, 0, 1),
-    (0, 1, 0),
-    (1, 0, 1),
-    (1, 1, 0),
-    (1, 0, 1),
-    (0, 1, 0),
-    (1, 1, 0),
-    (1, 1, 0),
-    (1, 1, 0),
-    (0, 1, 0),
-    (0, 1, 0),
-    (0, 0, 1),
-    (1, 0, 1),
-    (1, 1, 0),
-    (0, 1, 0),
-    (1, 1, 0),
-    (1, 1, 0),
-    (1, 1, 0),
-    (0, 0, 1),
-    (0, 0, 1),
-    (1, 0, 1),
-    (1, 0, 1),
-    (1, 1, 0),
-    (1, 0, 0),
-    (1, 1, 0),
-    (0, 1, 0),
-    (1, 1, 0),
-    (1, 0, 0),
-    (0, 1, 0),
-    (0, 0, 0),
-    (1, 0, 0),
-    (1, 0, 0),
-    (1, 0, 1),
-    (0, 0, 1),
-    (0, 1, 1),
-    (0, 0, 1),
-    (0, 1, 1),
-    (0, 1, 1),
-    (0, 0, 0),
-    (1, 1, 1),
-    (1, 0, 1),
-    (1, 0, 1),
-    (0, 1, 1),
-    (1, 0, 1),
-    (0, 1, 1),
-    (0, 1, 1),
-    (1, 1, 0),
-    (1, 1, 0),
-    (1, 1, 0),
-    (1, 0, 0),
-    (0, 0, 1),
-    (1, 0, 0),
-    (0, 0, 1),
-    (1, 0, 1),
-    (1, 1, 0),
-    (1, 1, 1),
-    (0, 1, 1),
-    (0, 1, 0),
-    (1, 1, 1),
+const EXPS4: [(bool, bool, bool); 64] = [
+    (true, true, false),
+    (true, true, true),
+    (true, true, true),
+    (false, false, false),
+    (false, false, true),
+    (true, false, true),
+    (false, true, false),
+    (true, false, true),
+    (true, true, false),
+    (true, false, true),
+    (false, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (false, true, false),
+    (false, true, false),
+    (false, false, true),
+    (true, false, true),
+    (true, true, false),
+    (false, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (false, false, true),
+    (false, false, true),
+    (true, false, true),
+    (true, false, true),
+    (true, true, false),
+    (true, false, false),
+    (true, true, false),
+    (false, true, false),
+    (true, true, false),
+    (true, false, false),
+    (false, true, false),
+    (false, false, false),
+    (true, false, false),
+    (true, false, false),
+    (true, false, true),
+    (false, false, true),
+    (false, true, true),
+    (false, false, true),
+    (false, true, true),
+    (false, true, true),
+    (false, false, false),
+    (true, true, true),
+    (true, false, true),
+    (true, false, true),
+    (false, true, true),
+    (true, false, true),
+    (false, true, true),
+    (false, true, true),
+    (true, true, false),
+    (true, true, false),
+    (true, true, false),
+    (true, false, false),
+    (false, false, true),
+    (true, false, false),
+    (false, false, true),
+    (true, false, true),
+    (true, true, false),
+    (true, true, true),
+    (false, true, true),
+    (false, true, false),
+    (true, true, true),
 ];
 
-const EXPS2: [(usize, usize); 62] = [
-    (1, 0),
-    (1, 1),
-    (0, 0),
-    (1, 0),
-    (1, 0),
-    (1, 1),
-    (1, 0),
-    (1, 1),
-    (1, 0),
-    (0, 1),
-    (0, 1),
-    (1, 1),
-    (1, 1),
-    (0, 0),
-    (1, 1),
-    (0, 0),
-    (0, 0),
-    (0, 1),
-    (0, 1),
-    (1, 1),
-    (1, 1),
-    (1, 1),
-    (0, 1),
-    (1, 1),
-    (0, 0),
-    (1, 1),
-    (1, 0),
-    (1, 1),
-    (0, 0),
-    (1, 1),
-    (1, 1),
-    (1, 0),
-    (0, 0),
-    (0, 1),
-    (0, 0),
-    (1, 1),
-    (0, 1),
-    (0, 0),
-    (1, 0),
-    (0, 1),
-    (0, 1),
-    (1, 0),
-    (0, 1),
-    (0, 0),
-    (0, 0),
-    (0, 0),
-    (0, 1),
-    (1, 0),
-    (1, 1),
-    (0, 1),
-    (1, 1),
-    (1, 0),
-    (0, 1),
-    (0, 0),
-    (1, 0),
-    (0, 1),
-    (1, 0),
-    (1, 1),
-    (1, 0),
-    (1, 1),
-    (0, 1),
-    (1, 1),
+const EXPS2: [(bool, bool); 62] = [
+    (true, false),
+    (true, true),
+    (false, false),
+    (true, false),
+    (true, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (true, false),
+    (false, true),
+    (false, true),
+    (true, true),
+    (true, true),
+    (false, false),
+    (true, true),
+    (false, false),
+    (false, false),
+    (false, true),
+    (false, true),
+    (true, true),
+    (true, true),
+    (true, true),
+    (false, true),
+    (true, true),
+    (false, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (false, false),
+    (true, true),
+    (true, true),
+    (true, false),
+    (false, false),
+    (false, true),
+    (false, false),
+    (true, true),
+    (false, true),
+    (false, false),
+    (true, false),
+    (false, true),
+    (false, true),
+    (true, false),
+    (false, true),
+    (false, false),
+    (false, false),
+    (false, false),
+    (false, true),
+    (true, false),
+    (true, true),
+    (false, true),
+    (true, true),
+    (true, false),
+    (false, true),
+    (false, false),
+    (true, false),
+    (false, true),
+    (true, false),
+    (true, true),
+    (true, false),
+    (true, true),
+    (false, true),
+    (true, true),
 ];
 
-const EXPS0: [usize; 65] = [
-    0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0,
-    1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
-    0,
+const EXPS0: [bool; 65] = [
+    false, false, true, false, false, true, true, false, true, false, true, true, true, false,
+    true, false, false, false, true, false, false, true, false, true, false, true, true, false,
+    false, false, false, false, true, false, true, false, true, true, true, false, false, true,
+    true, true, true, false, true, false, true, true, false, false, true, false, false, false,
+    true, true, true, true, false, false, true, true, false,
 ];
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 35c867b4..ac4da123 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -129,11 +129,11 @@ global test_frob_fp12_6:
 
 %macro frob_fp6_1
     // stack: C0 , C1 , C2
-    %conj
+    %conj_fp254_2
     // stack: D0 , C1 , C2
     %swap_fp2_hole_2
     // stack: C2 , C1 , D0
-    %conj
+    %conj_fp254_2
     // stack: C2`, C1 , D0
     %frobt2_1
     // stack: D2 , C1 , D0
@@ -141,7 +141,7 @@ global test_frob_fp12_6:
     // stack: D0 , C1 , D2
     %swap_fp2
     // stack: C1 , D0 , D2
-    %conj
+    %conj_fp254_2
     // stack: C1`, D0 , D2
     %frobt1_1
     // stack: D1 , D0 , D2
@@ -167,11 +167,11 @@ global test_frob_fp12_6:
 
 %macro frob_fp6_3
     // stack: C0 , C1 , C2
-    %conj
+    %conj_fp254_2
     // stack: D0 , C1 , C2
     %swap_fp2_hole_2
     // stack: C2 , C1 , D0
-    %conj
+    %conj_fp254_2
     // stack: C2`, C1 , D0
     %frobt2_3
     // stack: D2 , C1 , D0
@@ -179,7 +179,7 @@ global test_frob_fp12_6:
     // stack: D0 , C1 , D2
     %swap_fp2
     // stack: C1 , D0 , D2
-    %conj
+    %conj_fp254_2
     // stack: C1`, D0 , D2
     %frobt1_3
     // stack: D1 , D0 , D2
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 89915038..179de101 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -7,7 +7,8 @@
 // cost: 2 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp2
     // stack:       ptr
-    DUP1  %add_const(1)
+    DUP1  
+    %add_const(1)
     // stack: ind1, ptr
     %mload_kernel_general
     // stack:   x1, ptr
@@ -17,7 +18,8 @@
     // stack:   x0, x1
 %endmacro 
 
-%macro conj
+/// complex conjugate
+%macro conj_fp254_2
     // stack: a,  b
     SWAP1 
     PUSH 0
@@ -71,13 +73,15 @@
     // stack:      b,  a , b
     DUP2
     // stack:  a , b,  a , b
-    PUSH 9  MULFP254
+    PUSH 9  
+    MULFP254
     // stack: 9a , b,  a , b
     SUBFP254
     // stack: 9a - b,  a , b
     SWAP2 
     // stack:  b , a, 9a - b
-    PUSH 9  MULFP254
+    PUSH 9  
+    MULFP254
     // stack  9b , a, 9a - b
     ADDFP254
     // stack: 9b + a, 9a - b 
@@ -114,23 +118,28 @@
 // cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp6
     // stack:                         ptr
-    DUP1  %add_const(4)
+    DUP1  
+    %add_const(4)
     // stack:                   ind4, ptr
     %mload_kernel_general
     // stack:                     x4, ptr
-    DUP2  %add_const(3)
+    DUP2  
+    %add_const(3)
     // stack:               ind3, x4, ptr
     %mload_kernel_general
     // stack:                 x3, x4, ptr
-    DUP3  %add_const(2)
+    DUP3  
+    %add_const(2)
     // stack:           ind2, x3, x4, ptr
     %mload_kernel_general
     // stack:             x2, x3, x4, ptr
-    DUP4  %add_const(1)
+    DUP4  
+    %add_const(1)
     // stack:       ind1, x2, x3, x4, ptr
     %mload_kernel_general
     // stack:         x1, x2, x3, x4, ptr
-    DUP5  %add_const(5)
+    DUP5  
+    %add_const(5)
     // stack:   ind5, x1, x2, x3, x4, ptr
     %mload_kernel_general
     // stack:     x5, x1, x2, x3, x4, ptr
@@ -143,23 +152,28 @@
 // cost: 6 loads + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp6(ptr)
     // stack:
-    PUSH $ptr  %add_const(5)
+    PUSH $ptr  
+    %add_const(5)
     // stack:                     ind5
     %mload_kernel_general
     // stack:                       x5
-    PUSH $ptr  %add_const(4)
+    PUSH $ptr  
+    %add_const(4)
     // stack:                 ind4, x5
     %mload_kernel_general
     // stack:                   x4, x5
-    PUSH $ptr  %add_const(3)
+    PUSH $ptr  
+    %add_const(3)
     // stack:             ind3, x4, x5
     %mload_kernel_general
     // stack:               x3, x4, x5
-    PUSH $ptr  %add_const(2)
+    PUSH $ptr  
+    %add_const(2)
     // stack:         ind2, x3, x4, x5
     %mload_kernel_general
     // stack:           x2, x3, x4, x5
-    PUSH $ptr  %add_const(1)
+    PUSH $ptr  
+    %add_const(1)
     // stack:     ind1, x2, x3, x4, x5
     %mload_kernel_general
     // stack:       x1, x2, x3, x4, x5
@@ -174,7 +188,8 @@
     // stack:      ptr, x0, x1, x2, x3, x4 , x5
     SWAP5
     // stack:       x4, x0, x1, x2, x3, ptr, x5
-    DUP6  %add_const(4)
+    DUP6  
+    %add_const(4)
     // stack: ind4, x4, x0, x1, x2, x3, ptr, x5
     %mstore_kernel_general
     // stack:           x0, x1, x2, x3, ptr, x5
@@ -182,15 +197,18 @@
     // stack:     ind0, x0, x1, x2, x3, ptr, x5
     %mstore_kernel_general
     // stack:               x1, x2, x3, ptr, x5
-    DUP4  %add_const(1)
+    DUP4  
+    %add_const(1)
     // stack:         ind1, x1, x2, x3, ptr, x5
     %mstore_kernel_general
     // stack:                   x2, x3, ptr, x5
-    DUP3  %add_const(2)
+    DUP3  
+    %add_const(2)
     // stack:             ind2, x2, x3, ptr, x5
     %mstore_kernel_general
     // stack:                       x3, ptr, x5
-    DUP2  %add_const(3)
+    DUP2  
+    %add_const(3)
     // stack:                 ind3, x3, ptr, x5
     %mstore_kernel_general
     // stack:                           ptr, x5
@@ -205,37 +223,47 @@
     // stack:        ptr, x0, x1, x2, x3, x4, x5
     SWAP6
     // stack:         x5, x0, x1, x2, x3, x4, ptr
-    PUSH 2  MULFP254
+    PUSH 2  
+    MULFP254
     // stack:       2*x5, x0, x1, x2, x3, x4, ptr
-    DUP7  %add_const(5)
+    DUP7  
+    %add_const(5)
     // stack: ind5, 2*x5, x0, x1, x2, x3, x4, ptr
     %mstore_kernel_general
     // stack:             x0, x1, x2, x3, x4, ptr
-    PUSH 2  MULFP254
+    PUSH 2  
+    MULFP254
     // stack:           2*x0, x1, x2, x3, x4, ptr
     DUP6
     // stack:     ind0, 2*x0, x1, x2, x3, x4, ptr
     %mstore_kernel_general
     // stack:                 x1, x2, x3, x4, ptr
-    PUSH 2  MULFP254
+    PUSH 2  
+    MULFP254
     // stack:               2*x1, x2, x3, x4, ptr
-    DUP5  %add_const(1)
+    DUP5  
+    %add_const(1)
     // stack:         ind1, 2*x1, x2, x3, x4, ptr
     %mstore_kernel_general
     // stack:                     x2, x3, x4, ptr
-    PUSH 2  MULFP254
+    PUSH 2  
+    MULFP254
     // stack:                   2*x2, x3, x4, ptr
-    DUP4  %add_const(2)
+    DUP4  
+    %add_const(2)
     // stack:             ind2, 2*x2, x3, x4, ptr
     %mstore_kernel_general
     // stack:                         x3, x4, ptr
-    PUSH 2  MULFP254
+    PUSH 2 
+    MULFP254
     // stack:                       2*x3, x4, ptr
-    DUP3  %add_const(3)
+    DUP3  
+    %add_const(3)
     // stack:                 ind3, 2*x3, x4, ptr
     %mstore_kernel_general
     // stack:                             x4, ptr
-    PUSH 2  MULFP254
+    PUSH 2  
+    MULFP254
     // stack:                           2*x4, ptr
     SWAP1
     // stack:                           ptr, 2*x4
@@ -252,23 +280,28 @@
     // stack: ind0, x0, x1, x2, x3, x4, x5
     %mstore_kernel_general
     // stack:           x1, x2, x3, x4, x5
-    PUSH $ptr  %add_const(1)
+    PUSH $ptr  
+    %add_const(1)
     // stack:     ind1, x1, x2, x3, x4, x5
     %mstore_kernel_general
     // stack:               x2, x3, x4, x5
-    PUSH $ptr  %add_const(2)
+    PUSH $ptr  
+    %add_const(2)
     // stack:         ind2, x2, x3, x4, x5
     %mstore_kernel_general
     // stack:                   x3, x4, x5
-    PUSH $ptr  %add_const(3)
+    PUSH $ptr  
+    %add_const(3)
     // stack:             ind3, x3, x4, x5
     %mstore_kernel_general
     // stack:                       x4, x5
-    PUSH $ptr  %add_const(4)
+    PUSH $ptr  
+    %add_const(4)
     // stack:                 ind4, x4, x5
     %mstore_kernel_general
     // stack:                           x5
-    PUSH $ptr  %add_const(5)
+    PUSH $ptr  
+    %add_const(5)
     // stack:                     ind5, x5
     %mstore_kernel_general
     // stack:
@@ -277,25 +310,30 @@
 // cost: store (40) + i9 (9) = 49
 %macro store_fp6_sh(ptr)
     // stack:       x0, x1, x2, x3, x4, x5
-    PUSH $ptr  %add_const(2)
+    PUSH $ptr  
+    %add_const(2)
     // stack: ind2, x0, x1, x2, x3, x4, x5
     %mstore_kernel_general
     // stack:           x1, x2, x3, x4, x5
-    PUSH $ptr  %add_const(3)
+    PUSH $ptr  
+    %add_const(3)
     // stack:     ind3, x1, x2, x3, x4, x5
     %mstore_kernel_general
     // stack:               x2, x3, x4, x5
-    PUSH $ptr  %add_const(4)
+    PUSH $ptr  
+    %add_const(4)
     // stack:         ind4, x2, x3, x4, x5
     %mstore_kernel_general
     // stack:                   x3, x4, x5
-    PUSH $ptr  %add_const(5)
+    PUSH $ptr  
+    %add_const(5)
     // stack:             ind5, x3, x4, x5
     %mstore_kernel_general
     // stack:                       x4, x5
     %i9
     // stack:                       y5, y4
-    PUSH $ptr  %add_const(1)
+    PUSH $ptr  
+    %add_const(1)
     // stack:                 ind1, y5, y4
     %mstore_kernel_general
     // stack:                           y4
@@ -868,47 +906,58 @@
 
 %macro load_fp12
     // stack:                                                          ptr
-    DUP1  %add_const(10)
+    DUP1  
+    %add_const(10)
     // stack:                                                   ind10, ptr
     %mload_kernel_general
     // stack:                                                     x10, ptr
-    DUP2  %add_const(9)
+    DUP2  
+    %add_const(9)
     // stack:                                              ind09, x10, ptr
     %mload_kernel_general
     // stack:                                                x09, x10, ptr
-    DUP3  %add_const(8)
+    DUP3  
+    %add_const(8)
     // stack:                                         ind08, x09, x10, ptr
     %mload_kernel_general
     // stack:                                           x08, x09, x10, ptr
-    DUP4  %add_const(7)
+    DUP4  
+    %add_const(7)
     // stack:                                    ind07, x08, x09, x10, ptr
     %mload_kernel_general
     // stack:                                      x07, x08, x09, x10, ptr
-    DUP5  %add_const(6)
+    DUP5  
+    %add_const(6)
     // stack:                               ind06, x07, x08, x09, x10, ptr
     %mload_kernel_general
     // stack:                                 x06, x07, x08, x09, x10, ptr
-    DUP6  %add_const(5)
+    DUP6  
+    %add_const(5)
     // stack:                          ind05, x06, x07, x08, x09, x10, ptr
     %mload_kernel_general
     // stack:                            x05, x06, x07, x08, x09, x10, ptr
-    DUP7  %add_const(4)
+    DUP7  
+    %add_const(4)
     // stack:                     ind04, x05, x06, x07, x08, x09, x10, ptr
     %mload_kernel_general
     // stack:                       x04, x05, x06, x07, x08, x09, x10, ptr
-    DUP8  %add_const(3)
+    DUP8  
+    %add_const(3)
     // stack:                ind03, x04, x05, x06, x07, x08, x09, x10, ptr
     %mload_kernel_general
     // stack:                  x03, x04, x05, x06, x07, x08, x09, x10, ptr
-    DUP9  %add_const(2)
+    DUP9  
+    %add_const(2)
     // stack:           ind02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
     %mload_kernel_general
     // stack:             x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
-    DUP10  %add_const(1)
+    DUP10  
+    %add_const(1)
     // stack:      ind01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
     %mload_kernel_general
     // stack:        x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
-    DUP11  %add_const(11)
+    DUP11  
+    %add_const(11)
     // stack: ind11, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
     %mload_kernel_general
     // stack:   x11, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
@@ -922,7 +971,8 @@
     // stack:        ptr, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
     SWAP11
     // stack:        x10, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    DUP12  %add_const(10)
+    DUP12  
+    %add_const(10)
     // stack: ind10, x10, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:             x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
@@ -930,39 +980,48 @@
     // stack:      ind00, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                  x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    DUP10  %add_const(01)
+    DUP10  
+    %add_const(01)
     // stack:           ind01, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                       x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    DUP9   %add_const(02)
+    DUP9   
+    %add_const(02)
     // stack:                ind02, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                            x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    DUP8   %add_const(03)
+    DUP8   
+    %add_const(03)
     // stack:                     ind03, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                 x04, x05, x06, x07, x08, x09, ptr, x11
-    DUP7   %add_const(04)
+    DUP7   
+    %add_const(04)
     // stack:                          ind04, x04, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                      x05, x06, x07, x08, x09, ptr, x11
-    DUP6   %add_const(05)
+    DUP6   
+    %add_const(05)
     // stack:                               ind05, x05, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                           x06, x07, x08, x09, ptr, x11
-    DUP5   %add_const(06)
+    DUP5   
+    %add_const(06)
     // stack:                                    ind06, x06, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                                x07, x08, x09, ptr, x11
-    DUP4   %add_const(07)
+    DUP4   
+    %add_const(07)
     // stack:                                         ind07, x07, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                                     x08, x09, ptr, x11
-    DUP3   %add_const(08)
+    DUP3   
+    %add_const(08)
     // stack:                                              ind08, x08, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                                          x09, ptr, x11
-    DUP2   %add_const(09)
+    DUP2   
+    %add_const(09)
     // stack:                                                   ind09, x09, ptr, x11
     %mstore_kernel_general
     // stack:                                                               ptr, x11
@@ -972,102 +1031,125 @@
     // stack:                                                            
 %endmacro
 
+/// moves fp12 from src..src+12 to dest..dest+12
+/// these should not overlap. leaves dest on stack
 %macro move_fp12
-    // stack:              ptr, ptr'
+    // stack:              src, dest
     DUP1  
-    // stack:       ind00, ptr, ptr'
+    // stack:       ind00, src, dest
     %mload_kernel_general
-    // stack:         x00, ptr, ptr'
+    // stack:         x00, src, dest
     DUP3
-    // stack: ind00', x00, ptr, ptr'
+    // stack: ind00', x00, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(1)
-    // stack:       ind01, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(1)
+    // stack:       ind01, src, dest
     %mload_kernel_general
-    // stack:         x01, ptr, ptr'
-    DUP3  %add_const(1)
-    // stack: ind01', x01, ptr, ptr'
+    // stack:         x01, src, dest
+    DUP3  
+    %add_const(1)
+    // stack: ind01', x01, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(2)
-    // stack:       ind02, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(2)
+    // stack:       ind02, src, dest
     %mload_kernel_general
-    // stack:         x02, ptr, ptr'
-    DUP3  %add_const(2)
-    // stack: ind02', x02, ptr, ptr'
+    // stack:         x02, src, dest
+    DUP3  
+    %add_const(2)
+    // stack: ind02', x02, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(3)
-    // stack:       ind03, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(3)
+    // stack:       ind03, src, dest
     %mload_kernel_general
-    // stack:         x03, ptr, ptr'
-    DUP3  %add_const(3)
-    // stack: ind03', x03, ptr, ptr'
+    // stack:         x03, src, dest
+    DUP3  
+    %add_const(3)
+    // stack: ind03', x03, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(4)
-    // stack:       ind04, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(4)
+    // stack:       ind04, src, dest
     %mload_kernel_general
-    // stack:         x04, ptr, ptr'
-    DUP3  %add_const(4)
-    // stack: ind04', x04, ptr, ptr'
+    // stack:         x04, src, dest
+    DUP3 
+    %add_const(4)
+    // stack: ind04', x04, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(5)
-    // stack:       ind05, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(5)
+    // stack:       ind05, src, dest
     %mload_kernel_general
-    // stack:         x05, ptr, ptr'
-    DUP3  %add_const(5)
-    // stack: ind05', x05, ptr, ptr'
+    // stack:         x05, src, dest
+    DUP3  
+    %add_const(5)
+    // stack: ind05', x05, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(6)
-    // stack:       ind06, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(6)
+    // stack:       ind06, src, dest
     %mload_kernel_general
-    // stack:         x06, ptr, ptr'
-    DUP3  %add_const(6)
-    // stack: ind06', x06, ptr, ptr'
+    // stack:         x06, src, dest
+    DUP3  
+    %add_const(6)
+    // stack: ind06', x06, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(7)
-    // stack:       ind07, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(7)
+    // stack:       ind07, src, dest
     %mload_kernel_general
-    // stack:         x07, ptr, ptr'
-    DUP3  %add_const(7)
-    // stack: ind07', x07, ptr, ptr'
+    // stack:         x07, src, dest
+    DUP3  
+    %add_const(7)
+    // stack: ind07', x07, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(8)
-    // stack:       ind08, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(8)
+    // stack:       ind08, src, dest
     %mload_kernel_general
-    // stack:         x08, ptr, ptr'
-    DUP3  %add_const(8)
-    // stack: ind08', x08, ptr, ptr'
+    // stack:         x08, src, dest
+    DUP3  
+    %add_const(8)
+    // stack: ind08', x08, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(9)
-    // stack:       ind09, ptr, ptr'
+    // stack:              src, dest
+    DUP1 
+    %add_const(9)
+    // stack:       ind09, src, dest
     %mload_kernel_general
-    // stack:         x09, ptr, ptr'
-    DUP3  %add_const(9)
-    // stack: ind09', x09, ptr, ptr'
+    // stack:         x09, src, dest
+    DUP3  
+    %add_const(9)
+    // stack: ind09', x09, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
-    DUP1  %add_const(10)
-    // stack:       ind10, ptr, ptr'
+    // stack:              src, dest
+    DUP1  
+    %add_const(10)
+    // stack:       ind10, src, dest
     %mload_kernel_general
-    // stack:         x10, ptr, ptr'
-    DUP3  %add_const(10)
-    // stack: ind10', x10, ptr, ptr'
+    // stack:         x10, src, dest
+    DUP3  
+    %add_const(10)
+    // stack: ind10', x10, src, dest
     %mstore_kernel_general
-    // stack:              ptr, ptr'
+    // stack:              src, dest
     %add_const(11)
-    // stack:            ind11, ptr'
+    // stack:            ind11, dest
     %mload_kernel_general
-    // stack:              x11, ptr'
-    DUP2  %add_const(11)
-    // stack:      ind11', x11, ptr'
+    // stack:              x11, dest
+    DUP2  
+    %add_const(11)
+    // stack:      ind11', x11, dest
     %mstore_kernel_general
 %endmacro
 

From 6e8d4a57a4da268e4df1745a4bf22077fb3f5244 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 16:48:28 +0700
Subject: [PATCH 166/201] fix

---
 evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm | 4 +++-
 evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm         | 2 +-
 evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm            | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 179de101..9097c72b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -66,7 +66,9 @@
     // stack:     cx, cy 
 %endmacro
 
-// cost: 9; note this returns y, x for the output x + yi
+/// Given z = x + iy: Fp254_2, return the product z' = (9 + i) * z: Fp254_2 (not a conjugate)
+/// where input is represented z.re, z.im and output as z'.im, z'.re
+/// cost: 9; note this returns y, x for the output x + yi
 %macro i9
     // stack:          a , b
     DUP2
diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm
index 6ebd89dc..a1c2ff3c 100644
--- a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm
+++ b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm
@@ -53,7 +53,7 @@ ecrecover_valid_input:
     // stack: y, hash, r, s, retdest
     DUP3
     // stack: r, y, hash, x, s, retdest (r=x)
-    %inv_fp254_secp_scalar
+    %inverse_secp_scalar
     // stack: r^(-1), y, hash, x, s, retdest
     DUP1
     // stack: r^(-1), r^(-1), y, hash, x, s, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm
index 17d5c623..d878dc14 100644
--- a/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm
+++ b/evm/src/cpu/kernel/asm/curve/secp256k1/moddiv.asm
@@ -4,7 +4,7 @@
 // Returns y * (x^-1) where the inverse is taken modulo N
 %macro moddiv_secp_base
     // stack: x, y
-    %inv_fp254_secp_base
+    %inverse_secp_base
     // stack: x^-1, y
     %mulmodn_secp_base
 %endmacro

From 5b124fb121e2390b689e4efbac60b2e2ccac57fc Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 26 Jan 2023 10:42:31 +0700
Subject: [PATCH 167/201] minor

---
 evm/src/bn254_pairing.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 873f2b16..9ebf4459 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -124,7 +124,7 @@ pub fn invariance_inducing_power(f: Fp12) -> Fp12 {
 /// we then invert y^a0 and return
 ///     y^a2, y^a1 = y^a4 * y^a2 * y^a2 * y^(-a0), y^(-a0)
 ///
-/// Represent a4, a2, a0 in *little endian* binary, define
+/// Representing a4, a2, a0 in *little endian* binary, define
 ///     EXPS4 = [(a4[i], a2[i], a0[i]) for i in       0..len(a4)]
 ///     EXPS2 = [       (a2[i], a0[i]) for i in len(a4)..len(a2)]
 ///     EXPS0 = [               a0[i]  for i in len(a2)..len(a0)]
@@ -165,7 +165,7 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     // leading term of a2 is always 1
     y2 = y2 * sq;
 
-    // must keep multiplying remaining value: a0
+    // must keep multiplying final remaining value: a0
     for a in EXPS0 {
         if a {
             y0 = y0 * sq;
@@ -178,7 +178,7 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) {
     // invert y0 to compute y^(-a0)
     let y0_inv = y0.inv();
 
-    // return y2, y1 = y4 * y2^2 * y^(-a0), y^(-a0)
+    // return y^a2 = y2, y^a1 = y4 * y2^2 * y^(-a0), y^(-a0)
     (y2, y4 * y2 * y2 * y0_inv, y0_inv)
 }
 

From ea01e19c417aa17dd1a5d3c0b781645f47a68bc6 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 28 Jan 2023 14:17:55 +0800
Subject: [PATCH 168/201] comment

---
 evm/src/bn254_arithmetic.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 3db48591..5356ffbc 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -420,13 +420,13 @@ impl Fp12 {
     /// lands in Fp, and hence the inverse of x is given by
     ///     (Prod_{i=1}^11 x_i) / phi
     /// The 6th Frob map is nontrivial but leaves Fp6 fixed and hence must be the conjugate:
-    ///     x_6 = (a + bz)_6 = a - bz = conj_fp12(x)
+    ///     x_6 = (a + bz)_6 = a - bz = x.conj()
     /// Letting prod_17 = x_1 * x_7, the remaining factors in the numerator can be expresed as:
     ///     [(prod_17) * (prod_17)_2] * (prod_17)_4 * [(prod_17) * (prod_17)_2]_1
     /// By Galois theory, both the following are in Fp2 and are complex conjugates
     ///     prod_odds,  prod_evens
-    /// Thus phi = norm(prod_odds), and hence the inverse is given by
-    ///    normalize(prod_odds) * prod_evens_except_six * conj_fp12(x)
+    /// Thus phi = ||prod_odds||^2, and hence the inverse is given by
+    ///    prod_odds * prod_evens_except_six * x.conj() / ||prod_odds||^2
     pub fn inv(self) -> Fp12 {
         let prod_17 = (self.frob(1) * self.frob(7)).z0;
         let prod_1379 = prod_17 * prod_17.frob(2);

From a950a262665d1d19cf33d735052db5ac7ff42a7a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 1 Feb 2023 18:57:50 -0800
Subject: [PATCH 169/201] add comments

---
 evm/src/bn254_pairing.rs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 9ebf4459..4e5456bc 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -42,31 +42,35 @@ pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
     invariance_inducing_power(miller_output)
 }
 
+/// Standard code for miller loop, can be found on page 99 at this url:
+/// https://static1.squarespace.com/static/5fdbb09f31d71c1227082339/t/5ff394720493bd28278889c6/1609798774687/PairingsForBeginners.pdf#page=107
+/// where EXP is a hardcoding of the array of Booleans that the loop traverses
 pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 {
-    let mut o = p;
+    let mut r = p;
     let mut acc = UNIT_FP12;
     let mut line;
 
     for i in EXP {
-        acc = acc * acc;
-        line = tangent(o, q);
-        acc = line * acc;
-        o = o + o;
+        line = tangent(r, q);
+        r = r + r;
+        acc = line * acc * acc;
         if i {
-            line = cord(p, o, q);
+            line = cord(p, r, q);
+            r = r + p;
             acc = line * acc;
-            o = o + p;
         }
     }
     acc
 }
 
+/// The sloped line function for doubling a point
 pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 {
     let cx = -Fp::new(3) * p.x * p.x;
     let cy = Fp::new(2) * p.y;
     sparse_embed(p.y * p.y - Fp::new(9), q.x.scale(cx), q.y.scale(cy))
 }
 
+/// The sloped line function for adding two points
 pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 {
     let cx = p2.y - p1.y;
     let cy = p1.x - p2.x;

From 962754be99f296a1810500decf1673bf40f3fb29 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 1 Feb 2023 19:15:56 -0800
Subject: [PATCH 170/201] rand impl

---
 evm/src/bn254_arithmetic.rs       | 62 ++++++++++++++++---------------
 evm/src/bn254_pairing.rs          |  8 ++--
 evm/src/cpu/kernel/tests/bn254.rs | 18 ++++++---
 3 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 5356ffbc..cf26e208 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,7 +1,8 @@
 use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use ethereum_types::U256;
-use rand::{thread_rng, Rng};
+use rand::distributions::{Distribution, Standard};
+use rand::Rng;
 
 pub const BN_BASE: U256 = U256([
     0x3c208c16d87cfd47,
@@ -23,6 +24,15 @@ impl Fp {
     }
 }
 
+impl Distribution<Fp> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp {
+        let (x0, x1, x2, x3) = rng.gen::<(u64, u64, u64, u64)>();
+        Fp {
+            val: U256([x0, x1, x2, x3]) % BN_BASE,
+        }
+    }
+}
+
 impl Add for Fp {
     type Output = Self;
 
@@ -113,6 +123,13 @@ pub const UNIT_FP2: Fp2 = Fp2 {
     im: ZERO_FP,
 };
 
+impl Distribution<Fp2> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp2 {
+        let (re, im) = rng.gen::<(Fp, Fp)>();
+        Fp2 { re, im }
+    }
+}
+
 impl Add for Fp2 {
     type Output = Self;
 
@@ -229,6 +246,13 @@ pub const UNIT_FP6: Fp6 = Fp6 {
     t2: ZERO_FP2,
 };
 
+impl Distribution<Fp6> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp6 {
+        let (t0, t1, t2) = rng.gen::<(Fp2, Fp2, Fp2)>();
+        Fp6 { t0, t1, t2 }
+    }
+}
+
 impl Add for Fp6 {
     type Output = Self;
 
@@ -372,6 +396,13 @@ pub const UNIT_FP12: Fp12 = Fp12 {
     z1: ZERO_FP6,
 };
 
+impl Distribution<Fp12> for Standard {
+    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp12 {
+        let (z0, z1) = rng.gen::<(Fp6, Fp6)>();
+        Fp12 { z0, z1 }
+    }
+}
+
 impl Mul for Fp12 {
     type Output = Self;
 
@@ -833,32 +864,3 @@ const FROB_Z: [Fp2; 12] = [
         },
     },
 ];
-
-pub fn gen_fp() -> Fp {
-    let mut rng = thread_rng();
-    let x64 = rng.gen::<u64>();
-    let x256 = U256([x64, x64, x64, x64]) % BN_BASE;
-    Fp { val: x256 }
-}
-
-pub fn gen_fp2() -> Fp2 {
-    Fp2 {
-        re: gen_fp(),
-        im: gen_fp(),
-    }
-}
-
-pub fn gen_fp6() -> Fp6 {
-    Fp6 {
-        t0: gen_fp2(),
-        t1: gen_fp2(),
-        t2: gen_fp2(),
-    }
-}
-
-pub fn gen_fp12() -> Fp12 {
-    Fp12 {
-        z0: gen_fp6(),
-        z1: gen_fp6(),
-    }
-}
diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 4e5456bc..71f9575f 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -1,6 +1,8 @@
 use std::ops::Add;
 
-use crate::bn254_arithmetic::{gen_fp, gen_fp2, Fp, Fp12, Fp2, Fp6, UNIT_FP12, ZERO_FP, ZERO_FP2};
+use rand::Rng;
+
+use crate::bn254_arithmetic::{Fp, Fp12, Fp2, Fp6, UNIT_FP12, ZERO_FP, ZERO_FP2};
 
 // The curve consists of pairs (x, y): (Fp, Fp) | y^2 = x^3 + 2
 #[derive(Debug, Copy, Clone, PartialEq)]
@@ -98,8 +100,8 @@ pub fn sparse_embed(g000: Fp, g01: Fp2, g11: Fp2) -> Fp12 {
     Fp12 { z0: g0, z1: g1 }
 }
 
-pub fn gen_fp12_sparse() -> Fp12 {
-    sparse_embed(gen_fp(), gen_fp2(), gen_fp2())
+pub fn gen_fp12_sparse<R: Rng + ?Sized>(rng: &mut R) -> Fp12 {
+    sparse_embed(rng.gen::<Fp>(), rng.gen::<Fp2>(), rng.gen::<Fp2>())
 }
 
 /// The output y of the miller loop is not an invariant,
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 19ff138c..6e936cd5 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -3,8 +3,9 @@ use std::ops::Range;
 
 use anyhow::Result;
 use ethereum_types::U256;
+use rand::Rng;
 
-use crate::bn254_arithmetic::{gen_fp12, Fp, Fp12, Fp2};
+use crate::bn254_arithmetic::{Fp, Fp12, Fp2};
 use crate::bn254_pairing::{gen_fp12_sparse, tate, Curve, TwistedCurve};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
@@ -78,9 +79,10 @@ fn test_mul_fp12() -> Result<()> {
     let in1: usize = 76;
     let out: usize = 88;
 
-    let f: Fp12 = gen_fp12();
-    let g: Fp12 = gen_fp12();
-    let h: Fp12 = gen_fp12_sparse();
+    let mut rng = rand::thread_rng();
+    let f: Fp12 = rng.gen::<Fp12>();
+    let g: Fp12 = rng.gen::<Fp12>();
+    let h: Fp12 = gen_fp12_sparse(&mut rng);
 
     let setup_normal: InterpreterSetup = setup_mul_test(in0, in1, out, f, g, "mul_fp12");
     let setup_sparse: InterpreterSetup = setup_mul_test(in0, in1, out, f, h, "mul_fp12_sparse");
@@ -116,7 +118,9 @@ fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
 #[test]
 fn test_frob_fp12() -> Result<()> {
     let ptr: usize = 100;
-    let f: Fp12 = gen_fp12();
+
+    let mut rng = rand::thread_rng();
+    let f: Fp12 = rng.gen::<Fp12>();
 
     let setup_frob_1 = setup_frob_test(ptr, f, "test_frob_fp12_1");
     let setup_frob_2 = setup_frob_test(ptr, f, "test_frob_fp12_2");
@@ -150,8 +154,10 @@ fn test_frob_fp12() -> Result<()> {
 fn test_inv_fp254_12() -> Result<()> {
     let ptr: usize = 100;
     let inv: usize = 112;
-    let f: Fp12 = gen_fp12();
 
+    let mut rng = rand::thread_rng();
+    let f: Fp12 = rng.gen::<Fp12>();
+    
     let setup = InterpreterSetup {
         label: "inv_fp254_12".to_string(),
         stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],

From e1dca8703c9940bad6496aaf700f301937c0237b Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 1 Feb 2023 19:19:36 -0800
Subject: [PATCH 171/201] name

---
 evm/src/cpu/kernel/tests/bn254.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 6e936cd5..57a4bf8d 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -74,7 +74,7 @@ fn setup_mul_test(
 }
 
 #[test]
-fn test_mul_fp12() -> Result<()> {
+fn test_mul_fp254_12() -> Result<()> {
     let in0: usize = 64;
     let in1: usize = 76;
     let out: usize = 88;
@@ -116,7 +116,7 @@ fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
 }
 
 #[test]
-fn test_frob_fp12() -> Result<()> {
+fn test_frob_fp254_12() -> Result<()> {
     let ptr: usize = 100;
 
     let mut rng = rand::thread_rng();
@@ -157,7 +157,7 @@ fn test_inv_fp254_12() -> Result<()> {
 
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
-    
+
     let setup = InterpreterSetup {
         label: "inv_fp254_12".to_string(),
         stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],

From c107c5055e1564d4eeacdcbe8ce4449573a06a99 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 1 Feb 2023 19:30:04 -0800
Subject: [PATCH 172/201] comments

---
 .../asm/curve/bn254/curve_arithmetic/final_power.asm       | 7 ++++++-
 .../kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm   | 2 +-
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm   | 4 +++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
index a38c8a65..6d27ed93 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
@@ -1,3 +1,8 @@
+/// To make the Tate pairing an invariant, the final step is to exponentiate by
+///     (p^12 - 1)/N = (p^6 - 1)(p^2 + 1)(p^4 - p^2 + 1)/N
+/// The function in this module enacts the final exponentiation, by
+///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
+///
 /// def final_exp(y):
 ///     y4, y2, y0 = 1, 1, 1
 ///     power_loop_4()
@@ -15,7 +20,7 @@
 ///     y  = y.frob(3)
 ///     y2 = y2.frob(2)
 ///     y1 = y1.frob(1)
-///     return y * y2 * y4 * y0
+///     return y * y2 * y1 * y0
 
 global final_exp:
     // stack:                  val, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index 3069107f..ef0d1c96 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -360,7 +360,7 @@ square_fp12_1:
     %jump(square_fp6)
 square_fp12_2:
     // stack:                                           f'f', inp, f, square_fp12_3, out
-    %sh
+    %sh_fp254_6
     // stack:                                       sh(f'f'), inp, f, square_fp12_3, out
     %swap_fp6_hole
     // stack:                                       f, inp, sh(f'f'), square_fp12_3, out
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 9097c72b..092d38aa 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -490,7 +490,9 @@
     // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
 %endmacro
 
-%macro sh
+/// multiply (a + bt + ct^2) by t:
+///     t(a + bt + ct^2) = at + bt^2 + ct^3 = (9+i)c + at + bt^2
+%macro sh_fp254_6
     // stack: f0 , f0_, f1,  f1_, f2 , f2_
     SWAP2
     // stack: f1 , f0_, g0 , f1_, f2 , f2_

From 0f030fae4a85f7105b53f2a3d4585182136dc3b1 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sun, 5 Feb 2023 17:40:04 -0800
Subject: [PATCH 173/201] naming for global labels

---
 evm/src/cpu/kernel/aggregator.rs              |  4 +-
 .../bn254/curve_arithmetic/final_power.asm    | 50 +++++++++----------
 .../bn254/curve_arithmetic/miller_loop.asm    | 16 +++---
 .../bn254/curve_arithmetic/tate_pairing.asm   | 26 +++++-----
 .../{fp12_mul.asm => degree_12_mul.asm}       | 20 ++++----
 .../{fp6_mul.asm => degree_6_mul.asm}         |  4 +-
 .../bn254/field_arithmetic/frobenius.asm      | 50 +++++++++----------
 .../curve/bn254/field_arithmetic/inverse.asm  |  2 +-
 evm/src/cpu/kernel/tests/bn254.rs             | 16 +++---
 9 files changed, 94 insertions(+), 94 deletions(-)
 rename evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/{fp12_mul.asm => degree_12_mul.asm} (98%)
 rename evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/{fp6_mul.asm => degree_6_mul.asm} (99%)

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 7246f913..3c998449 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -31,8 +31,8 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/fp6_mul.asm"),
-        include_str!("asm/curve/bn254/field_arithmetic/fp12_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/degree_6_mul.asm"),
+        include_str!("asm/curve/bn254/field_arithmetic/degree_12_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/util.asm"),
         include_str!("asm/curve/common.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
index 6d27ed93..bfb5b812 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
@@ -22,7 +22,7 @@
 ///     y1 = y1.frob(1)
 ///     return y * y2 * y1 * y0
 
-global final_exp:
+global bn254_final_exp:
     // stack:                  val, retdest
     %stack (val) -> (val, 300, val)
     // stack:        val, 300, val, retdest
@@ -47,43 +47,43 @@ make_term_1:
     // stack:                             val, retdest  {212: y2, 224: y4, 236: y0^-1}
     %stack () -> (212, 224, 224, make_term_2)
     // stack: 212, 224, 224, make_term_2, val, retdest  {212: y2, 224: y4, 236: y0^-1}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 make_term_2:
     // stack:                             val, retdest  {212: y2, 224: y4 * y2, 236: y0^-1}
     %stack () -> (212, 224, 224, make_term_3)
     // stack: 212, 224, 224, make_term_3, val, retdest  {212: y2, 224: y4 * y2, 236: y0^-1}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 make_term_3:
     // stack:                             val, retdest  {212: y2, 224: y4 * y2^2, 236: y0^-1}
     %stack () -> (236, 224, 224, final_power)
     // stack: 236, 224, 224, final_power, val, retdest  {212: y2, 224: y4 * y2^2, 236: y0^-1}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 
 final_power:
     // stack:                            val, retdest  {val: y  , 212:  y^a2   , 224:  y^a1   , 236: y^a0}
-    %frob_fp12_3
+    %frob_fp254_12_3
     // stack:                            val, retdest  {val: y_3, 212:  y^a2   , 224:  y^a1   , 236: y^a0}
     %stack () -> (212, 212)
-    %frob_fp12_2_
+    %frob_fp254_12_2_
     POP
     // stack:                            val, retdest  {val: y_3, 212: (y^a2)_2, 224:  y^a1   , 236: y^a0}
     PUSH 224
-    %frob_fp12_1
+    %frob_fp254_12_1
     POP
     // stack:                            val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
     %stack (val) -> (212, val, val, penult_mul, val)
     // stack: 212, val, val, penult_mul, val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 penult_mul:
     // stack:                            val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
     %stack (val) -> (224, val, val, final_mul, val)
     // stack:  224, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 final_mul: 
     // stack:                            val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
     %stack (val) -> (236, val, val)
     // stack:                  236, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 
 
 /// def power_loop_4():
@@ -95,7 +95,7 @@ final_mul:
 ///             y2 *= acc
 ///         if c:
 ///             y0 *= acc
-///         acc = square_fp12(acc)
+///         acc = square_fp254_12(acc)
 ///     y4 *= acc
 ///
 /// def power_loop_2():
@@ -105,7 +105,7 @@ final_mul:
 ///            y2 *= acc
 ///        if b:
 ///            y0 *= acc
-///        acc = square_fp12(acc)
+///        acc = square_fp254_12(acc)
 ///     y2 *= acc
 ///
 /// def power_loop_0():
@@ -113,7 +113,7 @@ final_mul:
 ///         a = load(i, power_data_0)
 ///         if a:
 ///             y0 *= acc
-///         acc = square_fp12(acc)
+///         acc = square_fp254_12(acc)
 ///     y0 *= acc
 
 power_loop_4:
@@ -139,7 +139,7 @@ power_loop_4:
     // stack:      224, 224, power_loop_4_b,  bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP8
     // stack: sqr, 224, 224, power_loop_4_b,  bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 power_loop_4_b:
     // stack:                               bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP1  
@@ -153,7 +153,7 @@ power_loop_4_b:
     // stack:      212, 212, power_loop_4_c, c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP8
     // stack: sqr, 212, 212, power_loop_4_c, c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 power_loop_4_c:
     // stack:                              c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     ISZERO
@@ -164,7 +164,7 @@ power_loop_4_c:
     // stack:      200, 200, power_loop_4_sq, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP7
     // stack: sqr, 200, 200, power_loop_4_sq, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 power_loop_4_sq:
     // stack:                         i, j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_4  
@@ -172,7 +172,7 @@ power_loop_4_sq:
     DUP5  
     DUP1
     // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(square_fp12)
+    %jump(square_fp254_12)
 power_loop_4_end:
     // stack:                           0, j, k, sqr  {200: y0, 212: y2, 224: y4}
     POP  
@@ -181,7 +181,7 @@ power_loop_4_end:
     // stack:      224, 224, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP6
     // stack: sqr, 224, 224, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 
 power_loop_2:
     // stack:                                   j  , k, sqr  {200: y0, 212: y2, 224: y4}
@@ -206,7 +206,7 @@ power_loop_2:
     // stack:      212, 212, power_loop_2_b, b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP7
     // stack: sqr, 212, 212, power_loop_2_b, b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 power_loop_2_b:
     // stack:                              b, j, k, sqr  {200: y0, 212: y2, 224: y4}
     ISZERO
@@ -217,7 +217,7 @@ power_loop_2_b:
     // stack:      200, 200, power_loop_2_sq, j, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP6
     // stack: sqr, 200, 200, power_loop_2_sq, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 power_loop_2_sq:
     // stack:                         j, k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_2  
@@ -225,7 +225,7 @@ power_loop_2_sq:
     DUP4  
     DUP1
     // stack: sqr, sqr, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(square_fp12)
+    %jump(square_fp254_12)
 power_loop_2_end:
     // stack:                           0, k, sqr  {200: y0, 212: y2, 224: y4}
     POP  
@@ -234,7 +234,7 @@ power_loop_2_end:
     // stack:      212, 212, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
     DUP5
     // stack: sqr, 212, 212, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 
 power_loop_0:
     // stack:                                 k  , sqr  {200: y0, 212: y2, 224: y4}
@@ -256,7 +256,7 @@ power_loop_0:
     // stack:      200, 200, power_loop_0_sq, k-1, sqr  {200: y0, 212: y2, 224: y4}
     DUP5
     // stack: sqr, 200, 200, power_loop_0_sq, k-1, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 power_loop_0_sq:
     // stack:                         k, sqr  {200: y0, 212: y2, 224: y4}
     PUSH power_loop_0  
@@ -264,9 +264,9 @@ power_loop_0_sq:
     DUP3  
     DUP1
     // stack: sqr, sqr, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
-    %jump(square_fp12)
+    %jump(square_fp254_12)
 power_loop_0_end:
     // stack:                         0, sqr  {200: y0, 212: y2, 224: y4}
     %stack (i, sqr) -> (200, sqr, 200, custom_powers)
     // stack:   200, sqr, 200, custom_powers  {200: y0, 212: y2, 224: y4}
-    %jump(mul_fp12)    
+    %jump(mul_fp254_12)    
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index cd13f80e..21b53a4e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -25,7 +25,7 @@
 ///     0xnm -= 1
 ///     mul_tangent()
 
-global miller:
+global bn254_miller:
     // stack:         ptr, out, retdest
     %stack (ptr, out) -> (out, 1, ptr, out)
     // stack: out, 1, ptr, out, retdest
@@ -36,7 +36,7 @@ global miller:
     %stack (P: 2) -> (0, 53, P, P)
     // stack: 0, 53, O, P, Q, out, retdest
     // the head 0 lets miller_loop start with POP
-global miller_loop:
+miller_loop:
     POP
     // stack:          times  , O, P, Q, out, retdest
     DUP1  
@@ -85,9 +85,9 @@ miller_zero:
 
 
 /// def mul_tangent()
-///     out = square_fp12(out)
+///     out = square_fp254_12(out)
 ///     line = tangent(O, Q)
-///     out = mul_fp12_sparse(out, line)
+///     out = mul_fp254_12_sparse(out, line)
 ///     O += O
 
 mul_tangent:
@@ -98,7 +98,7 @@ mul_tangent:
     // stack:           mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %stack (mul_tangent_1, out) -> (out, out, mul_tangent_1, out)
     // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    %jump(square_fp12)
+    %jump(square_fp254_12)
 mul_tangent_1:
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     DUP13
@@ -113,7 +113,7 @@ mul_tangent_1:
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
     %stack (out) -> (out, 100, out)
     // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
-    %jump(mul_fp12_sparse)
+    %jump(mul_fp254_12_sparse)
 mul_tangent_2:
     // stack:                  retdest, 0xnm, times,   O, P, Q, out  {100: line}
     PUSH after_double
@@ -133,7 +133,7 @@ after_double:
 
 /// def mul_cord()
 ///     line = cord(P, O, Q)
-///     out = mul_fp12_sparse(out, line)
+///     out = mul_fp254_12_sparse(out, line)
 ///     O += P
 
 mul_cord:
@@ -157,7 +157,7 @@ mul_cord:
     // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     %stack (out) -> (out, 100, out)
     // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    %jump(mul_fp12_sparse)
+    %jump(mul_fp254_12_sparse)
 mul_cord_1:
     // stack:                   0xnm, times, O  , P, Q, out
     PUSH after_add
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index d4395aa8..cb3fe066 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -1,18 +1,18 @@
 /// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
 ///     out = miller_loop(P, Q)
 ///     return make_invariant(P, Q)
-global tate:
+global bn254_tate:
     // stack:                      inp, out, retdest
     %stack (inp, out) -> (inp, out, make_invariant, out)
     // stack: inp, out, make_invariant, out, retdest
-    %jump(miller)
+    %jump(bn254_miller)
 
 
 /// def make_invariant(y: Fp12):
 ///     y = first_exp(y)
 ///     y = second_exp(y)
 ///     return final_exponentiation(y)
-global make_invariant:
+make_invariant:
 
 /// map t to t^(p^6 - 1) via 
 ///     def first_exp(t):
@@ -21,21 +21,21 @@ global make_invariant:
     %stack (out) -> (out, 100, first_exp, out)         
     // stack: out, 100, first_exp, out, retdest  {out: y}
     %jump(inv_fp254_12)
-global first_exp:
+first_exp:
     // stack:                             out, retdest  {out: y  , 100: y^-1}
-    %frob_fp12_6
+    %frob_fp254_12_6
     // stack:                             out, retdest  {out: y_6, 100: y^-1}
     %stack (out) -> (out, 100, out, second_exp, out)
     // stack:  out, 100, out, second_exp, out, retdest  {out: y_6, 100: y^-1}
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 
 /// map t to t^(p^2 + 1) via 
 ///     def second_exp(t):
 ///         return t.frob(2) * t
-global second_exp:
-    // stack:                                out, retdest  {out: y}
-    %stack (out) -> (out, 100, out, out, final_exp, out)
-    // stack: out, 100, out, out, final_exp, out, retdest  {out: y}
-    %frob_fp12_2_
-    // stack:      100, out, out, final_exp, out, retdest  {out: y, 100: y_2}
-    %jump(mul_fp12)
+second_exp:
+    // stack:                                      out, retdest  {out: y}
+    %stack (out) -> (out, 100, out, out, bn254_final_exp, out)
+    // stack: out, 100, out, out, bn254_final_exp, out, retdest  {out: y}
+    %frob_fp254_12_2_
+    // stack:      100, out, out, bn254_final_exp, out, retdest  {out: y, 100: y_2}
+    %jump(mul_fp254_12)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
similarity index 98%
rename from evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index ef0d1c96..253904a3 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -43,7 +43,7 @@
 ///
 /// f, f', g, g' consist of six elements on the stack
 
-global mul_fp12:
+global mul_fp254_12:
     // stack:                                inA, inB, out 
     DUP1  
     %offset_fp6 
@@ -61,7 +61,7 @@ global mul_fp12:
     // stack:        f', mul_fp12_1, g', f', inA, inB, out 
     %dup_fp6_7
     // stack:    g', f', mul_fp12_1, g', f', inA, inB, out 
-    %jump(mul_fp6)
+    %jump(mul_fp254_6)
 mul_fp12_1:
     // stack:                f'g', g'  , f', inA, inB, out 
     %dup_fp6_0
@@ -90,7 +90,7 @@ mul_fp12_1:
     // stack:  inA, g, mul_fp12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %load_fp6
     // stack:    f, g, mul_fp12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %jump(mul_fp6)
+    %jump(mul_fp254_6)
 mul_fp12_2:    
     // stack:                  fg, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %store_fp6(12)
@@ -105,7 +105,7 @@ mul_fp12_2:
     // stack:             f,f', g+g', mul_fp12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp6
     // stack:             f+f', g+g', mul_fp12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %jump(mul_fp6)
+    %jump(mul_fp254_6)
 mul_fp12_3:
     // stack:                       (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp6(12)
@@ -180,7 +180,7 @@ mul_fp12_3:
 ///
 /// f, f' consist of six elements; G1, G1' consist of two elements; and g0 of one element 
 
-global mul_fp12_sparse:
+global mul_fp254_12_sparse:
     // stack:                                                                    inA, inB, out
     DUP1  
     %offset_fp6
@@ -317,10 +317,10 @@ global mul_fp12_sparse:
 ///
 /// f, f' consist of six elements on the stack
 
-global square_fp12_test:
+global square_fp254_12_test:
     POP
 
-global square_fp12:
+global square_fp254_12:
     // stack:                                                                   inp, out
     DUP1
     // stack:                                                              inp, inp, out
@@ -348,7 +348,7 @@ global square_fp12:
     // stack:     f , square_fp12_1, out', f', square_fp12_2, inp, f, square_fp12_3, out
     %dup_fp6_8
     // stack: f', f , square_fp12_1, out', f', square_fp12_2, inp, f, square_fp12_3, out
-    %jump(mul_fp6)
+    %jump(mul_fp254_6)
 square_fp12_1:
     // stack:                   f'f, out', f', square_fp12_2, inp, f, square_fp12_3, out
     DUP7
@@ -357,7 +357,7 @@ square_fp12_1:
     // stack:                        out', f', square_fp12_2, inp, f, square_fp12_3, out
     POP
     // stack:                              f', square_fp12_2, inp, f, square_fp12_3, out
-    %jump(square_fp6)
+    %jump(square_fp254_6)
 square_fp12_2:
     // stack:                                           f'f', inp, f, square_fp12_3, out
     %sh_fp254_6
@@ -368,7 +368,7 @@ square_fp12_2:
     SWAP13
     SWAP6
     // stack:                                       f, square_fp12_3, sh(f'f'), inp, out
-    %jump(square_fp6)
+    %jump(square_fp254_6)
 square_fp12_3:
     // stack:                                                    ff , sh(f'f'), inp, out
     %add_fp6
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp6_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_6_mul.asm
similarity index 99%
rename from evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp6_mul.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_6_mul.asm
index 3cc563c8..db8b09e0 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp6_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_6_mul.asm
@@ -59,7 +59,7 @@
 /// e2_ = c0d2_ + c0_d2 + c1d1_ +  c1_d1  + c2d0_  + c2_d0
 
 // cost: 157
-global mul_fp6:
+global mul_fp254_6:
     // e2
     // make c0_d2_ + c1_d1_ + c2_d0_
     DUP8
@@ -299,7 +299,7 @@ global mul_fp6:
 /// e2_ = 2(c0_c2 + c2c0_) + 2c1c1_
 
 // cost: 101
-global square_fp6:
+global square_fp254_6:
     /// e0  = (c0^2 - c0_^2) + x0
     /// e0_ = 2c0c0_ + x0_
     ///     where x0_, x0 = %i9 2(c1c2 - c1_c2_), 2(c1_c2 + c1c2_)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index ac4da123..af881631 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -1,42 +1,42 @@
-global test_frob_fp12_1:
+global test_frob_fp254_12_1:
     // stack:         ptr
-    %frob_fp12_1
+    %frob_fp254_12_1
     // stack:         ptr
     %jump(0xdeadbeef)
 
-global test_frob_fp12_2:
+global test_frob_fp254_12_2:
     // stack:         ptr 
     DUP1
     // stack:    ptr, ptr
-    %frob_fp12_2_
+    %frob_fp254_12_2_
     // stack:         ptr
     %jump(0xdeadbeef)
 
-global test_frob_fp12_3:
+global test_frob_fp254_12_3:
     // stack:         ptr
-    %frob_fp12_3
+    %frob_fp254_12_3
     // stack:         ptr
     %jump(0xdeadbeef)
 
-global test_frob_fp12_6:
+global test_frob_fp254_12_6:
     // stack:         ptr
-    %frob_fp12_6
+    %frob_fp254_12_6
     // stack:         ptr
     %jump(0xdeadbeef)
 
 
-/// def frob_fp12_n(f, f'):
-///     g  =             frob_fp6(n, f )
-///     g' = FROB_z[n] * frob_fp6(n, f')
+/// def frob_fp254_12_n(f, f'):
+///     g  =             frob_fp254_6(n, f )
+///     g' = FROB_z[n] * frob_fp254_6(n, f')
 ///     return g, g'
 
-%macro frob_fp12_1
+%macro frob_fp254_12_1
     // stack:           ptr
     DUP1
     // stack:      ptr, ptr 
     %load_fp6
     // stack:        f, ptr
-    %frob_fp6_1
+    %frob_fp254_6_1
     // stack:        g, ptr
     DUP7
     // stack:   ptr, g, ptr
@@ -55,13 +55,13 @@ global test_frob_fp12_6:
 %endmacro 
 
 // Note: this is the only one with distinct input and output pointers
-%macro frob_fp12_2_
+%macro frob_fp254_12_2_
     // stack:           ptr , out
     DUP1
     // stack:      ptr, ptr , out
     %load_fp6
     // stack:        f, ptr , out
-    %frob_fp6_2
+    %frob_fp254_6_2
     // stack:        g, ptr , out
     DUP8
     // stack:   out, g, ptr , out
@@ -79,13 +79,13 @@ global test_frob_fp12_6:
     // stack:                 out
 %endmacro 
 
-%macro frob_fp12_3
+%macro frob_fp254_12_3
     // stack:           ptr
     DUP1
     // stack:      ptr, ptr 
     %load_fp6
     // stack:        f, ptr
-    %frob_fp6_3
+    %frob_fp254_6_3
     // stack:        g, ptr
     DUP7
     // stack:   ptr, g, ptr
@@ -103,7 +103,7 @@ global test_frob_fp12_6:
     // stack:           ptr
 %endmacro
 
-%macro frob_fp12_6
+%macro frob_fp254_12_6
     // stack:           ptr
     DUP1  %offset_fp6
     // stack:     ptr', ptr
@@ -120,14 +120,14 @@ global test_frob_fp12_6:
 
 /// let Z` denote the complex conjugate of Z
 
-/// def frob_fp6_n(C0, C1, C2):
+/// def frob_fp254_6_n(C0, C1, C2):
 ///     if n%2:
 ///         D0, D1, D2 = C0`, FROB_T1[n] * C1`, FROB_T2[n] * C2`
 ///     else: 
 ///         D0, D1, D2 = C0 , FROB_T1[n] * C1 , FROB_T2[n] * C2
 ///     return D0, D1, D2 
 
-%macro frob_fp6_1
+%macro frob_fp254_6_1
     // stack: C0 , C1 , C2
     %conj_fp254_2
     // stack: D0 , C1 , C2
@@ -149,7 +149,7 @@ global test_frob_fp12_6:
     // stack: D0 , D1 , D2
 %endmacro
 
-%macro frob_fp6_2
+%macro frob_fp254_6_2
     // stack: C0, C1, C2
     %swap_fp2_hole_2
     // stack: C2, C1, C0
@@ -165,7 +165,7 @@ global test_frob_fp12_6:
     // stack: D0, D1, D2
 %endmacro
 
-%macro frob_fp6_3
+%macro frob_fp254_6_3
     // stack: C0 , C1 , C2
     %conj_fp254_2
     // stack: D0 , C1 , C2
@@ -189,20 +189,20 @@ global test_frob_fp12_6:
 
 
 %macro frobz_1
-    %frob_fp6_1
+    %frob_fp254_6_1
     PUSH 0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac
     PUSH 0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470
     %mul_fp2_fp6
 %endmacro
 
 %macro frobz_2
-    %frob_fp6_2
+    %frob_fp254_6_2
     PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49
     %mul_fp_fp6
 %endmacro
 
 %macro frobz_3
-    %frob_fp6_3
+    %frob_fp254_6_3
     PUSH 0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101
     PUSH 0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f
     %mul_fp2_fp6
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index a813b98b..4d767761 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -32,7 +32,7 @@ global inv_fp254_12:
     // stack:                         inp, out, retdest
     %stack (inp, out) -> (inp, out, 50, check_inv_fp254_12)
     // stack: inp, out, 50, check_inv_fp254_12, retdest 
-    %jump(mul_fp12)
+    %jump(mul_fp254_12)
 check_inv_fp254_12:
     // stack:        retdest
     PUSH 50  
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 57a4bf8d..389e4cf3 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -84,9 +84,9 @@ fn test_mul_fp254_12() -> Result<()> {
     let g: Fp12 = rng.gen::<Fp12>();
     let h: Fp12 = gen_fp12_sparse(&mut rng);
 
-    let setup_normal: InterpreterSetup = setup_mul_test(in0, in1, out, f, g, "mul_fp12");
-    let setup_sparse: InterpreterSetup = setup_mul_test(in0, in1, out, f, h, "mul_fp12_sparse");
-    let setup_square: InterpreterSetup = setup_mul_test(in0, in1, out, f, f, "square_fp12_test");
+    let setup_normal: InterpreterSetup = setup_mul_test(in0, in1, out, f, g, "mul_fp254_12");
+    let setup_sparse: InterpreterSetup = setup_mul_test(in0, in1, out, f, h, "mul_fp254_12_sparse");
+    let setup_square: InterpreterSetup = setup_mul_test(in0, in1, out, f, f, "square_fp254_12_test");
 
     let intrptr_normal: Interpreter = run_setup_interpreter(setup_normal).unwrap();
     let intrptr_sparse: Interpreter = run_setup_interpreter(setup_sparse).unwrap();
@@ -122,10 +122,10 @@ fn test_frob_fp254_12() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
 
-    let setup_frob_1 = setup_frob_test(ptr, f, "test_frob_fp12_1");
-    let setup_frob_2 = setup_frob_test(ptr, f, "test_frob_fp12_2");
-    let setup_frob_3 = setup_frob_test(ptr, f, "test_frob_fp12_3");
-    let setup_frob_6 = setup_frob_test(ptr, f, "test_frob_fp12_6");
+    let setup_frob_1 = setup_frob_test(ptr, f, "test_frob_fp254_12_1");
+    let setup_frob_2 = setup_frob_test(ptr, f, "test_frob_fp254_12_2");
+    let setup_frob_3 = setup_frob_test(ptr, f, "test_frob_fp254_12_3");
+    let setup_frob_6 = setup_frob_test(ptr, f, "test_frob_fp254_12_6");
 
     let intrptr_frob_1: Interpreter = run_setup_interpreter(setup_frob_1).unwrap();
     let intrptr_frob_2: Interpreter = run_setup_interpreter(setup_frob_2).unwrap();
@@ -276,7 +276,7 @@ fn test_tate() -> Result<()> {
     ];
 
     let setup = InterpreterSetup {
-        label: "tate".to_string(),
+        label: "bn254_tate".to_string(),
         stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
         memory: vec![(ptr, inputs)],
     };

From 61ac0eff46f0c8d7f582b352509dd78047e89b02 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sun, 5 Feb 2023 17:40:45 -0800
Subject: [PATCH 174/201] fmt

---
 evm/src/cpu/kernel/tests/bn254.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 389e4cf3..e8886e8e 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -86,7 +86,8 @@ fn test_mul_fp254_12() -> Result<()> {
 
     let setup_normal: InterpreterSetup = setup_mul_test(in0, in1, out, f, g, "mul_fp254_12");
     let setup_sparse: InterpreterSetup = setup_mul_test(in0, in1, out, f, h, "mul_fp254_12_sparse");
-    let setup_square: InterpreterSetup = setup_mul_test(in0, in1, out, f, f, "square_fp254_12_test");
+    let setup_square: InterpreterSetup =
+        setup_mul_test(in0, in1, out, f, f, "square_fp254_12_test");
 
     let intrptr_normal: Interpreter = run_setup_interpreter(setup_normal).unwrap();
     let intrptr_sparse: Interpreter = run_setup_interpreter(setup_sparse).unwrap();

From 1c73e23824dbbaa2c997044848c10314a55d8fa3 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sun, 5 Feb 2023 17:43:31 -0800
Subject: [PATCH 175/201] fp -> fp254

---
 .../bn254/curve_arithmetic/final_power.asm    |   2 +-
 .../bn254/curve_arithmetic/miller_loop.asm    |   2 +-
 .../bn254/field_arithmetic/degree_12_mul.asm  | 228 +++++++++---------
 .../bn254/field_arithmetic/frobenius.asm      |  88 +++----
 .../curve/bn254/field_arithmetic/inverse.asm  |   6 +-
 .../asm/curve/bn254/field_arithmetic/util.asm |  74 +++---
 6 files changed, 200 insertions(+), 200 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
index bfb5b812..7f22587a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
@@ -26,7 +26,7 @@ global bn254_final_exp:
     // stack:                  val, retdest
     %stack (val) -> (val, 300, val)
     // stack:        val, 300, val, retdest
-    %move_fp12
+    %move_fp254_12
     // stack:             300, val, retdest
     %stack () -> (1, 1, 1)
     // stack:    1, 1, 1, 300, val, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 21b53a4e..63387cb4 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -31,7 +31,7 @@ global bn254_miller:
     // stack: out, 1, ptr, out, retdest
     %mstore_kernel_general
     // stack:         ptr, out, retdest
-    %load_fp6
+    %load_fp254_6
     // stack:        P, Q, out, retdest
     %stack (P: 2) -> (0, 53, P, P)
     // stack: 0, 53, O, P, Q, out, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index 253904a3..7126ba8b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -4,7 +4,7 @@
 
 /// cost: 1063
 
-/// fp6 functions:
+/// fp254_6 functions:
 ///  fn    | num | ops | cost
 ///  -------------------------
 ///  load  |   8 |  40 |  320
@@ -46,92 +46,92 @@
 global mul_fp254_12:
     // stack:                                inA, inB, out 
     DUP1  
-    %offset_fp6 
+    %offset_fp254_6 
     // stack:                          inA', inA, inB, out 
-    %load_fp6
+    %load_fp254_6
     // stack:                            f', inA, inB, out 
     DUP8  
-    %offset_fp6
+    %offset_fp254_6
     // stack:                      inB', f', inA, inB, out 
-    %load_fp6
+    %load_fp254_6
     // stack:                        g', f', inA, inB, out 
-    PUSH mul_fp12_1
-    // stack:            mul_fp12_1, g', f', inA, inB, out 
-    %dup_fp6_7
-    // stack:        f', mul_fp12_1, g', f', inA, inB, out 
-    %dup_fp6_7
-    // stack:    g', f', mul_fp12_1, g', f', inA, inB, out 
+    PUSH mul_fp254_12_1
+    // stack:            mul_fp254_12_1, g', f', inA, inB, out 
+    %dup_fp254_6_7
+    // stack:        f', mul_fp254_12_1, g', f', inA, inB, out 
+    %dup_fp254_6_7
+    // stack:    g', f', mul_fp254_12_1, g', f', inA, inB, out 
     %jump(mul_fp254_6)
-mul_fp12_1:
+mul_fp254_12_1:
     // stack:                f'g', g'  , f', inA, inB, out 
-    %dup_fp6_0
+    %dup_fp254_6_0
     // stack:          f'g', f'g', g'  , f', inA, inB, out 
-    %store_fp6_sh(0)                                    
+    %store_fp254_6_sh(0)                                    
     // stack:                f'g', g'  , f', inA, inB, out  {0: sh(f'g')}
-    %store_fp6(6)
+    %store_fp254_6(6)
     // stack:                      g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     DUP13
     // stack:                 inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     DUP15  
     // stack:            inB, inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %load_fp6
+    %load_fp254_6
     // stack:             g , inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %swap_fp6_hole
+    %swap_fp254_6_hole
     // stack:             g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %dup_fp6_7
+    %dup_fp254_6_7
     // stack:           g,g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %add_fp6
+    %add_fp254_6
     // stack:           g+g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %swap_fp6_hole
+    %swap_fp254_6_hole
     // stack:              g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    PUSH mul_fp12_2
-    // stack:  mul_fp12_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    PUSH mul_fp254_12_2
+    // stack:  mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     SWAP7
-    // stack:  inA, g, mul_fp12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %load_fp6
-    // stack:    f, g, mul_fp12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:  inA, g, mul_fp254_12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    %load_fp254_6
+    // stack:    f, g, mul_fp254_12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %jump(mul_fp254_6)
-mul_fp12_2:    
+mul_fp254_12_2:    
     // stack:                  fg, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %store_fp6(12)
+    %store_fp254_6(12)
     // stack:                      g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %swap_fp6
+    %swap_fp254_6
     // stack:                      f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    PUSH mul_fp12_3
-    // stack:          mul_fp12_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    PUSH mul_fp254_12_3
+    // stack:          mul_fp254_12_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     SWAP13
-    // stack:          inA, f', g+g', mul_fp12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %load_fp6
-    // stack:             f,f', g+g', mul_fp12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %add_fp6
-    // stack:             f+f', g+g', mul_fp12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:          inA, f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    %load_fp254_6
+    // stack:             f,f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    %add_fp254_6
+    // stack:             f+f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %jump(mul_fp254_6)
-mul_fp12_3:
+mul_fp254_12_3:
     // stack:                       (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %load_fp6(12)
+    %load_fp254_6(12)
     // stack:                   fg, (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %swap_fp6
+    %swap_fp254_6
     // stack:                   (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %dup_fp6_6
+    %dup_fp254_6_6
     // stack:               fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %load_fp6(6)
+    %load_fp254_6(6)
     // stack:          f'g',fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %add_fp6
+    %add_fp254_6
     // stack:          f'g'+fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %subr_fp6
+    %subr_fp254_6
     // stack:       (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     DUP14  
-    %offset_fp6 
+    %offset_fp254_6 
     // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
-    %store_fp6
+    %store_fp254_6
     // stack:                                 fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %load_fp6(0)
+    %load_fp254_6(0)
     // stack:                      sh(f'g') , fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %add_fp6
+    %add_fp254_6
     // stack:                      sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     DUP8
     // stack:                 out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %store_fp6
+    %store_fp254_6
     // stack:                                     inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %pop2  
     JUMP
@@ -143,7 +143,7 @@ mul_fp12_3:
 
 /// cost: 645
 
-/// fp6 functions:
+/// fp254_6 functions:
 ///  fn      | num | ops | cost
 ///  ---------------------------
 ///  load    |   2 |  40 |   80
@@ -151,8 +151,8 @@ mul_fp12_3:
 ///  dup     |   4 |   6 |   24
 ///  swap    |   4 |  16 |   64
 ///  add     |   4 |  16 |   64
-///  mul_fp  |   2 |  21 |   42
-///  mul_fp2 |   4 |  59 |  236
+///  mul_fp254_  |   2 |  21 |   42
+///  mul_fp254_2 |   4 |  59 |  236
 ///
 /// lone stack operations:
 ///  op    | num 
@@ -183,91 +183,91 @@ mul_fp12_3:
 global mul_fp254_12_sparse:
     // stack:                                                                    inA, inB, out
     DUP1  
-    %offset_fp6
+    %offset_fp254_6
     // stack:                                                              inA', inA, inB, out
-    %load_fp6
+    %load_fp254_6
     // stack:                                                                f', inA, inB, out
     DUP8 
     // stack:                                                           inB, f', inA, inB, out
     DUP8
     // stack:                                                      inA, inB, f', inA, inB, out
-    %load_fp6
+    %load_fp254_6
     // stack:                                                        f, inB, f', inA, inB, out
     DUP16
     // stack:                                                   out, f, inB, f', inA, inB, out
-    %dup_fp6_8 
+    %dup_fp254_6_8 
     // stack:                                               f', out, f, inB, f', inA, inB, out
     DUP14
     // stack:                                          inB, f', out, f, inB, f', inA, inB, out
-    %dup_fp6_8
+    %dup_fp254_6_8
     // stack:                                       f, inB, f', out, f, inB, f', inA, inB, out
     DUP7
     // stack:                                  inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %dup_fp6_8
+    %dup_fp254_6_8
     // stack:                              f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %dup_fp6_7
+    %dup_fp254_6_7
     // stack:                           f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     DUP13 
     // stack:                      inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %mload_kernel_general
     // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp_fp6
+    %mul_fp254__fp254_6
     // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %swap_fp6
+    %swap_fp254_6
     // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     DUP13
     %add_const(8)
     // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %load_fp2
+    %load_fp254_2
     // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp2_fp6_sh2
+    %mul_fp254_2_fp254_6_sh2
     // stack:           G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %add_fp6
+    %add_fp254_6
     // stack:           G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %swap_fp6_hole
+    %swap_fp254_6_hole
     // stack:          f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     DUP7  %add_const(2)
     // stack: inB1,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %load_fp2
+    %load_fp254_2
     // stack:  G1 ,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp2_fp6_sh
+    %mul_fp254_2_fp254_6_sh
     // stack:  G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %add_fp6_hole
+    %add_fp254_6_hole
     // stack:      G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     DUP14
     // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %store_fp6
+    %store_fp254_6
     // stack:                                          inB, f', out, f, inB, f', inA, inB, out
     %mload_kernel_general
     // stack:                                          g0 , f', out, f, inB, f', inA, inB, out
-    %mul_fp_fp6
+    %mul_fp254__fp254_6
     // stack:                                          g0 * f', out, f, inB, f', inA, inB, out
-    %swap_fp6_hole
+    %swap_fp254_6_hole
     // stack:                                        f  , out, g0 * f', inB, f', inA, inB, out
     DUP14
     %add_const(8)
     // stack:                               inB2,    f  , out, g0 * f', inB, f', inA, inB, out
-    %load_fp2
+    %load_fp254_2
     // stack:                                G2 ,    f  , out, g0 * f', inB, f', inA, inB, out
-    %mul_fp2_fp6_sh
+    %mul_fp254_2_fp254_6_sh
     // stack:                                G2 * sh(f) , out, g0 * f', inB, f', inA, inB, out
-    %add_fp6_hole
+    %add_fp254_6_hole
     // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
-    %swap_fp6_hole
+    %swap_fp254_6_hole
     // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
     DUP7
     %add_const(2)
     // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    %load_fp2
+    %load_fp254_2
     // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    %mul_fp2_fp6_sh
+    %mul_fp254_2_fp254_6_sh
     // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
-    %add_fp6_hole
+    %add_fp254_6_hole
     // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     DUP9
-    %offset_fp6
+    %offset_fp254_6
     // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
-    %store_fp6
+    %store_fp254_6
     // stack:                                                                    inA, inB, out
     %pop3
     JUMP
@@ -279,7 +279,7 @@ global mul_fp254_12_sparse:
 
 /// cost: 646
 
-/// fp6 functions:
+/// fp254_6 functions:
 ///  fn    | num | ops | cost
 ///  -------------------------
 ///  load  |   2 |  40 |   80
@@ -324,58 +324,58 @@ global square_fp254_12:
     // stack:                                                                   inp, out
     DUP1
     // stack:                                                              inp, inp, out
-    %load_fp6 
+    %load_fp254_6 
     // stack:                                                                f, inp, out
-    PUSH square_fp12_3
-    // stack:                                                 square_fp12_3, f, inp, out
+    PUSH square_fp254_12_3
+    // stack:                                                 square_fp254_12_3, f, inp, out
     SWAP7
-    // stack:                                                 inp, f, square_fp12_3, out
-    PUSH square_fp12_2
-    // stack:                                  square_fp12_2, inp, f, square_fp12_3, out 
-    %dup_fp6_2
-    // stack:                              f , square_fp12_2, inp, f, square_fp12_3, out
+    // stack:                                                 inp, f, square_fp254_12_3, out
+    PUSH square_fp254_12_2
+    // stack:                                  square_fp254_12_2, inp, f, square_fp254_12_3, out 
+    %dup_fp254_6_2
+    // stack:                              f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP16
-    %offset_fp6
-    // stack:                        out', f , square_fp12_2, inp, f, square_fp12_3, out
-    PUSH square_fp12_1
-    // stack:         square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
+    %offset_fp254_6
+    // stack:                        out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    PUSH square_fp254_12_1
+    // stack:         square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP10
-    %offset_fp6
-    // stack:   inp', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
-    %load_fp6
-    // stack:     f', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
-    %swap_fp6_hole_2
-    // stack:     f , square_fp12_1, out', f', square_fp12_2, inp, f, square_fp12_3, out
-    %dup_fp6_8
-    // stack: f', f , square_fp12_1, out', f', square_fp12_2, inp, f, square_fp12_3, out
+    %offset_fp254_6
+    // stack:   inp', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %load_fp254_6
+    // stack:     f', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %swap_fp254_6_hole_2
+    // stack:     f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %dup_fp254_6_8
+    // stack: f', f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     %jump(mul_fp254_6)
-square_fp12_1:
-    // stack:                   f'f, out', f', square_fp12_2, inp, f, square_fp12_3, out
+square_fp254_12_1:
+    // stack:                   f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP7
-    // stack:             out', f'f, out', f', square_fp12_2, inp, f, square_fp12_3, out
-    %store_fp6_double
-    // stack:                        out', f', square_fp12_2, inp, f, square_fp12_3, out
+    // stack:             out', f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    %store_fp254_6_double
+    // stack:                        out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     POP
-    // stack:                              f', square_fp12_2, inp, f, square_fp12_3, out
+    // stack:                              f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     %jump(square_fp254_6)
-square_fp12_2:
-    // stack:                                           f'f', inp, f, square_fp12_3, out
+square_fp254_12_2:
+    // stack:                                           f'f', inp, f, square_fp254_12_3, out
     %sh_fp254_6
-    // stack:                                       sh(f'f'), inp, f, square_fp12_3, out
-    %swap_fp6_hole
-    // stack:                                       f, inp, sh(f'f'), square_fp12_3, out
+    // stack:                                       sh(f'f'), inp, f, square_fp254_12_3, out
+    %swap_fp254_6_hole
+    // stack:                                       f, inp, sh(f'f'), square_fp254_12_3, out
     SWAP6
     SWAP13
     SWAP6
-    // stack:                                       f, square_fp12_3, sh(f'f'), inp, out
+    // stack:                                       f, square_fp254_12_3, sh(f'f'), inp, out
     %jump(square_fp254_6)
-square_fp12_3:
+square_fp254_12_3:
     // stack:                                                    ff , sh(f'f'), inp, out
-    %add_fp6
+    %add_fp254_6
     // stack:                                                    ff + sh(f'f'), inp, out
     DUP8
     // stack:                                               out, ff + sh(f'f'), inp, out
-    %store_fp6
+    %store_fp254_6
     // stack:                                                                   inp, out
     %pop2
     JUMP
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index af881631..8c062b2a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -34,23 +34,23 @@ global test_frob_fp254_12_6:
     // stack:           ptr
     DUP1
     // stack:      ptr, ptr 
-    %load_fp6
+    %load_fp254_6
     // stack:        f, ptr
     %frob_fp254_6_1
     // stack:        g, ptr
     DUP7
     // stack:   ptr, g, ptr
-    %store_fp6
+    %store_fp254_6
     // stack:           ptr
-    DUP1  %offset_fp6
+    DUP1  %offset_fp254_6
     // stack:     ptr', ptr
-    %load_fp6
+    %load_fp254_6
     // stack:       f', ptr
     %frobz_1
     // stack:       g', ptr
-    DUP7  %offset_fp6
+    DUP7  %offset_fp254_6
     // stack: ptr', g', ptr
-    %store_fp6
+    %store_fp254_6
     // stack:           ptr
 %endmacro 
 
@@ -59,23 +59,23 @@ global test_frob_fp254_12_6:
     // stack:           ptr , out
     DUP1
     // stack:      ptr, ptr , out
-    %load_fp6
+    %load_fp254_6
     // stack:        f, ptr , out
     %frob_fp254_6_2
     // stack:        g, ptr , out
     DUP8
     // stack:   out, g, ptr , out
-    %store_fp6 
+    %store_fp254_6 
     // stack:           ptr , out
-    %offset_fp6
+    %offset_fp254_6
     // stack:           ptr', out
-    %load_fp6
+    %load_fp254_6
     // stack:             f', out
     %frobz_2
     // stack:             g', out
-    DUP7  %offset_fp6
+    DUP7  %offset_fp254_6
     // stack:       out', g', out
-    %store_fp6
+    %store_fp254_6
     // stack:                 out
 %endmacro 
 
@@ -83,37 +83,37 @@ global test_frob_fp254_12_6:
     // stack:           ptr
     DUP1
     // stack:      ptr, ptr 
-    %load_fp6
+    %load_fp254_6
     // stack:        f, ptr
     %frob_fp254_6_3
     // stack:        g, ptr
     DUP7
     // stack:   ptr, g, ptr
-    %store_fp6
+    %store_fp254_6
     // stack:           ptr
-    DUP1  %offset_fp6
+    DUP1  %offset_fp254_6
     // stack:     ptr', ptr
-    %load_fp6
+    %load_fp254_6
     // stack:       f', ptr
     %frobz_3
     // stack:       g', ptr
-    DUP7  %offset_fp6
+    DUP7  %offset_fp254_6
     // stack: ptr', g', ptr
-    %store_fp6
+    %store_fp254_6
     // stack:           ptr
 %endmacro
 
 %macro frob_fp254_12_6
     // stack:           ptr
-    DUP1  %offset_fp6
+    DUP1  %offset_fp254_6
     // stack:     ptr', ptr
-    %load_fp6
+    %load_fp254_6
     // stack:       f', ptr
     %frobz_6
     // stack:       g', ptr
-    DUP7  %offset_fp6
+    DUP7  %offset_fp254_6
     // stack: ptr', g', ptr
-    %store_fp6
+    %store_fp254_6
     // stack:           ptr
 %endmacro
 
@@ -131,37 +131,37 @@ global test_frob_fp254_12_6:
     // stack: C0 , C1 , C2
     %conj_fp254_2
     // stack: D0 , C1 , C2
-    %swap_fp2_hole_2
+    %swap_fp254_2_hole_2
     // stack: C2 , C1 , D0
     %conj_fp254_2
     // stack: C2`, C1 , D0
     %frobt2_1
     // stack: D2 , C1 , D0
-    %swap_fp2_hole_2
+    %swap_fp254_2_hole_2
     // stack: D0 , C1 , D2
-    %swap_fp2
+    %swap_fp254_2
     // stack: C1 , D0 , D2
     %conj_fp254_2
     // stack: C1`, D0 , D2
     %frobt1_1
     // stack: D1 , D0 , D2
-    %swap_fp2
+    %swap_fp254_2
     // stack: D0 , D1 , D2
 %endmacro
 
 %macro frob_fp254_6_2
     // stack: C0, C1, C2
-    %swap_fp2_hole_2
+    %swap_fp254_2_hole_2
     // stack: C2, C1, C0
     %frobt2_2
     // stack: D2, C1, C0
-    %swap_fp2_hole_2
+    %swap_fp254_2_hole_2
     // stack: C0, C1, D2
-    %swap_fp2
+    %swap_fp254_2
     // stack: C1, C0, D2
     %frobt1_2
     // stack: D1, C0, D2
-    %swap_fp2
+    %swap_fp254_2
     // stack: D0, D1, D2
 %endmacro
 
@@ -169,21 +169,21 @@ global test_frob_fp254_12_6:
     // stack: C0 , C1 , C2
     %conj_fp254_2
     // stack: D0 , C1 , C2
-    %swap_fp2_hole_2
+    %swap_fp254_2_hole_2
     // stack: C2 , C1 , D0
     %conj_fp254_2
     // stack: C2`, C1 , D0
     %frobt2_3
     // stack: D2 , C1 , D0
-    %swap_fp2_hole_2
+    %swap_fp254_2_hole_2
     // stack: D0 , C1 , D2
-    %swap_fp2
+    %swap_fp254_2
     // stack: C1 , D0 , D2
     %conj_fp254_2
     // stack: C1`, D0 , D2
     %frobt1_3
     // stack: D1 , D0 , D2
-    %swap_fp2
+    %swap_fp254_2
     // stack: D0 , D1 , D2
 %endmacro
 
@@ -192,59 +192,59 @@ global test_frob_fp254_12_6:
     %frob_fp254_6_1
     PUSH 0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac
     PUSH 0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470
-    %mul_fp2_fp6
+    %mul_fp254_2_fp254_6
 %endmacro
 
 %macro frobz_2
     %frob_fp254_6_2
     PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49
-    %mul_fp_fp6
+    %mul_fp254__fp254_6
 %endmacro
 
 %macro frobz_3
     %frob_fp254_6_3
     PUSH 0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101
     PUSH 0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f
-    %mul_fp2_fp6
+    %mul_fp254_2_fp254_6
 %endmacro
 
 %macro frobz_6
     PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46
-    %mul_fp_fp6
+    %mul_fp254__fp254_6
 %endmacro
 
 
 %macro frobt1_1
     PUSH 0x16c9e55061ebae204ba4cc8bd75a079432ae2a1d0b7c9dce1665d51c640fcba2
     PUSH 0x2fb347984f7911f74c0bec3cf559b143b78cc310c2c3330c99e39557176f553d
-    %mul_fp2
+    %mul_fp254_2
 %endmacro
 
 %macro frobt2_1
     PUSH 0x2c145edbe7fd8aee9f3a80b03b0b1c923685d2ea1bdec763c13b4711cd2b8126
     PUSH 0x5b54f5e64eea80180f3c0b75a181e84d33365f7be94ec72848a1f55921ea762
-    %mul_fp2
+    %mul_fp254_2
 %endmacro
 
 %macro frobt1_2
     PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48
-    %mul_fp_fp2
+    %mul_fp254__fp254_2
 %endmacro
 
 %macro frobt2_2
     PUSH 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe
-    %mul_fp_fp2
+    %mul_fp254__fp254_2
 %endmacro
 
 
 %macro frobt1_3
     PUSH 0x4f1de41b3d1766fa9f30e6dec26094f0fdf31bf98ff2631380cab2baaa586de
     PUSH 0x856e078b755ef0abaff1c77959f25ac805ffd3d5d6942d37b746ee87bdcfb6d
-    %mul_fp2
+    %mul_fp254_2
 %endmacro
 
 %macro frobt2_3
     PUSH 0x23d5e999e1910a12feb0f6ef0cd21d04a44a9e08737f96e55fe3ed9d730c239f
     PUSH 0xbc58c6611c08dab19bee0f7b5b2444ee633094575b06bcb0e1a92bc3ccbf066
-    %mul_fp2
+    %mul_fp254_2
 %endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 4d767761..3e26d355 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -28,7 +28,7 @@ global inv_fp254_12:
     // stack:                   f^-1, inp, out, retdest
     DUP14
     // stack:              out, f^-1, inp, out, retdest
-    %store_fp12
+    %store_fp254_12
     // stack:                         inp, out, retdest
     %stack (inp, out) -> (inp, out, 50, check_inv_fp254_12)
     // stack: inp, out, 50, check_inv_fp254_12, retdest 
@@ -36,9 +36,9 @@ global inv_fp254_12:
 check_inv_fp254_12:
     // stack:        retdest
     PUSH 50  
-    %load_fp12
+    %load_fp254_12
     // stack: unit?, retdest
-    %assert_eq_unit_fp12
+    %assert_eq_unit_fp254_12
     // stack:        retdest
     JUMP
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 092d38aa..e2bed9bf 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -1,11 +1,11 @@
-%macro offset_fp6
+%macro offset_fp254_6
     %add_const(6)
 %endmacro
 
-// fp2 macros
+// fp254_2 macros
 
 // cost: 2 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro load_fp2
+%macro load_fp254_2
     // stack:       ptr
     DUP1  
     %add_const(1)
@@ -28,7 +28,7 @@
     // stack: a, -b 
 %endmacro
 
-%macro swap_fp2
+%macro swap_fp254_2
     // stack: a , a_, b , b_
     SWAP2
     // stack: b , a_, a , b_
@@ -40,7 +40,7 @@
     // stack: b , b_, a , a_
 %endmacro
 
-%macro swap_fp2_hole_2
+%macro swap_fp254_2_hole_2
     // stack: a , a_, X, b , b_
     SWAP4
     // stack: b , a_, X, a , b_
@@ -52,7 +52,7 @@
     // stack: b , b_, X, a , a_
 %endmacro
 
-%macro mul_fp_fp2
+%macro mul_fp254__fp254_2
     // stack:    c, x, y
     SWAP2
     // stack:    y, x, c 
@@ -89,7 +89,7 @@
     // stack: 9b + a, 9a - b 
 %endmacro
 
-%macro mul_fp2
+%macro mul_fp254_2
     // stack:          a, b, c, d
     DUP4
     DUP3
@@ -115,10 +115,10 @@
     // stack:    ac - bd, bc + ad
 %endmacro 
 
-// fp6 macros
+// fp254_6 macros
 
 // cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro load_fp6
+%macro load_fp254_6
     // stack:                         ptr
     DUP1  
     %add_const(4)
@@ -152,7 +152,7 @@
 %endmacro
 
 // cost: 6 loads + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro load_fp6(ptr)
+%macro load_fp254_6(ptr)
     // stack:
     PUSH $ptr  
     %add_const(5)
@@ -186,7 +186,7 @@
 %endmacro
 
 // cost: 6 stores + 6 swaps/dups + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro store_fp6
+%macro store_fp254_6
     // stack:      ptr, x0, x1, x2, x3, x4 , x5
     SWAP5
     // stack:       x4, x0, x1, x2, x3, ptr, x5
@@ -221,7 +221,7 @@
 %endmacro
 
 // cost: 6 stores + 7 swaps/dups + 5 adds + 6 doubles = 6*4 + 7*1 + 5*2 + 6*2 = 53
-%macro store_fp6_double
+%macro store_fp254_6_double
     // stack:        ptr, x0, x1, x2, x3, x4, x5
     SWAP6
     // stack:         x5, x0, x1, x2, x3, x4, ptr
@@ -276,7 +276,7 @@
 %endmacro
 
 // cost: 6 stores + 6 pushes + 5 adds = 6*4 + 6*1 + 5*2 = 40
-%macro store_fp6(ptr)
+%macro store_fp254_6(ptr)
     // stack:       x0, x1, x2, x3, x4, x5
     PUSH $ptr
     // stack: ind0, x0, x1, x2, x3, x4, x5
@@ -310,7 +310,7 @@
 %endmacro
 
 // cost: store (40) + i9 (9) = 49
-%macro store_fp6_sh(ptr)
+%macro store_fp254_6_sh(ptr)
     // stack:       x0, x1, x2, x3, x4, x5
     PUSH $ptr  
     %add_const(2)
@@ -346,7 +346,7 @@
 %endmacro
 
 // cost: 6
-%macro dup_fp6_0
+%macro dup_fp254_6_0
     // stack:       f: 6
     DUP6
     DUP6
@@ -358,7 +358,7 @@
 %endmacro 
 
 // cost: 6
-%macro dup_fp6_2
+%macro dup_fp254_6_2
     // stack:       X: 2, f: 6
     DUP8
     DUP8
@@ -370,7 +370,7 @@
 %endmacro 
 
 // cost: 6
-%macro dup_fp6_6
+%macro dup_fp254_6_6
     // stack:       X: 6, f: 6
     DUP12
     DUP12
@@ -382,7 +382,7 @@
 %endmacro
 
 // cost: 6
-%macro dup_fp6_7
+%macro dup_fp254_6_7
     // stack:       X: 7, f: 6
     DUP13
     DUP13
@@ -394,7 +394,7 @@
 %endmacro
 
 // cost: 6
-%macro dup_fp6_8
+%macro dup_fp254_6_8
     // stack:       X: 8, f: 6
     DUP14
     DUP14
@@ -406,7 +406,7 @@
 %endmacro
 
 // cost: 16
-%macro swap_fp6
+%macro swap_fp254_6
     // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
     SWAP6
     // stack: g0, f1, f2, f3, f4, f5, f0, g1, g2, g3, g4, g5
@@ -433,9 +433,9 @@
 %endmacro
 
 // cost: 16
-// swap two fp6 elements with a stack term separating them
+// swap two fp254_6 elements with a stack term separating them
 //    (f: 6, X, g: 6) -> (g: 6, X, f: 6)
-%macro swap_fp6_hole
+%macro swap_fp254_6_hole
     // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
     SWAP7
     // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
@@ -462,9 +462,9 @@
 %endmacro
 
 // cost: 16
-// swap two fp6 elements with two stack terms separating them
+// swap two fp254_6 elements with two stack terms separating them
 //    (f: 6, X: 2, g: 6) -> (g: 6, X: 2, f: 6)
-%macro swap_fp6_hole_2
+%macro swap_fp254_6_hole_2
     // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
     SWAP8
     // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
@@ -513,7 +513,7 @@
 %endmacro
 
 // cost: 16
-%macro add_fp6
+%macro add_fp254_6
     // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
     SWAP7
     ADDFP254
@@ -540,9 +540,9 @@
 %endmacro
 
 // cost: 18
-// add two fp6 elements with a to-be-popped stack term separating them
+// add two fp254_6 elements with a to-be-popped stack term separating them
 //    (f: 6, X, g: 6) -> (f + g: 6)
-%macro add_fp6_hole
+%macro add_fp254_6_hole
     // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
     SWAP8
     ADDFP254
@@ -571,7 +571,7 @@
 %endmacro
 
 // *reversed argument subtraction* cost: 17
-%macro subr_fp6
+%macro subr_fp254_6
     // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
     SWAP7
     SUBFP254
@@ -599,7 +599,7 @@
 %endmacro
 
 // cost: 21
-%macro mul_fp_fp6
+%macro mul_fp254__fp254_6
     // stack: c , f0,      f1,    f2,     f3,     f4,     f5
     SWAP6
     DUP7
@@ -639,7 +639,7 @@
 /// G1 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i
 /// G2 = (a+bi)(f2+f2_i) = (af2 - bf2_) + (bf2 + af2_)i
 
-%macro mul_fp2_fp6
+%macro mul_fp254_2_fp254_6
     // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
     DUP2
     DUP5
@@ -730,7 +730,7 @@
 /// G1 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
 /// G2 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i
 
-%macro mul_fp2_fp6_sh
+%macro mul_fp254_2_fp254_6_sh
     // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
     DUP6
     DUP3
@@ -824,7 +824,7 @@
 /// G1 = (a+bi)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i
 /// G2 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
 
-%macro mul_fp2_fp6_sh2
+%macro mul_fp254_2_fp254_6_sh2
     // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
     DUP4
     DUP3 
@@ -908,7 +908,7 @@
     // stack:                   g0, g0_, g1, g1_, g2, g2_
 %endmacro
 
-%macro load_fp12
+%macro load_fp254_12
     // stack:                                                          ptr
     DUP1  
     %add_const(10)
@@ -971,7 +971,7 @@
     // stack:   x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
 %endmacro
 
-%macro store_fp12
+%macro store_fp254_12
     // stack:        ptr, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
     SWAP11
     // stack:        x10, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
@@ -1035,9 +1035,9 @@
     // stack:                                                            
 %endmacro
 
-/// moves fp12 from src..src+12 to dest..dest+12
+/// moves fp254_12 from src..src+12 to dest..dest+12
 /// these should not overlap. leaves dest on stack
-%macro move_fp12
+%macro move_fp254_12
     // stack:              src, dest
     DUP1  
     // stack:       ind00, src, dest
@@ -1157,7 +1157,7 @@
     %mstore_kernel_general
 %endmacro
 
-%macro assert_eq_unit_fp12
+%macro assert_eq_unit_fp254_12
     %assert_eq_const(1)
     %assert_zero
     %assert_zero

From 9f808fc78c9b870710e12c96d221a9344c38635e Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 7 Feb 2023 08:53:23 -0800
Subject: [PATCH 176/201] align

---
 .../bn254/field_arithmetic/degree_12_mul.asm  | 124 +++++++++---------
 1 file changed, 62 insertions(+), 62 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index 7126ba8b..ff6fb72c 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -44,95 +44,95 @@
 /// f, f', g, g' consist of six elements on the stack
 
 global mul_fp254_12:
-    // stack:                                inA, inB, out 
+    // stack:                                   inA, inB, out 
     DUP1  
     %offset_fp254_6 
-    // stack:                          inA', inA, inB, out 
+    // stack:                             inA', inA, inB, out 
     %load_fp254_6
-    // stack:                            f', inA, inB, out 
+    // stack:                               f', inA, inB, out 
     DUP8  
     %offset_fp254_6
-    // stack:                      inB', f', inA, inB, out 
+    // stack:                         inB', f', inA, inB, out 
     %load_fp254_6
-    // stack:                        g', f', inA, inB, out 
+    // stack:                           g', f', inA, inB, out 
     PUSH mul_fp254_12_1
-    // stack:            mul_fp254_12_1, g', f', inA, inB, out 
+    // stack:           mul_fp254_12_1, g', f', inA, inB, out 
     %dup_fp254_6_7
-    // stack:        f', mul_fp254_12_1, g', f', inA, inB, out 
+    // stack:       f', mul_fp254_12_1, g', f', inA, inB, out 
     %dup_fp254_6_7
-    // stack:    g', f', mul_fp254_12_1, g', f', inA, inB, out 
+    // stack:   g', f', mul_fp254_12_1, g', f', inA, inB, out 
     %jump(mul_fp254_6)
 mul_fp254_12_1:
-    // stack:                f'g', g'  , f', inA, inB, out 
+    // stack:                   f'g', g'  , f', inA, inB, out 
     %dup_fp254_6_0
-    // stack:          f'g', f'g', g'  , f', inA, inB, out 
+    // stack:             f'g', f'g', g'  , f', inA, inB, out 
     %store_fp254_6_sh(0)                                    
-    // stack:                f'g', g'  , f', inA, inB, out  {0: sh(f'g')}
+    // stack:                   f'g', g'  , f', inA, inB, out  {0: sh(f'g')}
     %store_fp254_6(6)
-    // stack:                      g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                         g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     DUP13
-    // stack:                 inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                    inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     DUP15  
-    // stack:            inB, inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:               inB, inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %load_fp254_6
-    // stack:             g , inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                g , inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %swap_fp254_6_hole
-    // stack:             g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %dup_fp254_6_7
-    // stack:           g,g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:              g,g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %add_fp254_6
-    // stack:           g+g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:              g+g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %swap_fp254_6_hole
-    // stack:              g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                 g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     PUSH mul_fp254_12_2
-    // stack:  mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     SWAP7
-    // stack:  inA, g, mul_fp254_12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %load_fp254_6
-    // stack:    f, g, mul_fp254_12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:   f, g, mul_fp254_12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %jump(mul_fp254_6)
 mul_fp254_12_2:    
-    // stack:                  fg, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                     fg, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %store_fp254_6(12)
-    // stack:                      g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                         g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %swap_fp254_6
-    // stack:                      f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                         f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     PUSH mul_fp254_12_3
-    // stack:          mul_fp254_12_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:         mul_fp254_12_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     SWAP13
-    // stack:          inA, f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:         inA, f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp254_6
-    // stack:             f,f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:            f,f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp254_6
-    // stack:             f+f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:            f+f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %jump(mul_fp254_6)
 mul_fp254_12_3:
-    // stack:                       (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                          (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp254_6(12)
-    // stack:                   fg, (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                      fg, (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %swap_fp254_6
-    // stack:                   (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                      (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %dup_fp254_6_6
-    // stack:               fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                  fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp254_6(6)
-    // stack:          f'g',fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:             f'g',fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp254_6
-    // stack:          f'g'+fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:             f'g'+fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %subr_fp254_6
-    // stack:       (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
+    // stack:          (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     DUP14  
     %offset_fp254_6 
-    // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
+    // stack:    out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     %store_fp254_6
-    // stack:                                 fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                                    fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp254_6(0)
-    // stack:                      sh(f'g') , fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                         sh(f'g') , fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %add_fp254_6
-    // stack:                      sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                         sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     DUP8
-    // stack:                 out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                    out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %store_fp254_6
-    // stack:                                     inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                                        inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %pop2  
     JUMP
 
@@ -321,22 +321,22 @@ global square_fp254_12_test:
     POP
 
 global square_fp254_12:
-    // stack:                                                                   inp, out
+    // stack:                                                                               inp, out
     DUP1
-    // stack:                                                              inp, inp, out
+    // stack:                                                                          inp, inp, out
     %load_fp254_6 
-    // stack:                                                                f, inp, out
+    // stack:                                                                            f, inp, out
     PUSH square_fp254_12_3
-    // stack:                                                 square_fp254_12_3, f, inp, out
+    // stack:                                                         square_fp254_12_3, f, inp, out
     SWAP7
-    // stack:                                                 inp, f, square_fp254_12_3, out
+    // stack:                                                         inp, f, square_fp254_12_3, out
     PUSH square_fp254_12_2
-    // stack:                                  square_fp254_12_2, inp, f, square_fp254_12_3, out 
+    // stack:                                      square_fp254_12_2, inp, f, square_fp254_12_3, out 
     %dup_fp254_6_2
-    // stack:                              f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    // stack:                                  f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP16
     %offset_fp254_6
-    // stack:                        out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
+    // stack:                            out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     PUSH square_fp254_12_1
     // stack:         square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP10
@@ -350,32 +350,32 @@ global square_fp254_12:
     // stack: f', f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     %jump(mul_fp254_6)
 square_fp254_12_1:
-    // stack:                   f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    // stack:                       f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP7
-    // stack:             out', f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    // stack:                 out', f'f, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     %store_fp254_6_double
-    // stack:                        out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    // stack:                            out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     POP
-    // stack:                              f', square_fp254_12_2, inp, f, square_fp254_12_3, out
+    // stack:                                  f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     %jump(square_fp254_6)
 square_fp254_12_2:
-    // stack:                                           f'f', inp, f, square_fp254_12_3, out
+    // stack:                                                   f'f', inp, f, square_fp254_12_3, out
     %sh_fp254_6
-    // stack:                                       sh(f'f'), inp, f, square_fp254_12_3, out
+    // stack:                                               sh(f'f'), inp, f, square_fp254_12_3, out
     %swap_fp254_6_hole
-    // stack:                                       f, inp, sh(f'f'), square_fp254_12_3, out
+    // stack:                                               f, inp, sh(f'f'), square_fp254_12_3, out
     SWAP6
     SWAP13
     SWAP6
-    // stack:                                       f, square_fp254_12_3, sh(f'f'), inp, out
+    // stack:                                               f, square_fp254_12_3, sh(f'f'), inp, out
     %jump(square_fp254_6)
 square_fp254_12_3:
-    // stack:                                                    ff , sh(f'f'), inp, out
+    // stack:                                                                ff , sh(f'f'), inp, out
     %add_fp254_6
-    // stack:                                                    ff + sh(f'f'), inp, out
+    // stack:                                                                ff + sh(f'f'), inp, out
     DUP8
-    // stack:                                               out, ff + sh(f'f'), inp, out
+    // stack:                                                           out, ff + sh(f'f'), inp, out
     %store_fp254_6
-    // stack:                                                                   inp, out
+    // stack:                                                                               inp, out
     %pop2
     JUMP

From 57146c83bce09848bbcef1daa60290eb324c1d11 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 7 Feb 2023 09:18:49 -0800
Subject: [PATCH 177/201] miller loop test

---
 evm/src/bn254_arithmetic.rs       |  7 ++-
 evm/src/cpu/kernel/tests/bn254.rs | 94 +++++++++++++++----------------
 2 files changed, 52 insertions(+), 49 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index cf26e208..8fb72d03 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -1,3 +1,4 @@
+use std::mem::transmute;
 use std::ops::{Add, Div, Mul, Neg, Sub};
 
 use ethereum_types::U256;
@@ -432,7 +433,6 @@ impl Fp12 {
             z1: -self.z1,
         }
     }
-
     /// The nth frobenius endomorphism of a p^q field is given by mapping
     ///     x to x^(p^n)
     /// which sends a + bz: Fp12 to
@@ -468,6 +468,11 @@ impl Fp12 {
         let prod_except_six = prod_evens_except_six.scale(prod_odds_over_phi);
         self.conj().scale(prod_except_six)
     }
+
+    pub fn on_stack(self) -> Vec<U256> {
+        let f: [U256; 12] = unsafe { transmute(self) };
+        f.into_iter().collect()
+    }
 }
 
 #[allow(clippy::suspicious_arithmetic_impl)]
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index e8886e8e..daed596d 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -1,4 +1,3 @@
-use std::mem::transmute;
 use std::ops::Range;
 
 use anyhow::Result;
@@ -6,7 +5,7 @@ use ethereum_types::U256;
 use rand::Rng;
 
 use crate::bn254_arithmetic::{Fp, Fp12, Fp2};
-use crate::bn254_pairing::{gen_fp12_sparse, tate, Curve, TwistedCurve};
+use crate::bn254_pairing::{gen_fp12_sparse, miller_loop, tate, Curve, TwistedCurve};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
@@ -48,19 +47,9 @@ fn extract_kernel_output(range: Range<usize>, interpreter: Interpreter<'static>)
     output
 }
 
-fn fp12_on_stack(f: Fp12) -> Vec<U256> {
-    let f: [U256; 12] = unsafe { transmute(f) };
-    f.into_iter().collect()
-}
-
-fn setup_mul_test(
-    in0: usize,
-    in1: usize,
-    out: usize,
-    f: Fp12,
-    g: Fp12,
-    label: &str,
-) -> InterpreterSetup {
+fn setup_mul_test(out: usize, f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
+    let in0: usize = 64;
+    let in1: usize = 76;
     InterpreterSetup {
         label: label.to_string(),
         stack: vec![
@@ -69,14 +58,12 @@ fn setup_mul_test(
             U256::from(out),
             U256::from(0xdeadbeefu32),
         ],
-        memory: vec![(in0, fp12_on_stack(f)), (in1, fp12_on_stack(g))],
+        memory: vec![(in0, f.on_stack()), (in1, g.on_stack())],
     }
 }
 
 #[test]
 fn test_mul_fp254_12() -> Result<()> {
-    let in0: usize = 64;
-    let in1: usize = 76;
     let out: usize = 88;
 
     let mut rng = rand::thread_rng();
@@ -84,10 +71,9 @@ fn test_mul_fp254_12() -> Result<()> {
     let g: Fp12 = rng.gen::<Fp12>();
     let h: Fp12 = gen_fp12_sparse(&mut rng);
 
-    let setup_normal: InterpreterSetup = setup_mul_test(in0, in1, out, f, g, "mul_fp254_12");
-    let setup_sparse: InterpreterSetup = setup_mul_test(in0, in1, out, f, h, "mul_fp254_12_sparse");
-    let setup_square: InterpreterSetup =
-        setup_mul_test(in0, in1, out, f, f, "square_fp254_12_test");
+    let setup_normal: InterpreterSetup = setup_mul_test(out, f, g, "mul_fp254_12");
+    let setup_sparse: InterpreterSetup = setup_mul_test(out, f, h, "mul_fp254_12_sparse");
+    let setup_square: InterpreterSetup = setup_mul_test(out, f, f, "square_fp254_12_test");
 
     let intrptr_normal: Interpreter = run_setup_interpreter(setup_normal).unwrap();
     let intrptr_sparse: Interpreter = run_setup_interpreter(setup_sparse).unwrap();
@@ -97,9 +83,9 @@ fn test_mul_fp254_12() -> Result<()> {
     let out_sparse: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_sparse);
     let out_square: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_square);
 
-    let exp_normal: Vec<U256> = fp12_on_stack(f * g);
-    let exp_sparse: Vec<U256> = fp12_on_stack(f * h);
-    let exp_square: Vec<U256> = fp12_on_stack(f * f);
+    let exp_normal: Vec<U256> = (f * g).on_stack();
+    let exp_sparse: Vec<U256> = (f * h).on_stack();
+    let exp_square: Vec<U256> = (f * f).on_stack();
 
     assert_eq!(out_normal, exp_normal);
     assert_eq!(out_sparse, exp_sparse);
@@ -112,7 +98,7 @@ fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
     InterpreterSetup {
         label: label.to_string(),
         stack: vec![U256::from(ptr)],
-        memory: vec![(ptr, fp12_on_stack(f))],
+        memory: vec![(ptr, f.on_stack())],
     }
 }
 
@@ -138,10 +124,10 @@ fn test_frob_fp254_12() -> Result<()> {
     let out_frob_3: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_3);
     let out_frob_6: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_6);
 
-    let exp_frob_1: Vec<U256> = fp12_on_stack(f.frob(1));
-    let exp_frob_2: Vec<U256> = fp12_on_stack(f.frob(2));
-    let exp_frob_3: Vec<U256> = fp12_on_stack(f.frob(3));
-    let exp_frob_6: Vec<U256> = fp12_on_stack(f.frob(6));
+    let exp_frob_1: Vec<U256> = f.frob(1).on_stack();
+    let exp_frob_2: Vec<U256> = f.frob(2).on_stack();
+    let exp_frob_3: Vec<U256> = f.frob(3).on_stack();
+    let exp_frob_6: Vec<U256> = f.frob(6).on_stack();
 
     assert_eq!(out_frob_1, exp_frob_1);
     assert_eq!(out_frob_2, exp_frob_2);
@@ -162,11 +148,11 @@ fn test_inv_fp254_12() -> Result<()> {
     let setup = InterpreterSetup {
         label: "inv_fp254_12".to_string(),
         stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],
-        memory: vec![(ptr, fp12_on_stack(f))],
+        memory: vec![(ptr, f.on_stack())],
     };
     let interpreter: Interpreter = run_setup_interpreter(setup).unwrap();
     let output: Vec<U256> = extract_kernel_output(inv..inv + 12, interpreter);
-    let expected: Vec<U256> = fp12_on_stack(f.inv());
+    let expected: Vec<U256> = f.inv().on_stack();
 
     assert_eq!(output, expected);
 
@@ -197,20 +183,6 @@ fn test_inv_fp254_12() -> Result<()> {
 //     Ok(())
 // }
 
-// #[test]
-// fn test_miller() -> Result<()> {
-//     let p: Curve = curve_generator();
-//     let q: TwistedCurve = twisted_curve_generator();
-
-//     let stack = make_tate_stack(p, q);
-//     let output = run_setup_interpreter("test_miller", stack);
-//     let expected = fp12_on_stack(miller_loop(p, q));
-
-//     assert_eq!(output, expected);
-
-//     Ok(())
-// }
-
 // The curve is cyclic with generator (1, 2)
 pub const CURVE_GENERATOR: Curve = {
     Curve {
@@ -263,6 +235,33 @@ pub const TWISTED_GENERATOR: TwistedCurve = {
     }
 };
 
+#[test]
+fn test_miller() -> Result<()> {
+    let ptr: usize = 300;
+    let out: usize = 400;
+    let inputs: Vec<U256> = vec![
+        CURVE_GENERATOR.x.val,
+        CURVE_GENERATOR.y.val,
+        TWISTED_GENERATOR.x.re.val,
+        TWISTED_GENERATOR.x.im.val,
+        TWISTED_GENERATOR.y.re.val,
+        TWISTED_GENERATOR.y.im.val,
+    ];
+
+    let setup = InterpreterSetup {
+        label: "bn254_miller".to_string(),
+        stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
+        memory: vec![(ptr, inputs)],
+    };
+    let interpreter = run_setup_interpreter(setup).unwrap();
+    let output: Vec<U256> = extract_kernel_output(out..out + 12, interpreter);
+    let expected = miller_loop(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
+
+    assert_eq!(output, expected);
+
+    Ok(())
+}
+
 #[test]
 fn test_tate() -> Result<()> {
     let ptr: usize = 300;
@@ -283,8 +282,7 @@ fn test_tate() -> Result<()> {
     };
     let interpreter = run_setup_interpreter(setup).unwrap();
     let output: Vec<U256> = extract_kernel_output(out..out + 12, interpreter);
-    // let output: Vec<U256> = interpreter.stack().to_vec();
-    let expected = fp12_on_stack(tate(CURVE_GENERATOR, TWISTED_GENERATOR));
+    let expected = tate(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
 
     assert_eq!(output, expected);
 

From 69afed9297231d0ff7f0cdb74c2d4fae957b4ee9 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 7 Feb 2023 14:54:07 -0800
Subject: [PATCH 178/201] refactor

---
 evm/src/bn254_pairing.rs                      |   4 +-
 evm/src/cpu/kernel/aggregator.rs              |   3 +-
 ...final_power.asm => invariant_exponent.asm} |  35 +-
 .../bn254/curve_arithmetic/miller_loop.asm    | 283 ----------------
 .../bn254/curve_arithmetic/tate_pairing.asm   | 315 ++++++++++++++++--
 evm/src/cpu/kernel/tests/bn254.rs             | 115 +++----
 6 files changed, 378 insertions(+), 377 deletions(-)
 rename evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/{final_power.asm => invariant_exponent.asm} (90%)
 delete mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm

diff --git a/evm/src/bn254_pairing.rs b/evm/src/bn254_pairing.rs
index 71f9575f..bf5db74a 100644
--- a/evm/src/bn254_pairing.rs
+++ b/evm/src/bn254_pairing.rs
@@ -41,7 +41,7 @@ pub struct TwistedCurve {
 // The tate pairing takes a point each from the curve and its twist and outputs an Fp12 element
 pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 {
     let miller_output = miller_loop(p, q);
-    invariance_inducing_power(miller_output)
+    invariant_exponent(miller_output)
 }
 
 /// Standard code for miller loop, can be found on page 99 at this url:
@@ -116,7 +116,7 @@ pub fn gen_fp12_sparse<R: Rng + ?Sized>(rng: &mut R) -> Fp12 {
 ///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
 /// where 0 < a0, a1, a2 < p. Then the final power is given by
 ///     y = y_3 * (y^a2)_2 * (y^-a1)_1 * (y^-a0)
-pub fn invariance_inducing_power(f: Fp12) -> Fp12 {
+pub fn invariant_exponent(f: Fp12) -> Fp12 {
     let mut y = f.frob(6) / f;
     y = y.frob(2) * y;
     let (y_a2, y_a1, y_a0) = get_custom_powers(y);
diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 3c998449..7fbb9f08 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -27,8 +27,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
-        include_str!("asm/curve/bn254/curve_arithmetic/final_power.asm"),
-        include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/invariant_exponent.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/degree_6_mul.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
similarity index 90%
rename from evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
index 7f22587a..3176dbf5 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_power.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
@@ -22,7 +22,40 @@
 ///     y1 = y1.frob(1)
 ///     return y * y2 * y1 * y0
 
-global bn254_final_exp:
+/// def bn254_invariant_exponent(y: Fp12):
+///     y = first_exp(y)
+///     y = second_exp(y)
+///     return final_exponentiation(y)
+
+global bn254_invariant_exponent:
+
+/// map t to t^(p^6 - 1) via 
+///     def first_exp(t):
+///         return t.frob(6) / t
+    // stack:                      out, retdest  {out: y}
+    %stack (out) -> (out, 100, first_exp, out)         
+    // stack: out, 100, first_exp, out, retdest  {out: y}
+    %jump(inv_fp254_12)
+first_exp:
+    // stack:                             out, retdest  {out: y  , 100: y^-1}
+    %frob_fp254_12_6
+    // stack:                             out, retdest  {out: y_6, 100: y^-1}
+    %stack (out) -> (out, 100, out, second_exp, out)
+    // stack:  out, 100, out, second_exp, out, retdest  {out: y_6, 100: y^-1}
+    %jump(mul_fp254_12)
+
+/// map t to t^(p^2 + 1) via 
+///     def second_exp(t):
+///         return t.frob(2) * t
+second_exp:
+    // stack:                                out, retdest  {out: y}
+    %stack (out) -> (out, 100, out, out, final_exp, out)
+    // stack: out, 100, out, out, final_exp, out, retdest  {out: y}
+    %frob_fp254_12_2_
+    // stack:      100, out, out, final_exp, out, retdest  {out: y, 100: y_2}
+    %jump(mul_fp254_12)
+
+final_exp:
     // stack:                  val, retdest
     %stack (val) -> (val, 300, val)
     // stack:        val, 300, val, retdest
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
deleted file mode 100644
index 63387cb4..00000000
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ /dev/null
@@ -1,283 +0,0 @@
-/// def miller(P, Q):
-///     miller_init()
-///     miller_loop()
-///
-/// def miller_init():
-///     out = 1
-///     O = P
-///     times = 61
-///
-/// def miller_loop():
-///     while times:
-///         0xnm = load(miller_data)
-///         while 0xnm > 0x20:
-///             miller_one()
-///         while 0xnm:
-///             miller_zero()
-///         times -= 1
-///
-/// def miller_one():
-///     0xnm -= 0x20
-///     mul_tangent()
-///     mul_cord()
-///
-/// def miller_zero():
-///     0xnm -= 1
-///     mul_tangent()
-
-global bn254_miller:
-    // stack:         ptr, out, retdest
-    %stack (ptr, out) -> (out, 1, ptr, out)
-    // stack: out, 1, ptr, out, retdest
-    %mstore_kernel_general
-    // stack:         ptr, out, retdest
-    %load_fp254_6
-    // stack:        P, Q, out, retdest
-    %stack (P: 2) -> (0, 53, P, P)
-    // stack: 0, 53, O, P, Q, out, retdest
-    // the head 0 lets miller_loop start with POP
-miller_loop:
-    POP
-    // stack:          times  , O, P, Q, out, retdest
-    DUP1  
-    ISZERO
-    // stack:  break?, times  , O, P, Q, out, retdest
-    %jumpi(miller_return)
-    // stack:          times  , O, P, Q, out, retdest
-    %sub_const(1)
-    // stack:          times-1, O, P, Q, out, retdest
-    DUP1
-    // stack: times-1, times-1, O, P, Q, out, retdest
-    %mload_kernel_code(miller_data)
-    // stack:    0xnm, times-1, O, P, Q, out, retdest
-    %jump(miller_one)
-miller_return:
-    // stack: times, O, P, Q, out, retdest
-    %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest)
-    // stack:                      retdest
-    JUMP 
-
-miller_one:
-    // stack:               0xnm, times, O, P, Q, out, retdest
-    DUP1  
-    %lt_const(0x20) 
-    // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
-    %jumpi(miller_zero)
-    // stack:               0xnm, times, O, P, Q, out, retdest
-    %sub_const(0x20)
-    // stack:           0x{n-1}m, times, O, P, Q, out, retdest
-    PUSH mul_cord
-    // stack: mul_cord, 0x{n-1}m, times, O, P, Q, out, retdest
-    %jump(mul_tangent)
-
-miller_zero:
-    // stack:              m  , times, O, P, Q, out, retdest
-    DUP1  
-    ISZERO
-    // stack:       skip?, m  , times, O, P, Q, out, retdest
-    %jumpi(miller_loop)
-    // stack:              m  , times, O, P, Q, out, retdest
-    %sub_const(1)
-    // stack:              m-1, times, O, P, Q, out, retdest
-    PUSH miller_zero
-    // stack: miller_zero, m-1, times, O, P, Q, out, retdest
-    %jump(mul_tangent)
-
-
-/// def mul_tangent()
-///     out = square_fp254_12(out)
-///     line = tangent(O, Q)
-///     out = mul_fp254_12_sparse(out, line)
-///     O += O
-
-mul_tangent:
-    // stack:                                              retdest, 0xnm, times, O, P, Q, out
-    PUSH mul_tangent_2  
-    DUP13  
-    PUSH mul_tangent_1
-    // stack:           mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    %stack (mul_tangent_1, out) -> (out, out, mul_tangent_1, out)
-    // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    %jump(square_fp254_12)
-mul_tangent_1:
-    // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP13
-    DUP13
-    DUP13
-    DUP13
-    // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP11  
-    DUP11
-    // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    %tangent
-    // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
-    %stack (out) -> (out, 100, out)
-    // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
-    %jump(mul_fp254_12_sparse)
-mul_tangent_2:
-    // stack:                  retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    PUSH after_double
-    // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    DUP6  
-    DUP6
-    // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    %jump(ec_double)
-after_double:
-    // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    SWAP5
-    POP
-    SWAP5
-    POP
-    // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
-    JUMP
-
-/// def mul_cord()
-///     line = cord(P, O, Q)
-///     out = mul_fp254_12_sparse(out, line)
-///     O += P
-
-mul_cord:
-    // stack:                            0xnm, times, O, P, Q, out
-    PUSH mul_cord_1
-    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP11  
-    DUP11  
-    DUP11  
-    DUP11
-    // stack:             Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP9  
-    DUP9
-    // stack:          O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP13  
-    DUP13
-    // stack:       P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    %cord 
-    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    DUP12
-    // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    %stack (out) -> (out, 100, out)
-    // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    %jump(mul_fp254_12_sparse)
-mul_cord_1:
-    // stack:                   0xnm, times, O  , P, Q, out
-    PUSH after_add
-    // stack:        after_add, 0xnm, times, O  , P, Q, out
-    DUP7  
-    DUP7  
-    DUP7  
-    DUP7
-    // stack: O , P, after_add, 0xnm, times, O  , P, Q, out
-    %jump(ec_add_valid_points)
-after_add:
-    // stack:            O + P, 0xnm, times, O  , P, Q, out
-    SWAP4
-    POP
-    SWAP4
-    POP
-    // stack:                   0xnm, times, O+P, P, Q, out
-    %jump(miller_one)
-
-
-/// def tangent(px, py, qx, qy):
-///     return sparse_store(
-///         py**2 - 9, 
-///         (-3px**2) * qx, 
-///         (2py)     * qy,
-///     )
-
-%macro tangent
-    // stack:                px, py, qx, qx_,  qy, qy_
-    %stack (px, py) -> (py, py , 9, px, py)
-    // stack:    py, py , 9, px, py, qx, qx_,  qy, qy_
-    MULFP254
-    // stack:      py^2 , 9, px, py, qx, qx_,  qy, qy_
-    SUBFP254
-    // stack:      py^2 - 9, px, py, qx, qx_,  qy, qy_
-    %mstore_kernel_general(100)
-    // stack:                px, py, qx, qx_,  qy, qy_
-    DUP1  
-    MULFP254
-    // stack:              px^2, py, qx, qx_,  qy, qy_
-    PUSH 3  
-    MULFP254
-    // stack:            3*px^2, py, qx, qx_,  qy, qy_
-    PUSH 0  
-    SUBFP254
-    // stack:           -3*px^2, py, qx, qx_,  qy, qy_
-    SWAP2
-    // stack:            qx, py, -3px^2, qx_,  qy, qy_
-    DUP3  
-    MULFP254
-    // stack:   (-3*px^2)qx, py, -3px^2, qx_,  qy, qy_ 
-    %mstore_kernel_general(102)
-    // stack:                py, -3px^2, qx_,  qy, qy_ 
-    PUSH 2  
-    MULFP254
-    // stack:               2py, -3px^2, qx_,  qy, qy_ 
-    SWAP3 
-    // stack:                qy, -3px^2, qx_, 2py, qy_ 
-    DUP4  
-    MULFP254
-    // stack:           (2py)qy, -3px^2, qx_, 2py, qy_ 
-    %mstore_kernel_general(108)
-    // stack:                    -3px^2, qx_, 2py, qy_ 
-    MULFP254
-    // stack:                   (-3px^2)*qx_, 2py, qy_ 
-    %mstore_kernel_general(103)
-    // stack:                                 2py, qy_ 
-    MULFP254
-    // stack:                                (2py)*qy_ 
-    %mstore_kernel_general(109)
-%endmacro
-
-/// def cord(p1x, p1y, p2x, p2y, qx, qy):
-///     return sparse_store(
-///         p1y*p2x - p2y*p1x, 
-///         (p2y - p1y) * qx, 
-///         (p1x - p2x) * qy,
-///     )
-
-%macro cord
-    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    DUP1  
-    DUP5  
-    MULFP254
-    // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    DUP3  
-    DUP5  
-    MULFP254
-    // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    SUBFP254
-    // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    %mstore_kernel_general(100)
-    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    SWAP3
-    // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
-    SUBFP254
-    // stack:                    p2y - p1y, p2x , p1x, qx, qx_, qy, qy_
-    SWAP2
-    // stack:                    p1x , p2x, p2y - p1y, qx, qx_, qy, qy_
-    SUBFP254
-    // stack:                    p1x - p2x, p2y - p1y, qx, qx_, qy, qy_
-    SWAP4
-    // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    DUP5
-    MULFP254
-    // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    %mstore_kernel_general(108)
-    // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
-    SWAP1
-    // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
-    DUP2
-    MULFP254
-    // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
-    %mstore_kernel_general(102)
-    // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
-    MULFP254
-    // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
-    %mstore_kernel_general(103)
-    // stack:                                            p1x - p2x, qy_
-    MULFP254
-    // stack:                                           (p1x - p2x)*qy_
-    %mstore_kernel_general(109)
-%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index cb3fe066..356f002a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -1,41 +1,292 @@
+/// def miller(P, Q):
+///     miller_init()
+///     miller_loop()
+///
+/// def miller_init():
+///     out = 1
+///     O = P
+///     times = 61
+///
+/// def miller_loop():
+///     while times:
+///         0xnm = load(miller_data)
+///         while 0xnm > 0x20:
+///             miller_one()
+///         while 0xnm:
+///             miller_zero()
+///         times -= 1
+///
+/// def miller_one():
+///     0xnm -= 0x20
+///     mul_tangent()
+///     mul_cord()
+///
+/// def miller_zero():
+///     0xnm -= 1
+///     mul_tangent()
+
 /// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
 ///     out = miller_loop(P, Q)
-///     return make_invariant(P, Q)
+///     return bn254_invariant_exponent(P, Q)
 global bn254_tate:
     // stack:                      inp, out, retdest
-    %stack (inp, out) -> (inp, out, make_invariant, out)
-    // stack: inp, out, make_invariant, out, retdest
+    %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out)
+    // stack: inp, out, bn254_invariant_exponent, out, retdest
     %jump(bn254_miller)
 
+global bn254_miller:
+    // stack:         ptr, out, retdest
+    %stack (ptr, out) -> (out, 1, ptr, out)
+    // stack: out, 1, ptr, out, retdest
+    %mstore_kernel_general
+    // stack:         ptr, out, retdest
+    %load_fp254_6
+    // stack:        P, Q, out, retdest
+    %stack (P: 2) -> (0, 53, P, P)
+    // stack: 0, 53, O, P, Q, out, retdest
+    // the head 0 lets miller_loop start with POP
+miller_loop:
+    POP
+    // stack:          times  , O, P, Q, out, retdest
+    DUP1  
+    ISZERO
+    // stack:  break?, times  , O, P, Q, out, retdest
+    %jumpi(miller_return)
+    // stack:          times  , O, P, Q, out, retdest
+    %sub_const(1)
+    // stack:          times-1, O, P, Q, out, retdest
+    DUP1
+    // stack: times-1, times-1, O, P, Q, out, retdest
+    %mload_kernel_code(miller_data)
+    // stack:    0xnm, times-1, O, P, Q, out, retdest
+    %jump(miller_one)
+miller_return:
+    // stack: times, O, P, Q, out, retdest
+    %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest)
+    // stack:                      retdest
+    JUMP 
 
-/// def make_invariant(y: Fp12):
-///     y = first_exp(y)
-///     y = second_exp(y)
-///     return final_exponentiation(y)
-make_invariant:
+miller_one:
+    // stack:               0xnm, times, O, P, Q, out, retdest
+    DUP1  
+    %lt_const(0x20) 
+    // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
+    %jumpi(miller_zero)
+    // stack:               0xnm, times, O, P, Q, out, retdest
+    %sub_const(0x20)
+    // stack:           0x{n-1}m, times, O, P, Q, out, retdest
+    PUSH mul_cord
+    // stack: mul_cord, 0x{n-1}m, times, O, P, Q, out, retdest
+    %jump(mul_tangent)
 
-/// map t to t^(p^6 - 1) via 
-///     def first_exp(t):
-///         return t.frob(6) / t
-    // stack:                      out, retdest  {out: y}
-    %stack (out) -> (out, 100, first_exp, out)         
-    // stack: out, 100, first_exp, out, retdest  {out: y}
-    %jump(inv_fp254_12)
-first_exp:
-    // stack:                             out, retdest  {out: y  , 100: y^-1}
-    %frob_fp254_12_6
-    // stack:                             out, retdest  {out: y_6, 100: y^-1}
-    %stack (out) -> (out, 100, out, second_exp, out)
-    // stack:  out, 100, out, second_exp, out, retdest  {out: y_6, 100: y^-1}
-    %jump(mul_fp254_12)
+miller_zero:
+    // stack:              m  , times, O, P, Q, out, retdest
+    DUP1  
+    ISZERO
+    // stack:       skip?, m  , times, O, P, Q, out, retdest
+    %jumpi(miller_loop)
+    // stack:              m  , times, O, P, Q, out, retdest
+    %sub_const(1)
+    // stack:              m-1, times, O, P, Q, out, retdest
+    PUSH miller_zero
+    // stack: miller_zero, m-1, times, O, P, Q, out, retdest
+    %jump(mul_tangent)
 
-/// map t to t^(p^2 + 1) via 
-///     def second_exp(t):
-///         return t.frob(2) * t
-second_exp:
-    // stack:                                      out, retdest  {out: y}
-    %stack (out) -> (out, 100, out, out, bn254_final_exp, out)
-    // stack: out, 100, out, out, bn254_final_exp, out, retdest  {out: y}
-    %frob_fp254_12_2_
-    // stack:      100, out, out, bn254_final_exp, out, retdest  {out: y, 100: y_2}
-    %jump(mul_fp254_12)
+
+/// def mul_tangent()
+///     out = square_fp254_12(out)
+///     line = tangent(O, Q)
+///     out = mul_fp254_12_sparse(out, line)
+///     O += O
+
+mul_tangent:
+    // stack:                                              retdest, 0xnm, times, O, P, Q, out
+    PUSH mul_tangent_2  
+    DUP13  
+    PUSH mul_tangent_1
+    // stack:           mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %stack (mul_tangent_1, out) -> (out, out, mul_tangent_1, out)
+    // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %jump(square_fp254_12)
+mul_tangent_1:
+    // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    DUP13
+    DUP13
+    DUP13
+    DUP13
+    // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    DUP11  
+    DUP11
+    // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    %tangent
+    // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
+    %stack (out) -> (out, 100, out)
+    // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
+    %jump(mul_fp254_12_sparse)
+mul_tangent_2:
+    // stack:                  retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    PUSH after_double
+    // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    DUP6  
+    DUP6
+    // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    %jump(ec_double)
+after_double:
+    // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    SWAP5
+    POP
+    SWAP5
+    POP
+    // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
+    JUMP
+
+/// def mul_cord()
+///     line = cord(P, O, Q)
+///     out = mul_fp254_12_sparse(out, line)
+///     O += P
+
+mul_cord:
+    // stack:                            0xnm, times, O, P, Q, out
+    PUSH mul_cord_1
+    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP11  
+    DUP11  
+    DUP11  
+    DUP11
+    // stack:             Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP9  
+    DUP9
+    // stack:          O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    DUP13  
+    DUP13
+    // stack:       P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    %cord 
+    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
+    DUP12
+    // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
+    %stack (out) -> (out, 100, out)
+    // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
+    %jump(mul_fp254_12_sparse)
+mul_cord_1:
+    // stack:                   0xnm, times, O  , P, Q, out
+    PUSH after_add
+    // stack:        after_add, 0xnm, times, O  , P, Q, out
+    DUP7  
+    DUP7  
+    DUP7  
+    DUP7
+    // stack: O , P, after_add, 0xnm, times, O  , P, Q, out
+    %jump(ec_add_valid_points)
+after_add:
+    // stack:            O + P, 0xnm, times, O  , P, Q, out
+    SWAP4
+    POP
+    SWAP4
+    POP
+    // stack:                   0xnm, times, O+P, P, Q, out
+    %jump(miller_one)
+
+
+/// def tangent(px, py, qx, qy):
+///     return sparse_store(
+///         py**2 - 9, 
+///         (-3px**2) * qx, 
+///         (2py)     * qy,
+///     )
+
+%macro tangent
+    // stack:                px, py, qx, qx_,  qy, qy_
+    %stack (px, py) -> (py, py , 9, px, py)
+    // stack:    py, py , 9, px, py, qx, qx_,  qy, qy_
+    MULFP254
+    // stack:      py^2 , 9, px, py, qx, qx_,  qy, qy_
+    SUBFP254
+    // stack:      py^2 - 9, px, py, qx, qx_,  qy, qy_
+    %mstore_kernel_general(100)
+    // stack:                px, py, qx, qx_,  qy, qy_
+    DUP1  
+    MULFP254
+    // stack:              px^2, py, qx, qx_,  qy, qy_
+    PUSH 3  
+    MULFP254
+    // stack:            3*px^2, py, qx, qx_,  qy, qy_
+    PUSH 0  
+    SUBFP254
+    // stack:           -3*px^2, py, qx, qx_,  qy, qy_
+    SWAP2
+    // stack:            qx, py, -3px^2, qx_,  qy, qy_
+    DUP3  
+    MULFP254
+    // stack:   (-3*px^2)qx, py, -3px^2, qx_,  qy, qy_ 
+    %mstore_kernel_general(102)
+    // stack:                py, -3px^2, qx_,  qy, qy_ 
+    PUSH 2  
+    MULFP254
+    // stack:               2py, -3px^2, qx_,  qy, qy_ 
+    SWAP3 
+    // stack:                qy, -3px^2, qx_, 2py, qy_ 
+    DUP4  
+    MULFP254
+    // stack:           (2py)qy, -3px^2, qx_, 2py, qy_ 
+    %mstore_kernel_general(108)
+    // stack:                    -3px^2, qx_, 2py, qy_ 
+    MULFP254
+    // stack:                   (-3px^2)*qx_, 2py, qy_ 
+    %mstore_kernel_general(103)
+    // stack:                                 2py, qy_ 
+    MULFP254
+    // stack:                                (2py)*qy_ 
+    %mstore_kernel_general(109)
+%endmacro
+
+/// def cord(p1x, p1y, p2x, p2y, qx, qy):
+///     return sparse_store(
+///         p1y*p2x - p2y*p1x, 
+///         (p2y - p1y) * qx, 
+///         (p1x - p2x) * qy,
+///     )
+
+%macro cord
+    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    DUP1  
+    DUP5  
+    MULFP254
+    // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    DUP3  
+    DUP5  
+    MULFP254
+    // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    SUBFP254
+    // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    %mstore_kernel_general(100)
+    // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
+    SWAP3
+    // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:                    p2y - p1y, p2x , p1x, qx, qx_, qy, qy_
+    SWAP2
+    // stack:                    p1x , p2x, p2y - p1y, qx, qx_, qy, qy_
+    SUBFP254
+    // stack:                    p1x - p2x, p2y - p1y, qx, qx_, qy, qy_
+    SWAP4
+    // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
+    DUP5
+    MULFP254
+    // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
+    %mstore_kernel_general(108)
+    // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
+    SWAP1
+    // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
+    DUP2
+    MULFP254
+    // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
+    %mstore_kernel_general(102)
+    // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
+    MULFP254
+    // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
+    %mstore_kernel_general(103)
+    // stack:                                            p1x - p2x, qy_
+    MULFP254
+    // stack:                                           (p1x - p2x)*qy_
+    %mstore_kernel_general(109)
+%endmacro
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index daed596d..24a84906 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -5,7 +5,9 @@ use ethereum_types::U256;
 use rand::Rng;
 
 use crate::bn254_arithmetic::{Fp, Fp12, Fp2};
-use crate::bn254_pairing::{gen_fp12_sparse, miller_loop, tate, Curve, TwistedCurve};
+use crate::bn254_pairing::{
+    gen_fp12_sparse, invariant_exponent, miller_loop, tate, Curve, TwistedCurve,
+};
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::Interpreter;
 use crate::memory::segments::Segment;
@@ -17,24 +19,26 @@ struct InterpreterSetup {
     memory: Vec<(usize, Vec<U256>)>,
 }
 
-fn run_setup_interpreter(setup: InterpreterSetup) -> Result<Interpreter<'static>> {
-    let label = KERNEL.global_labels[&setup.label];
-    let mut stack = setup.stack;
-    stack.reverse();
-    let mut interpreter = Interpreter::new_with_kernel(label, stack);
-    for (pointer, data) in setup.memory {
-        for (i, term) in data.iter().enumerate() {
-            interpreter.generation_state.memory.set(
-                MemoryAddress::new(0, Segment::KernelGeneral, pointer + i),
-                *term,
-            )
+impl InterpreterSetup {
+    fn run(self) -> Result<Interpreter<'static>> {
+        let label = KERNEL.global_labels[&self.label];
+        let mut stack = self.stack;
+        stack.reverse();
+        let mut interpreter = Interpreter::new_with_kernel(label, stack);
+        for (pointer, data) in self.memory {
+            for (i, term) in data.iter().enumerate() {
+                interpreter.generation_state.memory.set(
+                    MemoryAddress::new(0, Segment::KernelGeneral, pointer + i),
+                    *term,
+                )
+            }
         }
+        interpreter.run()?;
+        Ok(interpreter)
     }
-    interpreter.run()?;
-    Ok(interpreter)
 }
 
-fn extract_kernel_output(range: Range<usize>, interpreter: Interpreter<'static>) -> Vec<U256> {
+fn extract_kernel_memory(range: Range<usize>, interpreter: Interpreter<'static>) -> Vec<U256> {
     let mut output: Vec<U256> = vec![];
     for i in range {
         let term = interpreter.generation_state.memory.get(MemoryAddress::new(
@@ -63,7 +67,7 @@ fn setup_mul_test(out: usize, f: Fp12, g: Fp12, label: &str) -> InterpreterSetup
 }
 
 #[test]
-fn test_mul_fp254_12() -> Result<()> {
+fn test_mul_fp12() -> Result<()> {
     let out: usize = 88;
 
     let mut rng = rand::thread_rng();
@@ -75,13 +79,13 @@ fn test_mul_fp254_12() -> Result<()> {
     let setup_sparse: InterpreterSetup = setup_mul_test(out, f, h, "mul_fp254_12_sparse");
     let setup_square: InterpreterSetup = setup_mul_test(out, f, f, "square_fp254_12_test");
 
-    let intrptr_normal: Interpreter = run_setup_interpreter(setup_normal).unwrap();
-    let intrptr_sparse: Interpreter = run_setup_interpreter(setup_sparse).unwrap();
-    let intrptr_square: Interpreter = run_setup_interpreter(setup_square).unwrap();
+    let intrptr_normal: Interpreter = setup_normal.run().unwrap();
+    let intrptr_sparse: Interpreter = setup_sparse.run().unwrap();
+    let intrptr_square: Interpreter = setup_square.run().unwrap();
 
-    let out_normal: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_normal);
-    let out_sparse: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_sparse);
-    let out_square: Vec<U256> = extract_kernel_output(out..out + 12, intrptr_square);
+    let out_normal: Vec<U256> = extract_kernel_memory(out..out + 12, intrptr_normal);
+    let out_sparse: Vec<U256> = extract_kernel_memory(out..out + 12, intrptr_sparse);
+    let out_square: Vec<U256> = extract_kernel_memory(out..out + 12, intrptr_square);
 
     let exp_normal: Vec<U256> = (f * g).on_stack();
     let exp_sparse: Vec<U256> = (f * h).on_stack();
@@ -103,7 +107,7 @@ fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
 }
 
 #[test]
-fn test_frob_fp254_12() -> Result<()> {
+fn test_frob_fp12() -> Result<()> {
     let ptr: usize = 100;
 
     let mut rng = rand::thread_rng();
@@ -114,15 +118,15 @@ fn test_frob_fp254_12() -> Result<()> {
     let setup_frob_3 = setup_frob_test(ptr, f, "test_frob_fp254_12_3");
     let setup_frob_6 = setup_frob_test(ptr, f, "test_frob_fp254_12_6");
 
-    let intrptr_frob_1: Interpreter = run_setup_interpreter(setup_frob_1).unwrap();
-    let intrptr_frob_2: Interpreter = run_setup_interpreter(setup_frob_2).unwrap();
-    let intrptr_frob_3: Interpreter = run_setup_interpreter(setup_frob_3).unwrap();
-    let intrptr_frob_6: Interpreter = run_setup_interpreter(setup_frob_6).unwrap();
+    let intrptr_frob_1: Interpreter = setup_frob_1.run().unwrap();
+    let intrptr_frob_2: Interpreter = setup_frob_2.run().unwrap();
+    let intrptr_frob_3: Interpreter = setup_frob_3.run().unwrap();
+    let intrptr_frob_6: Interpreter = setup_frob_6.run().unwrap();
 
-    let out_frob_1: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_1);
-    let out_frob_2: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_2);
-    let out_frob_3: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_3);
-    let out_frob_6: Vec<U256> = extract_kernel_output(ptr..ptr + 12, intrptr_frob_6);
+    let out_frob_1: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_1);
+    let out_frob_2: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_2);
+    let out_frob_3: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_3);
+    let out_frob_6: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_6);
 
     let exp_frob_1: Vec<U256> = f.frob(1).on_stack();
     let exp_frob_2: Vec<U256> = f.frob(2).on_stack();
@@ -138,10 +142,9 @@ fn test_frob_fp254_12() -> Result<()> {
 }
 
 #[test]
-fn test_inv_fp254_12() -> Result<()> {
+fn test_inv_fp12() -> Result<()> {
     let ptr: usize = 100;
     let inv: usize = 112;
-
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
 
@@ -150,8 +153,8 @@ fn test_inv_fp254_12() -> Result<()> {
         stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],
         memory: vec![(ptr, f.on_stack())],
     };
-    let interpreter: Interpreter = run_setup_interpreter(setup).unwrap();
-    let output: Vec<U256> = extract_kernel_output(inv..inv + 12, interpreter);
+    let interpreter: Interpreter = setup.run().unwrap();
+    let output: Vec<U256> = extract_kernel_memory(inv..inv + 12, interpreter);
     let expected: Vec<U256> = f.inv().on_stack();
 
     assert_eq!(output, expected);
@@ -159,29 +162,27 @@ fn test_inv_fp254_12() -> Result<()> {
     Ok(())
 }
 
-// #[test]
-// fn test_invariance_inducing_power() -> Result<()> {
-//     let ptr = U256::from(300);
-//     let out = U256::from(400);
+#[test]
+fn test_invariant_exponent() -> Result<()> {
+    let ptr: usize = 400;
 
-//     let f: Fp12 = gen_fp12();
+    let mut rng = rand::thread_rng();
+    let f: Fp12 = rng.gen::<Fp12>();
 
-//     let mut stack = vec![ptr];
-//     stack.extend(fp12_on_stack(f));
-//     stack.extend(vec![
-//         ptr,
-//         out,
-//         get_address_from_label("return_fp12_on_stack"),
-//         out,
-//     ]);
+    let setup = InterpreterSetup {
+        label: "bn254_invariant_exponent".to_string(),
+        stack: vec![U256::from(ptr), U256::from(0xdeadbeefu32)],
+        memory: vec![(ptr, f.on_stack())],
+    };
 
-//     let output: Vec<U256> = run_setup_interpreter("test_pow", stack);
-//     let expected: Vec<U256> = fp12_on_stack(invariance_inducing_power(f));
+    let interpreter: Interpreter = setup.run().unwrap();
+    let output: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, interpreter);
+    let expected: Vec<U256> = invariant_exponent(f).on_stack();
 
-//     assert_eq!(output, expected);
+    assert_eq!(output, expected);
 
-//     Ok(())
-// }
+    Ok(())
+}
 
 // The curve is cyclic with generator (1, 2)
 pub const CURVE_GENERATOR: Curve = {
@@ -253,8 +254,8 @@ fn test_miller() -> Result<()> {
         stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
         memory: vec![(ptr, inputs)],
     };
-    let interpreter = run_setup_interpreter(setup).unwrap();
-    let output: Vec<U256> = extract_kernel_output(out..out + 12, interpreter);
+    let interpreter = setup.run().unwrap();
+    let output: Vec<U256> = extract_kernel_memory(out..out + 12, interpreter);
     let expected = miller_loop(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
 
     assert_eq!(output, expected);
@@ -280,8 +281,8 @@ fn test_tate() -> Result<()> {
         stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
         memory: vec![(ptr, inputs)],
     };
-    let interpreter = run_setup_interpreter(setup).unwrap();
-    let output: Vec<U256> = extract_kernel_output(out..out + 12, interpreter);
+    let interpreter = setup.run().unwrap();
+    let output: Vec<U256> = extract_kernel_memory(out..out + 12, interpreter);
     let expected = tate(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
 
     assert_eq!(output, expected);

From cb7c638cbc929f22b12835179d762bd74cfb6695 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 7 Feb 2023 15:09:49 -0800
Subject: [PATCH 179/201] more comments

---
 .../curve_arithmetic/invariant_exponent.asm   | 74 +++++++++++--------
 .../bn254/curve_arithmetic/tate_pairing.asm   | 19 +++--
 2 files changed, 53 insertions(+), 40 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
index 3176dbf5..b21174e1 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
@@ -1,37 +1,17 @@
 /// To make the Tate pairing an invariant, the final step is to exponentiate by
-///     (p^12 - 1)/N = (p^6 - 1)(p^2 + 1)(p^4 - p^2 + 1)/N
-/// The function in this module enacts the final exponentiation, by
-///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
+///     (p^12 - 1)/N = (p^6 - 1) * (p^2 + 1) * (p^4 - p^2 + 1)/N
+/// and thus we can exponentiate by each factor sequentially.
 ///
-/// def final_exp(y):
-///     y4, y2, y0 = 1, 1, 1
-///     power_loop_4()
-///     power_loop_2()
-///     power_loop_0()
-///     custom_powers()
-///     final_power()
-///
-/// def custom_powers()
-///     y0 = y0^{-1}
-///     y1 = y4 * y2^2 * y0
-///     return y2, y1, y0
-///
-/// def final_power()
-///     y  = y.frob(3)
-///     y2 = y2.frob(2)
-///     y1 = y1.frob(1)
-///     return y * y2 * y1 * y0
-
 /// def bn254_invariant_exponent(y: Fp12):
 ///     y = first_exp(y)
 ///     y = second_exp(y)
-///     return final_exponentiation(y)
+///     return final_exp(y)
 
 global bn254_invariant_exponent:
 
-/// map t to t^(p^6 - 1) via 
-///     def first_exp(t):
-///         return t.frob(6) / t
+/// first, exponentiate by (p^6 - 1) via
+///     def first_exp(y):
+///         return y.frob(6) / y
     // stack:                      out, retdest  {out: y}
     %stack (out) -> (out, 100, first_exp, out)         
     // stack: out, 100, first_exp, out, retdest  {out: y}
@@ -44,9 +24,9 @@ first_exp:
     // stack:  out, 100, out, second_exp, out, retdest  {out: y_6, 100: y^-1}
     %jump(mul_fp254_12)
 
-/// map t to t^(p^2 + 1) via 
-///     def second_exp(t):
-///         return t.frob(2) * t
+/// second, exponentiate by (p^2 + 1) via 
+///     def second_exp(y):
+///         return y.frob(2) * y
 second_exp:
     // stack:                                out, retdest  {out: y}
     %stack (out) -> (out, 100, out, out, final_exp, out)
@@ -55,6 +35,22 @@ second_exp:
     // stack:      100, out, out, final_exp, out, retdest  {out: y, 100: y_2}
     %jump(mul_fp254_12)
 
+/// Finally, we must exponentiate by (p^4 - p^2 + 1)/N
+/// To do so efficiently, we can express this power as
+///     (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0
+/// and simultaneously compute y^a4, y^a2, y^a0 where
+///     a1 = a4 + 2a2 - a0
+/// We first initialize these powers as 1 and then use 
+/// binary algorithms for exponentiation.
+///
+/// def final_exp(y):
+///     y4, y2, y0 = 1, 1, 1
+///     power_loop_4()
+///     power_loop_2()
+///     power_loop_0()
+///     custom_powers()
+///     final_power()
+
 final_exp:
     // stack:                  val, retdest
     %stack (val) -> (val, 300, val)
@@ -71,6 +67,25 @@ final_exp:
     // stack: 64, 62, 65, 300, val, retdest  {200: y0, 212: y2, 224: y4}
     %jump(power_loop_4)
 
+/// After computing the powers 
+///     y^a4, y^a2, y^a0
+/// we would like to transform them to
+///     y^a2, y^-a1, y^-a0
+///
+/// def custom_powers()
+///     y0 = y0^{-1}
+///     y1 = y4 * y2^2 * y0
+///     return y2, y1, y0
+///
+/// And finally, upon doing so, compute the final power
+///     y^(p^3) * (y^a2)^(p^2) * (y^-a1)^p * (y^-a0)
+///
+/// def final_power()
+///     y  = y.frob(3)
+///     y2 = y2.frob(2)
+///     y1 = y1.frob(1)
+///     return y * y2 * y1 * y0
+
 custom_powers:
     // stack:                             val, retdest  {200: y0, 212: y2, 224: y4}
     %stack () -> (200, 236, make_term_1)
@@ -91,7 +106,6 @@ make_term_3:
     %stack () -> (236, 224, 224, final_power)
     // stack: 236, 224, 224, final_power, val, retdest  {212: y2, 224: y4 * y2^2, 236: y0^-1}
     %jump(mul_fp254_12)
-
 final_power:
     // stack:                            val, retdest  {val: y  , 212:  y^a2   , 224:  y^a1   , 236: y^a0}
     %frob_fp254_12_3
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 356f002a..0663ba1c 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -1,3 +1,12 @@
+/// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
+///     out = miller_loop(P, Q)
+///     return bn254_invariant_exponent(P, Q)
+global bn254_tate:
+    // stack:                      inp, out, retdest
+    %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out)
+    // stack: inp, out, bn254_invariant_exponent, out, retdest
+    %jump(bn254_miller)
+
 /// def miller(P, Q):
 ///     miller_init()
 ///     miller_loop()
@@ -24,16 +33,6 @@
 /// def miller_zero():
 ///     0xnm -= 1
 ///     mul_tangent()
-
-/// def tate(P: Curve, Q: TwistedCurve) -> Fp12:
-///     out = miller_loop(P, Q)
-///     return bn254_invariant_exponent(P, Q)
-global bn254_tate:
-    // stack:                      inp, out, retdest
-    %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out)
-    // stack: inp, out, bn254_invariant_exponent, out, retdest
-    %jump(bn254_miller)
-
 global bn254_miller:
     // stack:         ptr, out, retdest
     %stack (ptr, out) -> (out, 1, ptr, out)

From 361d6d72628de83e6ba3a26789f35f7f10fb1a54 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Tue, 7 Feb 2023 18:53:58 -0800
Subject: [PATCH 180/201] tests and stacks

---
 evm/src/bn254_arithmetic.rs                   |   7 +-
 .../bn254/curve_arithmetic/tate_pairing.asm   |   2 +-
 .../bn254/field_arithmetic/degree_12_mul.asm  |  17 ++-
 .../bn254/field_arithmetic/frobenius.asm      |  62 ++++++---
 .../asm/curve/bn254/field_arithmetic/util.asm |  29 -----
 evm/src/cpu/kernel/tests/bn254.rs             | 119 +++++++++++++++---
 6 files changed, 157 insertions(+), 79 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index 8fb72d03..d1050560 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -330,7 +330,7 @@ impl Fp6 {
     /// while the values of
     ///     t^(p^n) and t^(2p^n)
     /// are precomputed in the constant arrays FROB_T1 and FROB_T2
-    fn frob(self, n: usize) -> Fp6 {
+    pub fn frob(self, n: usize) -> Fp6 {
         let n = n % 6;
         let frob_t1 = FROB_T1[n];
         let frob_t2 = FROB_T2[n];
@@ -373,6 +373,11 @@ impl Fp6 {
         let prod_24 = prod_13.frob(1);
         prod_24.scale(prod_odds_over_phi)
     }
+
+    pub fn on_stack(self) -> Vec<U256> {
+        let f: [U256; 6] = unsafe { transmute(self) };
+        f.into_iter().collect()
+    }
 }
 
 #[allow(clippy::suspicious_arithmetic_impl)]
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 0663ba1c..c17117ab 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -2,7 +2,7 @@
 ///     out = miller_loop(P, Q)
 ///     return bn254_invariant_exponent(P, Q)
 global bn254_tate:
-    // stack:                      inp, out, retdest
+    // stack:                                inp, out, retdest
     %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out)
     // stack: inp, out, bn254_invariant_exponent, out, retdest
     %jump(bn254_miller)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index ff6fb72c..cd81e6e7 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -46,12 +46,12 @@
 global mul_fp254_12:
     // stack:                                   inA, inB, out 
     DUP1  
-    %offset_fp254_6 
+    %add_const(6) 
     // stack:                             inA', inA, inB, out 
     %load_fp254_6
     // stack:                               f', inA, inB, out 
     DUP8  
-    %offset_fp254_6
+    %add_const(6)
     // stack:                         inB', f', inA, inB, out 
     %load_fp254_6
     // stack:                           g', f', inA, inB, out 
@@ -121,7 +121,7 @@ mul_fp254_12_3:
     %subr_fp254_6
     // stack:          (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     DUP14  
-    %offset_fp254_6 
+    %add_const(6) 
     // stack:    out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     %store_fp254_6
     // stack:                                    fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
@@ -183,7 +183,7 @@ mul_fp254_12_3:
 global mul_fp254_12_sparse:
     // stack:                                                                    inA, inB, out
     DUP1  
-    %offset_fp254_6
+    %add_const(6)
     // stack:                                                              inA', inA, inB, out
     %load_fp254_6
     // stack:                                                                f', inA, inB, out
@@ -265,7 +265,7 @@ global mul_fp254_12_sparse:
     %add_fp254_6_hole
     // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     DUP9
-    %offset_fp254_6
+    %add_const(6)
     // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     %store_fp254_6
     // stack:                                                                    inA, inB, out
@@ -317,9 +317,6 @@ global mul_fp254_12_sparse:
 ///
 /// f, f' consist of six elements on the stack
 
-global square_fp254_12_test:
-    POP
-
 global square_fp254_12:
     // stack:                                                                               inp, out
     DUP1
@@ -335,12 +332,12 @@ global square_fp254_12:
     %dup_fp254_6_2
     // stack:                                  f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP16
-    %offset_fp254_6
+    %add_const(6)
     // stack:                            out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     PUSH square_fp254_12_1
     // stack:         square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     DUP10
-    %offset_fp254_6
+    %add_const(6)
     // stack:   inp', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     %load_fp254_6
     // stack:     f', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index 8c062b2a..ed282696 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -1,3 +1,5 @@
+// frob_fp12 tests
+
 global test_frob_fp254_12_1:
     // stack:         ptr
     %frob_fp254_12_1
@@ -42,13 +44,13 @@ global test_frob_fp254_12_6:
     // stack:   ptr, g, ptr
     %store_fp254_6
     // stack:           ptr
-    DUP1  %offset_fp254_6
+    DUP1  %add_const(6)
     // stack:     ptr', ptr
     %load_fp254_6
     // stack:       f', ptr
     %frobz_1
     // stack:       g', ptr
-    DUP7  %offset_fp254_6
+    DUP7  %add_const(6)
     // stack: ptr', g', ptr
     %store_fp254_6
     // stack:           ptr
@@ -67,13 +69,13 @@ global test_frob_fp254_12_6:
     // stack:   out, g, ptr , out
     %store_fp254_6 
     // stack:           ptr , out
-    %offset_fp254_6
+    %add_const(6)
     // stack:           ptr', out
     %load_fp254_6
     // stack:             f', out
     %frobz_2
     // stack:             g', out
-    DUP7  %offset_fp254_6
+    DUP7  %add_const(6)
     // stack:       out', g', out
     %store_fp254_6
     // stack:                 out
@@ -91,13 +93,13 @@ global test_frob_fp254_12_6:
     // stack:   ptr, g, ptr
     %store_fp254_6
     // stack:           ptr
-    DUP1  %offset_fp254_6
+    DUP1  %add_const(6)
     // stack:     ptr', ptr
     %load_fp254_6
     // stack:       f', ptr
     %frobz_3
     // stack:       g', ptr
-    DUP7  %offset_fp254_6
+    DUP7  %add_const(6)
     // stack: ptr', g', ptr
     %store_fp254_6
     // stack:           ptr
@@ -105,18 +107,38 @@ global test_frob_fp254_12_6:
 
 %macro frob_fp254_12_6
     // stack:           ptr
-    DUP1  %offset_fp254_6
+    DUP1  %add_const(6)
     // stack:     ptr', ptr
     %load_fp254_6
     // stack:       f', ptr
     %frobz_6
     // stack:       g', ptr
-    DUP7  %offset_fp254_6
+    DUP7  %add_const(6)
     // stack: ptr', g', ptr
     %store_fp254_6
     // stack:           ptr
 %endmacro
 
+// frob_fp6 tests
+
+global test_frob_fp254_6_1:
+    // stack:         f
+    %frob_fp254_6_1
+    // stack: frob_1(f)
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_6_2:
+    // stack:         f
+    %frob_fp254_6_2
+    // stack: frob_2(f)
+    %jump(0xdeadbeef)
+
+global test_frob_fp254_6_3:
+    // stack:         f
+    %frob_fp254_6_3
+    // stack: frob_3(f)
+    %jump(0xdeadbeef)
+
 
 /// let Z` denote the complex conjugate of Z
 
@@ -131,37 +153,37 @@ global test_frob_fp254_12_6:
     // stack: C0 , C1 , C2
     %conj_fp254_2
     // stack: D0 , C1 , C2
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: C2 , C1 , D0
     %conj_fp254_2
     // stack: C2`, C1 , D0
     %frobt2_1
     // stack: D2 , C1 , D0
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: D0 , C1 , D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: C1 , D0 , D2
     %conj_fp254_2
     // stack: C1`, D0 , D2
     %frobt1_1
     // stack: D1 , D0 , D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: D0 , D1 , D2
 %endmacro
 
 %macro frob_fp254_6_2
     // stack: C0, C1, C2
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: C2, C1, C0
     %frobt2_2
     // stack: D2, C1, C0
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: C0, C1, D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: C1, C0, D2
     %frobt1_2
     // stack: D1, C0, D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: D0, D1, D2
 %endmacro
 
@@ -169,21 +191,21 @@ global test_frob_fp254_12_6:
     // stack: C0 , C1 , C2
     %conj_fp254_2
     // stack: D0 , C1 , C2
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: C2 , C1 , D0
     %conj_fp254_2
     // stack: C2`, C1 , D0
     %frobt2_3
     // stack: D2 , C1 , D0
-    %swap_fp254_2_hole_2
+    %stack (x: 2, a: 2, y:2) -> (y, a, x)
     // stack: D0 , C1 , D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: C1 , D0 , D2
     %conj_fp254_2
     // stack: C1`, D0 , D2
     %frobt1_3
     // stack: D1 , D0 , D2
-    %swap_fp254_2
+    %stack (x: 2, y: 2) -> (y, x)
     // stack: D0 , D1 , D2
 %endmacro
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index e2bed9bf..c5262afd 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -1,10 +1,5 @@
-%macro offset_fp254_6
-    %add_const(6)
-%endmacro
-
 // fp254_2 macros
 
-// cost: 2 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp254_2
     // stack:       ptr
     DUP1  
@@ -28,30 +23,6 @@
     // stack: a, -b 
 %endmacro
 
-%macro swap_fp254_2
-    // stack: a , a_, b , b_
-    SWAP2
-    // stack: b , a_, a , b_
-    SWAP1
-    // stack: a_, b , a , b_
-    SWAP3
-    // stack: b_, b , a , a_
-    SWAP1 
-    // stack: b , b_, a , a_
-%endmacro
-
-%macro swap_fp254_2_hole_2
-    // stack: a , a_, X, b , b_
-    SWAP4
-    // stack: b , a_, X, a , b_
-    SWAP1
-    // stack: a_, b , X, a , b_
-    SWAP5
-    // stack: b_, b , X, a , a_
-    SWAP1 
-    // stack: b , b_, X, a , a_
-%endmacro
-
 %macro mul_fp254__fp254_2
     // stack:    c, x, y
     SWAP2
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 24a84906..3b6734c3 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -4,7 +4,7 @@ use anyhow::Result;
 use ethereum_types::U256;
 use rand::Rng;
 
-use crate::bn254_arithmetic::{Fp, Fp12, Fp2};
+use crate::bn254_arithmetic::{Fp, Fp12, Fp2, Fp6};
 use crate::bn254_pairing::{
     gen_fp12_sparse, invariant_exponent, miller_loop, tate, Curve, TwistedCurve,
 };
@@ -51,17 +51,64 @@ fn extract_kernel_memory(range: Range<usize>, interpreter: Interpreter<'static>)
     output
 }
 
-fn setup_mul_test(out: usize, f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
-    let in0: usize = 64;
-    let in1: usize = 76;
+fn extract_stack(interpreter: Interpreter<'static>) -> Vec<U256> {
+    let stack = interpreter.stack();
+    stack.iter().rev().cloned().collect::<Vec<U256>>()
+}
+
+fn setup_mul_fp6_test(f: Fp6, g: Fp6, label: &str) -> InterpreterSetup {
+    let mut stack = f.on_stack();
+    if label == "mul_fp254_6" {
+        stack.extend(g.on_stack());
+    }
+    stack.push(U256::from(0xdeadbeefu32));
     InterpreterSetup {
         label: label.to_string(),
-        stack: vec![
-            U256::from(in0),
-            U256::from(in1),
-            U256::from(out),
-            U256::from(0xdeadbeefu32),
-        ],
+        stack,
+        memory: vec![],
+    }
+}
+
+#[test]
+fn test_mul_fp6() -> Result<()> {
+    let mut rng = rand::thread_rng();
+    let f: Fp6 = rng.gen::<Fp6>();
+    let g: Fp6 = rng.gen::<Fp6>();
+
+    let setup_normal: InterpreterSetup = setup_mul_fp6_test(f, g, "mul_fp254_6");
+    let setup_square: InterpreterSetup = setup_mul_fp6_test(f, f, "square_fp254_6");
+
+    let intrptr_normal: Interpreter = setup_normal.run().unwrap();
+    let intrptr_square: Interpreter = setup_square.run().unwrap();
+
+    let out_normal: Vec<U256> = extract_stack(intrptr_normal);
+    let out_square: Vec<U256> = extract_stack(intrptr_square);
+
+    let exp_normal: Vec<U256> = (f * g).on_stack();
+    let exp_square: Vec<U256> = (f * f).on_stack();
+
+    assert_eq!(out_normal, exp_normal);
+    assert_eq!(out_square, exp_square);
+
+    Ok(())
+}
+
+fn setup_mul_fp12_test(out: usize, f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
+    let in0: usize = 64;
+    let in1: usize = 76;
+
+    let mut stack = vec![
+        U256::from(in0),
+        U256::from(in1),
+        U256::from(out),
+        U256::from(0xdeadbeefu32),
+    ];
+    if label == "square_fp254_12" {
+        stack.remove(0);
+    }
+    InterpreterSetup {
+        label: label.to_string(),
+        stack,
         memory: vec![(in0, f.on_stack()), (in1, g.on_stack())],
     }
 }
@@ -75,9 +122,9 @@ fn test_mul_fp12() -> Result<()> {
     let g: Fp12 = rng.gen::<Fp12>();
     let h: Fp12 = gen_fp12_sparse(&mut rng);
 
-    let setup_normal: InterpreterSetup = setup_mul_test(out, f, g, "mul_fp254_12");
-    let setup_sparse: InterpreterSetup = setup_mul_test(out, f, h, "mul_fp254_12_sparse");
-    let setup_square: InterpreterSetup = setup_mul_test(out, f, f, "square_fp254_12_test");
+    let setup_normal: InterpreterSetup = setup_mul_fp12_test(out, f, g, "mul_fp254_12");
+    let setup_sparse: InterpreterSetup = setup_mul_fp12_test(out, f, h, "mul_fp254_12_sparse");
+    let setup_square: InterpreterSetup = setup_mul_fp12_test(out, f, f, "square_fp254_12");
 
     let intrptr_normal: Interpreter = setup_normal.run().unwrap();
     let intrptr_sparse: Interpreter = setup_sparse.run().unwrap();
@@ -98,7 +145,43 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-fn setup_frob_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
+fn setup_frob_fp6_test(f: Fp6, label: &str) -> InterpreterSetup {
+    InterpreterSetup {
+        label: label.to_string(),
+        stack: f.on_stack(),
+        memory: vec![],
+    }
+}
+
+#[test]
+fn test_frob_fp6() -> Result<()> {
+    let mut rng = rand::thread_rng();
+    let f: Fp6 = rng.gen::<Fp6>();
+
+    let setup_frob_1 = setup_frob_fp6_test(f, "test_frob_fp254_6_1");
+    let setup_frob_2 = setup_frob_fp6_test(f, "test_frob_fp254_6_2");
+    let setup_frob_3 = setup_frob_fp6_test(f, "test_frob_fp254_6_3");
+
+    let intrptr_frob_1: Interpreter = setup_frob_1.run().unwrap();
+    let intrptr_frob_2: Interpreter = setup_frob_2.run().unwrap();
+    let intrptr_frob_3: Interpreter = setup_frob_3.run().unwrap();
+
+    let out_frob_1: Vec<U256> = extract_stack(intrptr_frob_1);
+    let out_frob_2: Vec<U256> = extract_stack(intrptr_frob_2);
+    let out_frob_3: Vec<U256> = extract_stack(intrptr_frob_3);
+
+    let exp_frob_1: Vec<U256> = f.frob(1).on_stack();
+    let exp_frob_2: Vec<U256> = f.frob(2).on_stack();
+    let exp_frob_3: Vec<U256> = f.frob(3).on_stack();
+
+    assert_eq!(out_frob_1, exp_frob_1);
+    assert_eq!(out_frob_2, exp_frob_2);
+    assert_eq!(out_frob_3, exp_frob_3);
+
+    Ok(())
+}
+
+fn setup_frob_fp12_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
     InterpreterSetup {
         label: label.to_string(),
         stack: vec![U256::from(ptr)],
@@ -113,10 +196,10 @@ fn test_frob_fp12() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
 
-    let setup_frob_1 = setup_frob_test(ptr, f, "test_frob_fp254_12_1");
-    let setup_frob_2 = setup_frob_test(ptr, f, "test_frob_fp254_12_2");
-    let setup_frob_3 = setup_frob_test(ptr, f, "test_frob_fp254_12_3");
-    let setup_frob_6 = setup_frob_test(ptr, f, "test_frob_fp254_12_6");
+    let setup_frob_1 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_1");
+    let setup_frob_2 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_2");
+    let setup_frob_3 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_3");
+    let setup_frob_6 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_6");
 
     let intrptr_frob_1: Interpreter = setup_frob_1.run().unwrap();
     let intrptr_frob_2: Interpreter = setup_frob_2.run().unwrap();

From cecad5980d1e21c703075454d9966a19eb420a0b Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 8 Feb 2023 09:10:17 -0800
Subject: [PATCH 181/201] stack macro

---
 .../asm/curve/bn254/field_arithmetic/degree_12_mul.asm      | 6 +++---
 .../kernel/asm/curve/bn254/field_arithmetic/frobenius.asm   | 4 ++--
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm    | 5 ++---
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index cd81e6e7..013822ec 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -95,7 +95,7 @@ mul_fp254_12_2:
     // stack:                     fg, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %store_fp254_6(12)
     // stack:                         g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %swap_fp254_6
+    %stack (x: 6, y: 6) -> (y, x)
     // stack:                         f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     PUSH mul_fp254_12_3
     // stack:         mul_fp254_12_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
@@ -110,7 +110,7 @@ mul_fp254_12_3:
     // stack:                          (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %load_fp254_6(12)
     // stack:                      fg, (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %swap_fp254_6
+    %stack (x: 6, y: 6) -> (y, x)
     // stack:                      (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %dup_fp254_6_6
     // stack:                  fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
@@ -213,7 +213,7 @@ global mul_fp254_12_sparse:
     // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %mul_fp254__fp254_6
     // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %swap_fp254_6
+    %stack (x: 6, y: 6) -> (y, x)
     // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     DUP13
     %add_const(8)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index ed282696..a9b29689 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -250,12 +250,12 @@ global test_frob_fp254_6_3:
 
 %macro frobt1_2
     PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd48
-    %mul_fp254__fp254_2
+    %scale_fp254_2
 %endmacro
 
 %macro frobt2_2
     PUSH 0x59e26bcea0d48bacd4f263f1acdb5c4f5763473177fffffe
-    %mul_fp254__fp254_2
+    %scale_fp254_2
 %endmacro
 
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index c5262afd..676f8c59 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -23,7 +23,7 @@
     // stack: a, -b 
 %endmacro
 
-%macro mul_fp254__fp254_2
+%macro scale_fp254_2
     // stack:    c, x, y
     SWAP2
     // stack:    y, x, c 
@@ -88,7 +88,6 @@
 
 // fp254_6 macros
 
-// cost: 6 loads + 6 dup/swaps + 5 adds = 6*4 + 6*1 + 5*2 = 40
 %macro load_fp254_6
     // stack:                         ptr
     DUP1  
@@ -512,7 +511,7 @@
 
 // cost: 18
 // add two fp254_6 elements with a to-be-popped stack term separating them
-//    (f: 6, X, g: 6) -> (f + g: 6)
+//    (f: 6, X, g: 6) -> (f + g)
 %macro add_fp254_6_hole
     // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
     SWAP8

From a061b88a7aa674a098dfaf7967490a9de280ddfb Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 8 Feb 2023 09:15:38 -0800
Subject: [PATCH 182/201] naming

---
 .../curve/bn254/field_arithmetic/degree_12_mul.asm   | 12 ++++++------
 .../asm/curve/bn254/field_arithmetic/frobenius.asm   |  8 ++++----
 .../kernel/asm/curve/bn254/field_arithmetic/util.asm |  8 ++++----
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index 013822ec..21e11a36 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -211,7 +211,7 @@ global mul_fp254_12_sparse:
     // stack:                      inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %mload_kernel_general
     // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp254__fp254_6
+    %scale_re_fp254_6
     // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %stack (x: 6, y: 6) -> (y, x)
     // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
@@ -220,7 +220,7 @@ global mul_fp254_12_sparse:
     // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     %load_fp254_2
     // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp254_2_fp254_6_sh2
+    %scale_fp254_6_sh2
     // stack:           G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     %add_fp254_6
     // stack:           G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
@@ -230,7 +230,7 @@ global mul_fp254_12_sparse:
     // stack: inB1,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     %load_fp254_2
     // stack:  G1 ,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
-    %mul_fp254_2_fp254_6_sh
+    %scale_fp254_6_sh
     // stack:  G1 * sh(f), inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     %add_fp254_6_hole
     // stack:      G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
@@ -240,7 +240,7 @@ global mul_fp254_12_sparse:
     // stack:                                          inB, f', out, f, inB, f', inA, inB, out
     %mload_kernel_general
     // stack:                                          g0 , f', out, f, inB, f', inA, inB, out
-    %mul_fp254__fp254_6
+    %scale_re_fp254_6
     // stack:                                          g0 * f', out, f, inB, f', inA, inB, out
     %swap_fp254_6_hole
     // stack:                                        f  , out, g0 * f', inB, f', inA, inB, out
@@ -249,7 +249,7 @@ global mul_fp254_12_sparse:
     // stack:                               inB2,    f  , out, g0 * f', inB, f', inA, inB, out
     %load_fp254_2
     // stack:                                G2 ,    f  , out, g0 * f', inB, f', inA, inB, out
-    %mul_fp254_2_fp254_6_sh
+    %scale_fp254_6_sh
     // stack:                                G2 * sh(f) , out, g0 * f', inB, f', inA, inB, out
     %add_fp254_6_hole
     // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
@@ -260,7 +260,7 @@ global mul_fp254_12_sparse:
     // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
     %load_fp254_2
     // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    %mul_fp254_2_fp254_6_sh
+    %scale_fp254_6_sh
     // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
     %add_fp254_6_hole
     // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
index a9b29689..ee1e4679 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm
@@ -214,25 +214,25 @@ global test_frob_fp254_6_3:
     %frob_fp254_6_1
     PUSH 0x246996f3b4fae7e6a6327cfe12150b8e747992778eeec7e5ca5cf05f80f362ac
     PUSH 0x1284b71c2865a7dfe8b99fdd76e68b605c521e08292f2176d60b35dadcc9e470
-    %mul_fp254_2_fp254_6
+    %scale_fp254_6
 %endmacro
 
 %macro frobz_2
     %frob_fp254_6_2
     PUSH 0x30644e72e131a0295e6dd9e7e0acccb0c28f069fbb966e3de4bd44e5607cfd49
-    %mul_fp254__fp254_6
+    %scale_re_fp254_6
 %endmacro
 
 %macro frobz_3
     %frob_fp254_6_3
     PUSH 0xabf8b60be77d7306cbeee33576139d7f03a5e397d439ec7694aa2bf4c0c101
     PUSH 0x19dc81cfcc82e4bbefe9608cd0acaa90894cb38dbe55d24ae86f7d391ed4a67f
-    %mul_fp254_2_fp254_6
+    %scale_fp254_6
 %endmacro
 
 %macro frobz_6
     PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd46
-    %mul_fp254__fp254_6
+    %scale_re_fp254_6
 %endmacro
 
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 676f8c59..87cba2eb 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -569,7 +569,7 @@
 %endmacro
 
 // cost: 21
-%macro mul_fp254__fp254_6
+%macro scale_re_fp254_6
     // stack: c , f0,      f1,    f2,     f3,     f4,     f5
     SWAP6
     DUP7
@@ -609,7 +609,7 @@
 /// G1 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i
 /// G2 = (a+bi)(f2+f2_i) = (af2 - bf2_) + (bf2 + af2_)i
 
-%macro mul_fp254_2_fp254_6
+%macro scale_fp254_6
     // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
     DUP2
     DUP5
@@ -700,7 +700,7 @@
 /// G1 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
 /// G2 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i
 
-%macro mul_fp254_2_fp254_6_sh
+%macro scale_fp254_6_sh
     // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
     DUP6
     DUP3
@@ -794,7 +794,7 @@
 /// G1 = (a+bi)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i
 /// G2 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i
 
-%macro mul_fp254_2_fp254_6_sh2
+%macro scale_fp254_6_sh2
     // stack:             a, b, f0, f0_, f1, f1_, f2, f2_
     DUP4
     DUP3 

From 31095e1b239051934ca656e6fed963a564b3ae9f Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 8 Feb 2023 09:37:27 -0800
Subject: [PATCH 183/201] stack macro

---
 .../bn254/field_arithmetic/degree_12_mul.asm  | 14 ++--
 .../asm/curve/bn254/field_arithmetic/util.asm | 77 +------------------
 2 files changed, 11 insertions(+), 80 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index 21e11a36..3d384380 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -76,13 +76,13 @@ mul_fp254_12_1:
     // stack:               inB, inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %load_fp254_6
     // stack:                g , inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %swap_fp254_6_hole
+    %stack (f: 6, x, g: 6) -> (g, x, f)
     // stack:                g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %dup_fp254_6_7
     // stack:              g,g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     %add_fp254_6
     // stack:              g+g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %swap_fp254_6_hole
+    %stack (f: 6, x, g: 6) -> (g, x, f)
     // stack:                 g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
     PUSH mul_fp254_12_2
     // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
@@ -224,7 +224,7 @@ global mul_fp254_12_sparse:
     // stack:           G2 * sh2(f') , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     %add_fp254_6
     // stack:           G2 * sh2(f') + g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %swap_fp254_6_hole
+    %stack (f: 6, x, g: 6) -> (g, x, f)
     // stack:          f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     DUP7  %add_const(2)
     // stack: inB1,    f , inB, G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
@@ -242,7 +242,7 @@ global mul_fp254_12_sparse:
     // stack:                                          g0 , f', out, f, inB, f', inA, inB, out
     %scale_re_fp254_6
     // stack:                                          g0 * f', out, f, inB, f', inA, inB, out
-    %swap_fp254_6_hole
+    %stack (f: 6, x, g: 6) -> (g, x, f)
     // stack:                                        f  , out, g0 * f', inB, f', inA, inB, out
     DUP14
     %add_const(8)
@@ -253,7 +253,7 @@ global mul_fp254_12_sparse:
     // stack:                                G2 * sh(f) , out, g0 * f', inB, f', inA, inB, out
     %add_fp254_6_hole
     // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
-    %swap_fp254_6_hole
+    %stack (f: 6, x, g: 6) -> (g, x, f)
     // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
     DUP7
     %add_const(2)
@@ -341,7 +341,7 @@ global square_fp254_12:
     // stack:   inp', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
     %load_fp254_6
     // stack:     f', square_fp254_12_1, out', f , square_fp254_12_2, inp, f, square_fp254_12_3, out
-    %swap_fp254_6_hole_2
+    %stack (f: 6, x: 2, g: 6) -> (g, x, f)
     // stack:     f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
     %dup_fp254_6_8
     // stack: f', f , square_fp254_12_1, out', f', square_fp254_12_2, inp, f, square_fp254_12_3, out
@@ -359,7 +359,7 @@ square_fp254_12_2:
     // stack:                                                   f'f', inp, f, square_fp254_12_3, out
     %sh_fp254_6
     // stack:                                               sh(f'f'), inp, f, square_fp254_12_3, out
-    %swap_fp254_6_hole
+    %stack (f: 6, x, g: 6) -> (g, x, f)
     // stack:                                               f, inp, sh(f'f'), square_fp254_12_3, out
     SWAP6
     SWAP13
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 87cba2eb..d40ea03d 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -402,84 +402,15 @@
     // stack: g0, g1, g2, g3, g4, g5, f0, f1, f2, f3, f4, f5
 %endmacro
 
-// cost: 16
-// swap two fp254_6 elements with a stack term separating them
-//    (f: 6, X, g: 6) -> (g: 6, X, f: 6)
-%macro swap_fp254_6_hole
-    // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
-    SWAP7
-    // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
-    SWAP1
-    SWAP8
-    SWAP1
-    // stack: g0, g1, f2, f3, f4, f5, X, f0, f1, g2, g3, g4, g5
-    SWAP2
-    SWAP9
-    SWAP2
-    // stack: g0, g1, g2, f3, f4, f5, X, f0, f1, f2, g3, g4, g5
-    SWAP3
-    SWAP10
-    SWAP3    
-    // stack: g0, g1, g2, g3, f4, f5, X, f0, f1, f2, f3, g4, g5
-    SWAP4
-    SWAP11
-    SWAP4
-    // stack: g0, g1, g2, g3, g4, f5, X, f0, f1, f2, f3, f4, g5
-    SWAP5
-    SWAP12
-    SWAP5
-    // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
-%endmacro
-
-// cost: 16
-// swap two fp254_6 elements with two stack terms separating them
-//    (f: 6, X: 2, g: 6) -> (g: 6, X: 2, f: 6)
-%macro swap_fp254_6_hole_2
-    // stack: f0, f1, f2, f3, f4, f5, X, g0, g1, g2, g3, g4, g5
-    SWAP8
-    // stack: g0, f1, f2, f3, f4, f5, X, f0, g1, g2, g3, g4, g5
-    SWAP1
-    SWAP9
-    SWAP1
-    // stack: g0, g1, f2, f3, f4, f5, X, f0, f1, g2, g3, g4, g5
-    SWAP2
-    SWAP10
-    SWAP2
-    // stack: g0, g1, g2, f3, f4, f5, X, f0, f1, f2, g3, g4, g5
-    SWAP3
-    SWAP11
-    SWAP3    
-    // stack: g0, g1, g2, g3, f4, f5, X, f0, f1, f2, f3, g4, g5
-    SWAP4
-    SWAP12
-    SWAP4
-    // stack: g0, g1, g2, g3, g4, f5, X, f0, f1, f2, f3, f4, g5
-    SWAP5
-    SWAP13
-    SWAP5
-    // stack: g0, g1, g2, g3, g4, g5, X, f0, f1, f2, f3, f4, f5
-%endmacro
-
 /// multiply (a + bt + ct^2) by t:
 ///     t(a + bt + ct^2) = at + bt^2 + ct^3 = (9+i)c + at + bt^2
 %macro sh_fp254_6
-    // stack: f0 , f0_, f1,  f1_, f2 , f2_
-    SWAP2
-    // stack: f1 , f0_, g0 , f1_, f2 , f2_
-    SWAP4
-    // stack: f2 , f0_, g0 , f1_, g1 , f2_
-    SWAP1
-    // stack: f0_, f2 , g0 , f1_, g1 , f2_
-    SWAP3
-    // stack: f1_, f2 , g0 , g0_, g1 , f2_
-    SWAP5
-    // stack: f2_, f2 , g0 , g0_, g1 , g1_
-    SWAP1 
-    // stack: f2 , f2_, g0 , g0_, g1 , g1_
+    // stack:      a, b, c
+    %stack (a: 2, b: 2, c: 2) -> (c, a, b)
+    // stack:      c, a, b
     %i9
-    // stack: g2_, g2 , g0 , g0_, g1 , g1_
     SWAP1
-    // stack: g2 , g2_, g0 , g0_, g1 , g1_
+    // stack: (9+i)c, a, b 
 %endmacro
 
 // cost: 16

From b585b6a7c77533277f01aa02eddf7278dcd98cbb Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 8 Feb 2023 09:46:54 -0800
Subject: [PATCH 184/201] remove macro

---
 .../asm/curve/bn254/field_arithmetic/util.asm | 27 -------------------
 1 file changed, 27 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index d40ea03d..2543878c 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -375,33 +375,6 @@
     // stack: f: 6, X: 8, f: 6
 %endmacro
 
-// cost: 16
-%macro swap_fp254_6
-    // stack: f0, f1, f2, f3, f4, f5, g0, g1, g2, g3, g4, g5
-    SWAP6
-    // stack: g0, f1, f2, f3, f4, f5, f0, g1, g2, g3, g4, g5
-    SWAP1
-    SWAP7
-    SWAP1
-    // stack: g0, g1, f2, f3, f4, f5, f0, f1, g2, g3, g4, g5
-    SWAP2
-    SWAP8
-    SWAP2
-    // stack: g0, g1, g2, f3, f4, f5, f0, f1, f2, g3, g4, g5
-    SWAP3
-    SWAP9
-    SWAP3    
-    // stack: g0, g1, g2, g3, f4, f5, f0, f1, f2, f3, g4, g5
-    SWAP4
-    SWAP10
-    SWAP4
-    // stack: g0, g1, g2, g3, g4, f5, f0, f1, f2, f3, f4, g5
-    SWAP5
-    SWAP11
-    SWAP5
-    // stack: g0, g1, g2, g3, g4, g5, f0, f1, f2, f3, f4, f5
-%endmacro
-
 /// multiply (a + bt + ct^2) by t:
 ///     t(a + bt + ct^2) = at + bt^2 + ct^3 = (9+i)c + at + bt^2
 %macro sh_fp254_6

From c6492bc5d51c98d1e8a72489b10fd58ef1293316 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 8 Feb 2023 11:28:52 -0800
Subject: [PATCH 185/201] merge fix

---
 evm/src/arithmetic/mod.rs | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/evm/src/arithmetic/mod.rs b/evm/src/arithmetic/mod.rs
index ee94a485..a7cc6d5d 100644
--- a/evm/src/arithmetic/mod.rs
+++ b/evm/src/arithmetic/mod.rs
@@ -61,20 +61,6 @@ impl BinaryOperator {
                     U256::zero()
                 }
             }
-            BinaryOperator::Shl => {
-                if input0 > 255.into() {
-                    U256::zero()
-                } else {
-                    input1 << input0
-                }
-            }
-            BinaryOperator::Shr => {
-                if input0 > 255.into() {
-                    U256::zero()
-                } else {
-                    input1 >> input0
-                }
-            }
             BinaryOperator::AddFp254 => addmod(input0, input1, BN_BASE),
             BinaryOperator::MulFp254 => mulmod(input0, input1, BN_BASE),
             BinaryOperator::SubFp254 => submod(input0, input1, BN_BASE),

From 9e60ee25676e8e2dd0e9c45ecf7fda6b5cc49f3e Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 13 Feb 2023 11:25:46 -0800
Subject: [PATCH 186/201] segment

---
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm  | 8 ++++++++
 evm/src/memory/segments.rs                                | 6 +++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index 2543878c..e1f043f1 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -1,3 +1,11 @@
+// Load a single value from bn254 pairings memory.
+%macro mload_bn254_pairings
+    // stack: offset
+    %mload_kernel(@SEGMENT_BN254_PAIRINGS)
+    // stack: value
+%endmacro
+
+
 // fp254_2 macros
 
 %macro load_fp254_2
diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs
index a632e40f..51c5f64d 100644
--- a/evm/src/memory/segments.rs
+++ b/evm/src/memory/segments.rs
@@ -40,10 +40,11 @@ pub(crate) enum Segment {
     ShiftTable = 16,
     JumpdestBits = 17,
     EcdsaTable = 18,
+    BN254Pairings = 19,
 }
 
 impl Segment {
-    pub(crate) const COUNT: usize = 19;
+    pub(crate) const COUNT: usize = 20;
 
     pub(crate) fn all() -> [Self; Self::COUNT] {
         [
@@ -66,6 +67,7 @@ impl Segment {
             Self::ShiftTable,
             Self::JumpdestBits,
             Self::EcdsaTable,
+            Self::BN254Pairings,
         ]
     }
 
@@ -91,6 +93,7 @@ impl Segment {
             Segment::ShiftTable => "SEGMENT_SHIFT_TABLE",
             Segment::JumpdestBits => "SEGMENT_JUMPDEST_BITS",
             Segment::EcdsaTable => "SEGMENT_KERNEL_ECDSA_TABLE",
+            Segment::BN254Pairings => "SEGMENT_BN254_PAIRINGS",
         }
     }
 
@@ -116,6 +119,7 @@ impl Segment {
             Segment::ShiftTable => 256,
             Segment::JumpdestBits => 1,
             Segment::EcdsaTable => 256,
+            Segment::BN254Pairings => 256,
         }
     }
 }

From 71243fd72878eb6180f2a197271cdc5ce30bbbb4 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 13 Feb 2023 11:41:13 -0800
Subject: [PATCH 187/201] fix pairing code after big BN PR merge

---
 evm/src/cpu/kernel/aggregator.rs              |   8 +-
 .../cpu/kernel/asm/curve/bn254/curve_add.asm  | 305 ----------------
 .../bn254/curve_arithmetic/curve_add.asm      | 341 ++++++++++--------
 .../bn254/curve_arithmetic/curve_mul.asm      | 109 ++----
 .../bn254/{ => curve_arithmetic}/glv.asm      |   0
 .../bn254/{ => curve_arithmetic}/msm.asm      |   0
 .../{ => curve_arithmetic}/precomputation.asm |   0
 .../bn254/curve_arithmetic/tate_pairing.asm   |   4 +-
 .../cpu/kernel/asm/curve/bn254/curve_mul.asm  |  41 ---
 .../curve/bn254/field_arithmetic/inverse.asm  |   6 +-
 10 files changed, 223 insertions(+), 591 deletions(-)
 delete mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm
 rename evm/src/cpu/kernel/asm/curve/bn254/{ => curve_arithmetic}/glv.asm (100%)
 rename evm/src/cpu/kernel/asm/curve/bn254/{ => curve_arithmetic}/msm.asm (100%)
 rename evm/src/cpu/kernel/asm/curve/bn254/{ => curve_arithmetic}/precomputation.asm (100%)
 delete mode 100644 evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm

diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index 250c69cc..1df24c6e 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -27,18 +27,16 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/glv.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/invariant_exponent.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/msm.asm"),
+        include_str!("asm/curve/bn254/curve_arithmetic/precomputation.asm"),
         include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/degree_6_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/degree_12_mul.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"),
         include_str!("asm/curve/bn254/field_arithmetic/util.asm"),
-        include_str!("asm/curve/bn254/curve_add.asm"),
-        include_str!("asm/curve/bn254/curve_mul.asm"),
-        include_str!("asm/curve/bn254/glv.asm"),
-        include_str!("asm/curve/bn254/msm.asm"),
-        include_str!("asm/curve/bn254/precomputation.asm"),
         include_str!("asm/curve/common.asm"),
         include_str!("asm/curve/secp256k1/curve_add.asm"),
         include_str!("asm/curve/secp256k1/ecrecover.asm"),
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm
deleted file mode 100644
index 3e917120..00000000
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_add.asm
+++ /dev/null
@@ -1,305 +0,0 @@
-// #define N 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 // BN254 base field order
-
-// BN254 elliptic curve addition.
-// Uses the standard affine addition formula.
-global bn_add:
-    // Uncomment for test inputs.
-    // PUSH 0xdeadbeef
-    // PUSH 2
-    // PUSH 1
-    // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121
-    // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770
-    // stack: x0, y0, x1, y1, retdest
-
-    // Check if points are valid BN254 points.
-    DUP2
-    // stack: y0, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x0, y0, x0, y0, x1, y1, retdest
-    %bn_check
-    // stack: isValid(x0, y0), x0, y0, x1, y1, retdest
-    DUP5
-    // stack: x1, isValid(x0, y0), x0, y0, x1, y1, retdest
-    DUP5
-    // stack: x1, y1, isValid(x0, y0), x0, y0, x1, y1, retdest
-    %bn_check
-    // stack: isValid(x1, y1), isValid(x0, y0), x0, y0, x1, y1, retdest
-    AND
-    // stack: isValid(x1, y1) & isValid(x0, y0), x0, y0, x1, y1, retdest
-    %jumpi(bn_add_valid_points)
-    // stack: x0, y0, x1, y1, retdest
-
-    // Otherwise return
-    %pop4
-    // stack: retdest
-    %bn_invalid_input
-
-// BN254 elliptic curve addition.
-// Assumption: (x0,y0) and (x1,y1) are valid points.
-global bn_add_valid_points:
-    // stack: x0, y0, x1, y1, retdest
-
-    // Check if the first point is the identity.
-    DUP2
-    // stack: y0, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x0, y0, x0, y0, x1, y1, retdest
-    %ec_isidentity
-    // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest
-    %jumpi(bn_add_first_zero)
-    // stack: x0, y0, x1, y1, retdest
-
-    // Check if the second point is the identity.
-    DUP4
-    // stack: y1, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: x1, y1, x0, y0, x1, y1, retdest
-    %ec_isidentity
-    // stack: (x1,y1)==(0,0), x0, y0, x1, y1, retdest
-    %jumpi(bn_add_snd_zero)
-    // stack: x0, y0, x1, y1, retdest
-
-    // Check if both points have the same x-coordinate.
-    DUP3
-    // stack: x1, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x0, x1, x0, y0, x1, y1, retdest
-    EQ
-    // stack: x0 == x1, x0, y0, x1, y1, retdest
-    %jumpi(bn_add_equal_first_coord)
-    // stack: x0, y0, x1, y1, retdest
-
-    // Otherwise, we can use the standard formula.
-    // Compute lambda = (y0 - y1)/(x0 - x1)
-    DUP4
-    // stack: y1, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: y0, y1, x0, y0, x1, y1, retdest
-    %submod
-    // stack: y0 - y1, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: x1, y0 - y1, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest
-    %submod
-    // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest
-    %moddiv
-    // stack: lambda, x0, y0, x1, y1, retdest
-    %jump(bn_add_valid_points_with_lambda)
-
-// BN254 elliptic curve addition.
-// Assumption: (x0,y0) == (0,0)
-bn_add_first_zero:
-    // stack: x0, y0, x1, y1, retdest
-    // Just return (x1,y1)
-    %stack (x0, y0, x1, y1, retdest) -> (retdest, x1, y1)
-    JUMP
-
-// BN254 elliptic curve addition.
-// Assumption: (x1,y1) == (0,0)
-bn_add_snd_zero:
-    // stack: x0, y0, x1, y1, retdest
-
-    // Just return (x0,y0)
-    %stack (x0, y0, x1, y1, retdest) -> (retdest, x0, y0)
-    JUMP
-
-// BN254 elliptic curve addition.
-// Assumption: lambda = (y0 - y1)/(x0 - x1)
-bn_add_valid_points_with_lambda:
-    // stack: lambda, x0, y0, x1, y1, retdest
-
-    // Compute x2 = lambda^2 - x1 - x0
-    DUP2
-    // stack: x0, lambda, x0, y0, x1, y1, retdest
-    DUP5
-    // stack: x1, x0, lambda, x0, y0, x1, y1, retdest
-    %bn_base
-    // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP1
-    // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: x2, lambda, x0, y0, x1, y1, retdest
-
-    // Compute y2 = lambda*(x1 - x2) - y1
-    %bn_base
-    // stack: N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP7
-    // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
-    DUP7
-    // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
-    SWAP1
-    // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: y2, x2, lambda, x0, y0, x1, y1, retdest
-
-    // Return x2,y2
-    %stack (y2, x2, lambda, x0, y0, x1, y1, retdest) -> (retdest, x2, y2)
-    JUMP
-
-// BN254 elliptic curve addition.
-// Assumption: (x0,y0) and (x1,y1) are valid points and x0 == x1
-bn_add_equal_first_coord:
-    // stack: x0, y0, x1, y1, retdest with x0 == x1
-
-    // Check if the points are equal
-    DUP2
-    // stack: y0, x0, y0, x1, y1, retdest
-    DUP5
-    // stack: y1, y0, x0, y0, x1, y1, retdest
-    EQ
-    // stack: y1 == y0, x0, y0, x1, y1, retdest
-    %jumpi(bn_add_equal_points)
-    // stack: x0, y0, x1, y1, retdest
-
-    // Otherwise, one is the negation of the other so we can return (0,0).
-    %pop4
-    // stack: retdest
-    PUSH 0
-    // stack: 0, retdest
-    PUSH 0
-    // stack: 0, 0, retdest
-    SWAP2
-    // stack: retdest, 0, 0
-    JUMP
-
-
-// BN254 elliptic curve addition.
-// Assumption: x0 == x1 and y0 == y1
-// Standard doubling formula.
-bn_add_equal_points:
-    // stack: x0, y0, x1, y1, retdest
-
-    // Compute lambda = 3/2 * x0^2 / y0
-    %bn_base
-    // stack: N, x0, y0, x1, y1, retdest
-    %bn_base
-    // stack: N, N, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: x0, N, N, x0, y0, x1, y1, retdest
-    DUP1
-    // stack: x0, x0, N, N, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: x0^2, N, x0, y0, x1, y1, retdest with
-    PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field
-    // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest
-    %moddiv
-    // stack: lambda, x0, y0, x1, y1, retdest
-    %jump(bn_add_valid_points_with_lambda)
-
-// BN254 elliptic curve doubling.
-// Assumption: (x0,y0) is a valid point.
-// Standard doubling formula.
-global bn_double:
-    // stack: x, y, retdest
-    DUP2 DUP2 %ec_isidentity
-    // stack: (x,y)==(0,0), x, y, retdest
-    %jumpi(ec_double_retself)
-    DUP2 DUP2
-    // stack: x, y, x, y, retdest
-    %jump(bn_add_equal_points)
-
-// Push the order of the BN254 base field.
-%macro bn_base
-    PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47
-%endmacro
-
-// Assumption: x, y < N and 2N < 2^256.
-// Note: Doesn't hold for Secp256k1 base field.
-%macro submod
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    ADD
-    // stack: N + x, y // Doesn't overflow since 2N < 2^256
-    SUB
-    // stack: N + x - y // Doesn't underflow since y < N
-    %bn_base
-    // stack: N, N + x - y
-    SWAP1
-    // stack: N + x - y, N
-    MOD
-    // stack: (N + x - y) % N = (x-y) % N
-%endmacro
-
-// Check if (x,y) is a valid curve point.
-// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack.
-%macro bn_check
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    DUP2
-    // stack: x, N, x, y
-    LT
-    // stack: x < N, x, y
-    %bn_base
-    // stack: N, x < N, x, y
-    DUP4
-    // stack: y, N, x < N, x, y
-    LT
-    // stack: y < N, x < N, x, y
-    AND
-    // stack: (y < N) & (x < N), x, y
-    %stack (b, x, y) -> (x, x, @BN_BASE, x, @BN_BASE, @BN_BASE, x, y, b)
-    // stack: x, x, N, x, N, N, x, y, b
-    MULMOD
-    // stack: x^2 % N, x, N, N, x, y, b
-    MULMOD
-    // stack: x^3 % N, N, x, y, b
-    PUSH 3
-    // stack: 3, x^3 % N, N, x, y, b
-    ADDMOD
-    // stack: (x^3 + 3) % N, x, y, b
-    DUP3
-    // stack: y, (x^3 + 3) % N, x, y, b
-    %bn_base
-    // stack: N, y, (x^3 + 3) % N, x, y, b
-    SWAP1
-    // stack: y, N, (x^3 + 3) % N, x, y, b
-    DUP1
-    // stack: y, y, N, (x^3 + 3) % N, x, y, b
-    MULMOD
-    // stack: y^2 % N, (x^3 + 3) % N, x, y, b
-    EQ
-    // stack: y^2 % N == (x^3 + 3) % N, x, y, b
-    SWAP2
-    // stack: y, x, y^2 % N == (x^3 + 3) % N, b
-    %ec_isidentity
-    // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b
-    SWAP2
-    // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0)
-    AND
-    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0)
-    OR
-    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0)
-%endmacro
-
-// Return (u256::MAX, u256::MAX) which is used to indicate the input was invalid.
-%macro bn_invalid_input
-    // stack: retdest
-    PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-    // stack: u256::MAX, retdest
-    PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
-    // stack: u256::MAX, u256::MAX, retdest
-    SWAP2
-    // stack: retdest, u256::MAX, u256::MAX
-    JUMP
-%endmacro
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index 0ac947da..e85838ea 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -1,81 +1,95 @@
-// BN254 elliptic curve addition via the standard affine addition formula.
+// #define N 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 // BN254 base field order
 
-global ec_add:
-    // stack:                                    x0, y0, x1, y1, retdest
+// BN254 elliptic curve addition.
+// Uses the standard affine addition formula.
+global bn_add:
+    // Uncomment for test inputs.
+    // PUSH 0xdeadbeef
+    // PUSH 2
+    // PUSH 1
+    // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121
+    // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770
+    // stack: x0, y0, x1, y1, retdest
 
     // Check if points are valid BN254 points.
     DUP2
-    DUP2    
-    // stack:                            x0, y0, x0, y0, x1, y1, retdest
-    %ec_check
-    // stack:                   isValid(x0, y0), x0, y0, x1, y1, retdest
+    // stack: y0, x0, y0, x1, y1, retdest
+    DUP2
+    // stack: x0, y0, x0, y0, x1, y1, retdest
+    %bn_check
+    // stack: isValid(x0, y0), x0, y0, x1, y1, retdest
     DUP5
-    DUP5    
-    // stack:         x1, y1  , isValid(x0, y0), x0, y0, x1, y1, retdest
-    %ec_check
-    // stack: isValid(x1, y1) , isValid(x0, y0), x0, y0, x1, y1, retdest
+    // stack: y1, isValid(x0, y0), x0, y0, x1, y1, retdest
+    DUP5
+    // stack: x1, y1, isValid(x0, y0), x0, y0, x1, y1, retdest
+    %bn_check
+    // stack: isValid(x1, y1), isValid(x0, y0), x0, y0, x1, y1, retdest
     AND
     // stack: isValid(x1, y1) & isValid(x0, y0), x0, y0, x1, y1, retdest
-    %jumpi(ec_add_valid_points)
-    // stack:                                    x0, y0, x1, y1, retdest
+    %jumpi(bn_add_valid_points)
+    // stack: x0, y0, x1, y1, retdest
 
     // Otherwise return
     %pop4
     // stack: retdest
-    %ec_invalid_input
+    %bn_invalid_input
 
 // BN254 elliptic curve addition.
 // Assumption: (x0,y0) and (x1,y1) are valid points.
-global ec_add_valid_points:
-    // stack:                   x0, y0, x1, y1, retdest
+global bn_add_valid_points:
+    // stack: x0, y0, x1, y1, retdest
 
     // Check if the first point is the identity.
     DUP2
+    // stack: y0, x0, y0, x1, y1, retdest
     DUP2
-    // stack:           x0,y0 , x0, y0, x1, y1, retdest
+    // stack: x0, y0, x0, y0, x1, y1, retdest
     %ec_isidentity
-    // stack:   (0,0)==(x0,y0), x0, y0, x1, y1, retdest
-    %jumpi(ec_add_fst_zero)
-    // stack:                   x0, y0, x1, y1, retdest
+    // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest
+    %jumpi(bn_add_first_zero)
+    // stack: x0, y0, x1, y1, retdest
 
     // Check if the second point is the identity.
     DUP4
-    DUP4    
-    // stack:           x1,y1 , x0, y0, x1, y1, retdest
+    // stack: y1, x0, y0, x1, y1, retdest
+    DUP4
+    // stack: x1, y1, x0, y0, x1, y1, retdest
     %ec_isidentity
-    // stack:   (0,0)==(x1,y1), x0, y0, x1, y1, retdest
-    %jumpi(ec_add_snd_zero)
-    // stack:                   x0, y0, x1, y1, retdest
+    // stack: (x1,y1)==(0,0), x0, y0, x1, y1, retdest
+    %jumpi(bn_add_snd_zero)
+    // stack: x0, y0, x1, y1, retdest
 
     // Check if both points have the same x-coordinate.
     DUP3
-    DUP2    
-    // stack:         x0 ,  x1, x0, y0, x1, y1, retdest
+    // stack: x1, x0, y0, x1, y1, retdest
+    DUP2
+    // stack: x0, x1, x0, y0, x1, y1, retdest
     EQ
-    // stack:         x0 == x1, x0, y0, x1, y1, retdest
-    %jumpi(ec_add_equal_first_coord)
+    // stack: x0 == x1, x0, y0, x1, y1, retdest
+    %jumpi(bn_add_equal_first_coord)
+    // stack: x0, y0, x1, y1, retdest
 
-
-    // stack:                   x0, y0, x1, y1, retdest
     // Otherwise, we can use the standard formula.
     // Compute lambda = (y0 - y1)/(x0 - x1)
     DUP4
+    // stack: y1, x0, y0, x1, y1, retdest
     DUP3
-    // stack:          y0 , y1, x0, y0, x1, y1, retdest
-    SUBFP254
-    // stack:          y0 - y1, x0, y0, x1, y1, retdest
+    // stack: y0, y1, x0, y0, x1, y1, retdest
+    %submod
+    // stack: y0 - y1, x0, y0, x1, y1, retdest
     DUP4
+    // stack: x1, y0 - y1, x0, y0, x1, y1, retdest
     DUP3
-    // stack: x0 , x1, y0 - y1, x0, y0, x1, y1, retdest
-    SUBFP254
+    // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest
+    %submod
     // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest
-    %divfp254
-    // stack:           lambda, x0, y0, x1, y1, retdest
-    %jump(ec_add_valid_points_with_lambda)
+    %divr_fp254
+    // stack: lambda, x0, y0, x1, y1, retdest
+    %jump(bn_add_valid_points_with_lambda)
 
 // BN254 elliptic curve addition.
 // Assumption: (x0,y0) == (0,0)
-ec_add_fst_zero:
+bn_add_first_zero:
     // stack: x0, y0, x1, y1, retdest
     // Just return (x1,y1)
     %stack (x0, y0, x1, y1, retdest) -> (retdest, x1, y1)
@@ -83,48 +97,55 @@ ec_add_fst_zero:
 
 // BN254 elliptic curve addition.
 // Assumption: (x1,y1) == (0,0)
-ec_add_snd_zero:
+bn_add_snd_zero:
     // stack: x0, y0, x1, y1, retdest
+
     // Just return (x0,y0)
     %stack (x0, y0, x1, y1, retdest) -> (retdest, x0, y0)
     JUMP
 
 // BN254 elliptic curve addition.
 // Assumption: lambda = (y0 - y1)/(x0 - x1)
-ec_add_valid_points_with_lambda:
-    // stack:                             lambda, x0, y0, x1, y1, retdest
+bn_add_valid_points_with_lambda:
+    // stack: lambda, x0, y0, x1, y1, retdest
 
     // Compute x2 = lambda^2 - x1 - x0
     DUP2
+    // stack: x0, lambda, x0, y0, x1, y1, retdest
     DUP5
-    // stack:                     x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP3
-    // stack:          lambda   , x1, x0, lambda, x0, y0, x1, y1, retdest
+    // stack: x1, x0, lambda, x0, y0, x1, y1, retdest
+    %bn_base
+    // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest
+    DUP4
+    // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
     DUP1
-    MULFP254
-    // stack:          lambda^2 , x1, x0, lambda, x0, y0, x1, y1, retdest
-    SUBFP254
-    // stack:          lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
-    SUBFP254
-    // stack:                         x2, lambda, x0, y0, x1, y1, retdest
+    // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
+    MULMOD
+    // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest
+    %submod
+    // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
+    %submod
+    // stack: x2, lambda, x0, y0, x1, y1, retdest
 
     // Compute y2 = lambda*(x1 - x2) - y1
-    DUP1
-    // stack:                    x2 , x2, lambda, x0, y0, x1, y1, retdest
-    DUP6
-    // stack:               x1 , x2 , x2, lambda, x0, y0, x1, y1, retdest
-    SUBFP254
-    // stack:               x1 - x2 , x2, lambda, x0, y0, x1, y1, retdest
-    DUP3
-    // stack:     lambda ,  x1 - x2 , x2, lambda, x0, y0, x1, y1, retdest
-    MULFP254
-    // stack:     lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
+    %bn_base
+    // stack: N, x2, lambda, x0, y0, x1, y1, retdest
+    DUP2
+    // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest
+    DUP7
+    // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest
+    %submod
+    // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
+    DUP4
+    // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
+    MULMOD
+    // stack: lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
     DUP7
     // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
     SWAP1
     // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest
-    SUBFP254
-    // stack:                     y2, x2, lambda, x0, y0, x1, y1, retdest
+    %submod
+    // stack: y2, x2, lambda, x0, y0, x1, y1, retdest
 
     // Return x2,y2
     %stack (y2, x2, lambda, x0, y0, x1, y1, retdest) -> (retdest, x2, y2)
@@ -132,22 +153,25 @@ ec_add_valid_points_with_lambda:
 
 // BN254 elliptic curve addition.
 // Assumption: (x0,y0) and (x1,y1) are valid points and x0 == x1
-ec_add_equal_first_coord:
-    // stack:           x0, y0, x1, y1, retdest with x0 == x1
+bn_add_equal_first_coord:
+    // stack: x0, y0, x1, y1, retdest with x0 == x1
 
     // Check if the points are equal
     DUP2
+    // stack: y0, x0, y0, x1, y1, retdest
     DUP5
-    // stack: y1  , y0, x0, y0, x1, y1, retdest
+    // stack: y1, y0, x0, y0, x1, y1, retdest
     EQ
     // stack: y1 == y0, x0, y0, x1, y1, retdest
-    %jumpi(ec_add_equal_points)
-    // stack:           x0, y0, x1, y1, retdest
+    %jumpi(bn_add_equal_points)
+    // stack: x0, y0, x1, y1, retdest
 
     // Otherwise, one is the negation of the other so we can return (0,0).
     %pop4
-    // stack:       retdest
-    PUSH 0  PUSH 0
+    // stack: retdest
+    PUSH 0
+    // stack: 0, retdest
+    PUSH 0
     // stack: 0, 0, retdest
     SWAP2
     // stack: retdest, 0, 0
@@ -157,118 +181,125 @@ ec_add_equal_first_coord:
 // BN254 elliptic curve addition.
 // Assumption: x0 == x1 and y0 == y1
 // Standard doubling formula.
-ec_add_equal_points:
-    // stack:                 x0, y0, x1, y1, retdest
-    // Compute lambda = 3/2 * x0^2 / y0
+bn_add_equal_points:
+    // stack: x0, y0, x1, y1, retdest
 
+    // Compute lambda = 3/2 * x0^2 / y0
+    %bn_base
+    // stack: N, x0, y0, x1, y1, retdest
+    %bn_base
+    // stack: N, N, x0, y0, x1, y1, retdest
+    DUP3
+    // stack: x0, N, N, x0, y0, x1, y1, retdest
     DUP1
-    // stack:           x0  , x0, y0, x1, y1, retdest
-    DUP1
-    MULFP254
-    // stack:           x0^2, x0, y0, x1, y1, retdest
-    %bn_3_over_2
-    // stack:     3/2 , x0^2, x0, y0, x1, y1, retdest
-    MULFP254
-    // stack:     3/2 * x0^2, x0, y0, x1, y1, retdest
+    // stack: x0, x0, N, N, x0, y0, x1, y1, retdest
+    MULMOD
+    // stack: x0^2, N, x0, y0, x1, y1, retdest
+    PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field
+    // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest
+    MULMOD
+    // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest
     DUP3
     // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest
-    %divfp254
-    // stack:         lambda, x0, y0, x1, y1, retdest
-    %jump(ec_add_valid_points_with_lambda)
+    %divr_fp254
+    // stack: lambda, x0, y0, x1, y1, retdest
+    %jump(bn_add_valid_points_with_lambda)
 
 // BN254 elliptic curve doubling.
 // Assumption: (x0,y0) is a valid point.
 // Standard doubling formula.
-global ec_double:
-    // stack:         x0, y0, retdest
-    DUP2
-    DUP2    
-    // stack: x0, y0, x0, y0, retdest
-    %jump(ec_add_equal_points)
+global bn_double:
+    // stack: x, y, retdest
+    DUP2 DUP2 %ec_isidentity
+    // stack: (x,y)==(0,0), x, y, retdest
+    %jumpi(ec_double_retself)
+    DUP2 DUP2
+    // stack: x, y, x, y, retdest
+    %jump(bn_add_equal_points)
 
 // Push the order of the BN254 base field.
 %macro bn_base
     PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47
 %endmacro
 
-%macro bn_3_over_2
-    // 3/2 in the base field
-    PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5
+// Assumption: x, y < N and 2N < 2^256.
+// Note: Doesn't hold for Secp256k1 base field.
+%macro submod
+    // stack: x, y
+    %bn_base
+    // stack: N, x, y
+    ADD
+    // stack: N + x, y // Doesn't overflow since 2N < 2^256
+    SUB
+    // stack: N + x - y // Doesn't underflow since y < N
+    %bn_base
+    // stack: N, N + x - y
+    SWAP1
+    // stack: N + x - y, N
+    MOD
+    // stack: (N + x - y) % N = (x-y) % N
 %endmacro
 
 // Check if (x,y) is a valid curve point.
-// Returns range & curve || is_identity
-// where
-//     range = (x < N) & (y < N) 
-//     curve = y^2 == (x^3 + 3) 
-//     ident = (x,y) == (0,0)
-
-%macro ec_check
-    // stack:                       x, y
-    DUP1
-    // stack:                    x, x, y
+// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack.
+%macro bn_check
+    // stack: x, y
     %bn_base
-    // stack:                N , x, x, y
-    DUP1
-    // stack:             N, N , x, x, y
-    DUP5
-    // stack:         y , N, N , x, x, y
-    LT  
-    // stack:         y < N, N , x, x, y
-    SWAP2
-    // stack:         x , N, y < N, x, y
-    LT
-    // stack:         x < N, y < N, x, y
-    AND
-    // stack:                range, x, y
-    SWAP2
-    // stack:                y, x, range
-    DUP2 
-    // stack:           x  , y, x, range
-    DUP1 
-    DUP1
-    MULFP254
-    MULFP254
-    // stack:           x^3, y, x, range
-    PUSH 3
-    ADDFP254
-    // stack:       3 + x^3, y, x, range
+    // stack: N, x, y
     DUP2
-    // stack:  y  , 3 + x^3, y, x, range
-    DUP1
-    MULFP254
-    // stack:  y^2, 3 + x^3, y, x, range
-    EQ
-    // stack:         curve, y, x, range
-    SWAP2
-    // stack:         x, y, curve, range
-    %ec_isidentity
-    // stack:       ident , curve, range
-    SWAP2
-    // stack:       range , curve, ident
+    // stack: x, N, x, y
+    LT
+    // stack: x < N, x, y
+    %bn_base
+    // stack: N, x < N, x, y
+    DUP4
+    // stack: y, N, x < N, x, y
+    LT
+    // stack: y < N, x < N, x, y
     AND
-    // stack:       range & curve, ident
+    // stack: (y < N) & (x < N), x, y
+    %stack (b, x, y) -> (x, x, @BN_BASE, x, @BN_BASE, @BN_BASE, x, y, b)
+    // stack: x, x, N, x, N, N, x, y, b
+    MULMOD
+    // stack: x^2 % N, x, N, N, x, y, b
+    MULMOD
+    // stack: x^3 % N, N, x, y, b
+    PUSH 3
+    // stack: 3, x^3 % N, N, x, y, b
+    ADDMOD
+    // stack: (x^3 + 3) % N, x, y, b
+    DUP3
+    // stack: y, (x^3 + 3) % N, x, y, b
+    %bn_base
+    // stack: N, y, (x^3 + 3) % N, x, y, b
+    SWAP1
+    // stack: y, N, (x^3 + 3) % N, x, y, b
+    DUP1
+    // stack: y, y, N, (x^3 + 3) % N, x, y, b
+    MULMOD
+    // stack: y^2 % N, (x^3 + 3) % N, x, y, b
+    EQ
+    // stack: y^2 % N == (x^3 + 3) % N, x, y, b
+    SWAP2
+    // stack: y, x, y^2 % N == (x^3 + 3) % N, b
+    %ec_isidentity
+    // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b
+    SWAP2
+    // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0)
+    AND
+    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0)
     OR
-    // stack:                   is_valid
-%endmacro
-
-// Check if (x,y)==(0,0)
-%macro ec_isidentity
-    // stack: x , y
-    OR
-    // stack: x | y
-    ISZERO
-    // stack: (x,y) == (0,0)
+    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0)
 %endmacro
 
 // Return (u256::MAX, u256::MAX) which is used to indicate the input was invalid.
-%macro ec_invalid_input
+%macro bn_invalid_input
     // stack: retdest
     PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
     // stack: u256::MAX, retdest
-    DUP1
+    PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
     // stack: u256::MAX, u256::MAX, retdest
     SWAP2
     // stack: retdest, u256::MAX, u256::MAX
     JUMP
-%endmacro
+%endmacro
\ No newline at end of file
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
index 843053e9..ecbb3de0 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_mul.asm
@@ -1,90 +1,41 @@
 // BN254 elliptic curve scalar multiplication.
-// Recursive implementation, same algorithm as in `exp.asm`.
-global ec_mul:
-    // stack:                x, y, s, retdest
+// Uses GLV, wNAF with w=5, and an MSM algorithm.
+global bn_mul:
+    // stack: x, y, s, retdest
     DUP2
-    // stack:            y , x, y, s, retdest
+    // stack: y, x, y, s, retdest
     DUP2
-    // stack:          x,y , x, y, s, retdest
+    // stack: x, y, x, y, s, retdest
     %ec_isidentity
-    // stack:  (0,0)==(x,y), x, y, s, retdest
+    // stack: (x,y)==(0,0), x, y, s, retdest
     %jumpi(ret_zero_ec_mul)
-    // stack:                x, y, s, retdest
+    // stack: x, y, s, retdest
     DUP2
-    // stack:             y, x, y, s, retdest
+    // stack: y, x, y, s, retdest
     DUP2
-    // stack:          x, y, x, y, s, retdest
-    %ec_check
+    // stack: x, y, x, y, s, retdest
+    %bn_check
     // stack: isValid(x, y), x, y, s, retdest
-    %jumpi(ec_mul_valid_point)
-    // stack:                x, y, s, retdest
+    %jumpi(bn_mul_valid_point)
+    // stack: x, y, s, retdest
     %pop3
-    %ec_invalid_input
+    %bn_invalid_input
 
-// Same algorithm as in `exp.asm`
-ec_mul_valid_point:
-    // stack:    x, y, s, retdest
-    DUP3
-    // stack: s, x, y, s, retdest
-    %jumpi(step_case)
-    // stack:    x, y, s, retdest
-    %jump(ret_zero_ec_mul)
-
-step_case:
-    // stack:                                                 x, y, s, retdest
-    PUSH recursion_return
-    // stack:                               recursion_return, x, y, s, retdest
-    PUSH 2
-    // stack:                            2, recursion_return, x, y, s, retdest
-    DUP5
-    // stack:                        s , 2, recursion_return, x, y, s, retdest
-    DIV
-    // stack:                        s / 2, recursion_return, x, y, s, retdest
-    PUSH step_case_contd
-    // stack:       step_case_contd, s / 2, recursion_return, x, y, s, retdest
-    DUP5
-    // stack:    y, step_case_contd, s / 2, recursion_return, x, y, s, retdest
-    DUP5
-    // stack: x, y, step_case_contd, s / 2, recursion_return, x, y, s, retdest
-    %jump(ec_double)
-
-// Assumption: 2(x,y) = (x',y')
-step_case_contd:
-    // stack: x', y', s / 2, recursion_return, x, y, s, retdest
-    %jump(ec_mul_valid_point)
-
-recursion_return:
-    // stack:     x', y', x, y, s, retdest
-    SWAP4
-    // stack:     s, y', x, y, x', retdest
-    PUSH 1
-    // stack:  1, s, y', x, y, x', retdest
-    AND
-    // stack: s & 1, y', x, y, x', retdest
-    SWAP1
-    // stack: y', s & 1, x, y, x', retdest
-    SWAP2
-    // stack: x, s & 1, y', y, x', retdest
-    SWAP3
-    // stack: y, s & 1, y', x, x', retdest
-    SWAP4
-    // stack: x', s & 1, y', x, y, retdest
-    SWAP1
-    // stack: s & 1, x', y', x, y, retdest
-    %jumpi(odd_scalar)
-    // stack:        x', y', x, y, retdest
-    SWAP3
-    // stack:        y, y', x, x', retdest
-    POP
-    // stack:           y', x, x', retdest
-    SWAP1
-    // stack:           x, y', x', retdest
-    POP
-    // stack:              y', x', retdest
-    SWAP2
-    // stack:              retdest, x', y'
+bn_mul_valid_point:
+    %stack (x, y, s, retdest) -> (s, bn_mul_after_glv, x, y, bn_msm, bn_mul_end, retdest)
+    %jump(bn_glv_decompose)
+bn_mul_after_glv:
+    // stack: bneg, a, b, x, y, bn_msm, bn_mul_end, retdest
+    // Store bneg at this (otherwise unused) location. Will be used later in the MSM.
+    %mstore_kernel(@SEGMENT_KERNEL_BN_TABLE_Q, @BN_BNEG_LOC)
+    // stack: a, b, x, y, bn_msm, bn_mul_end, retdest
+    PUSH bn_mul_after_a SWAP1 PUSH @SEGMENT_KERNEL_BN_WNAF_A PUSH @BN_SCALAR %jump(wnaf)
+bn_mul_after_a:
+    // stack: b, x, y, bn_msm, bn_mul_end, retdest
+    PUSH bn_mul_after_b SWAP1 PUSH @SEGMENT_KERNEL_BN_WNAF_B PUSH @BN_SCALAR %jump(wnaf)
+bn_mul_after_b:
+    // stack: x, y, bn_msm, bn_mul_end, retdest
+    %jump(bn_precompute_table)
+bn_mul_end:
+    %stack (Ax, Ay, retdest) -> (retdest, Ax, Ay)
     JUMP
-
-odd_scalar:
-    // stack: x', y', x, y, retdest
-    %jump(ec_add_valid_points)
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/glv.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/curve/bn254/glv.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/msm.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/msm.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/curve/bn254/msm.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/msm.asm
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/precomputation.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/precomputation.asm
similarity index 100%
rename from evm/src/cpu/kernel/asm/curve/bn254/precomputation.asm
rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/precomputation.asm
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index c17117ab..414e00ad 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -129,7 +129,7 @@ mul_tangent_2:
     DUP6  
     DUP6
     // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    %jump(ec_double)
+    %jump(bn_double)
 after_double:
     // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
     SWAP5
@@ -175,7 +175,7 @@ mul_cord_1:
     DUP7  
     DUP7
     // stack: O , P, after_add, 0xnm, times, O  , P, Q, out
-    %jump(ec_add_valid_points)
+    %jump(bn_add_valid_points)
 after_add:
     // stack:            O + P, 0xnm, times, O  , P, Q, out
     SWAP4
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm
deleted file mode 100644
index ecbb3de0..00000000
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_mul.asm
+++ /dev/null
@@ -1,41 +0,0 @@
-// BN254 elliptic curve scalar multiplication.
-// Uses GLV, wNAF with w=5, and a MSM algorithm.
-global bn_mul:
-    // stack: x, y, s, retdest
-    DUP2
-    // stack: y, x, y, s, retdest
-    DUP2
-    // stack: x, y, x, y, s, retdest
-    %ec_isidentity
-    // stack: (x,y)==(0,0), x, y, s, retdest
-    %jumpi(ret_zero_ec_mul)
-    // stack: x, y, s, retdest
-    DUP2
-    // stack: y, x, y, s, retdest
-    DUP2
-    // stack: x, y, x, y, s, retdest
-    %bn_check
-    // stack: isValid(x, y), x, y, s, retdest
-    %jumpi(bn_mul_valid_point)
-    // stack: x, y, s, retdest
-    %pop3
-    %bn_invalid_input
-
-bn_mul_valid_point:
-    %stack (x, y, s, retdest) -> (s, bn_mul_after_glv, x, y, bn_msm, bn_mul_end, retdest)
-    %jump(bn_glv_decompose)
-bn_mul_after_glv:
-    // stack: bneg, a, b, x, y, bn_msm, bn_mul_end, retdest
-    // Store bneg at this (otherwise unused) location. Will be used later in the MSM.
-    %mstore_kernel(@SEGMENT_KERNEL_BN_TABLE_Q, @BN_BNEG_LOC)
-    // stack: a, b, x, y, bn_msm, bn_mul_end, retdest
-    PUSH bn_mul_after_a SWAP1 PUSH @SEGMENT_KERNEL_BN_WNAF_A PUSH @BN_SCALAR %jump(wnaf)
-bn_mul_after_a:
-    // stack: b, x, y, bn_msm, bn_mul_end, retdest
-    PUSH bn_mul_after_b SWAP1 PUSH @SEGMENT_KERNEL_BN_WNAF_B PUSH @BN_SCALAR %jump(wnaf)
-bn_mul_after_b:
-    // stack: x, y, bn_msm, bn_mul_end, retdest
-    %jump(bn_precompute_table)
-bn_mul_end:
-    %stack (Ax, Ay, retdest) -> (retdest, Ax, Ay)
-    JUMP
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 3e26d355..9071f396 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -1,7 +1,5 @@
-/// Division modulo the BN254 prime
-
-// Returns y * (x^-1) where the inverse is taken modulo N
-%macro divfp254
+// Returns reverse order division y/x, modulo N
+%macro divr_fp254
     // stack: x   , y
     %inv_fp254
     // stack: x^-1, y

From 787cc8903b1ac9b811fd554dfebed28d95bd0353 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 13 Feb 2023 12:32:40 -0800
Subject: [PATCH 188/201] change segment

---
 .../curve_arithmetic/invariant_exponent.asm   |   6 +-
 .../bn254/curve_arithmetic/tate_pairing.asm   |  22 +-
 .../bn254/field_arithmetic/degree_12_mul.asm  |   4 +-
 .../curve/bn254/field_arithmetic/inverse.asm  |   3 +-
 .../asm/curve/bn254/field_arithmetic/util.asm | 198 ++++++++++--------
 evm/src/cpu/kernel/tests/bn254.rs             |   4 +-
 evm/src/memory/segments.rs                    |   7 +-
 evm/src/witness/util.rs                       |   4 +-
 8 files changed, 138 insertions(+), 110 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
index b21174e1..c74db1af 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
@@ -59,9 +59,9 @@ final_exp:
     // stack:             300, val, retdest
     %stack () -> (1, 1, 1)
     // stack:    1, 1, 1, 300, val, retdest
-    %mstore_kernel_general(200)  
-    %mstore_kernel_general(224)  
-    %mstore_kernel_general(212)
+    %mstore_kernel_bn254_pairing(200)  
+    %mstore_kernel_bn254_pairing(224)  
+    %mstore_kernel_bn254_pairing(212)
     // stack:             300, val, retdest  {200: y0, 212: y2, 224: y4}
     %stack () -> (64, 62, 65)
     // stack: 64, 62, 65, 300, val, retdest  {200: y0, 212: y2, 224: y4}
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 414e00ad..375f9bc4 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -37,7 +37,7 @@ global bn254_miller:
     // stack:         ptr, out, retdest
     %stack (ptr, out) -> (out, 1, ptr, out)
     // stack: out, 1, ptr, out, retdest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:         ptr, out, retdest
     %load_fp254_6
     // stack:        P, Q, out, retdest
@@ -201,7 +201,7 @@ after_add:
     // stack:      py^2 , 9, px, py, qx, qx_,  qy, qy_
     SUBFP254
     // stack:      py^2 - 9, px, py, qx, qx_,  qy, qy_
-    %mstore_kernel_general(100)
+    %mstore_kernel_bn254_pairing(100)
     // stack:                px, py, qx, qx_,  qy, qy_
     DUP1  
     MULFP254
@@ -217,7 +217,7 @@ after_add:
     DUP3  
     MULFP254
     // stack:   (-3*px^2)qx, py, -3px^2, qx_,  qy, qy_ 
-    %mstore_kernel_general(102)
+    %mstore_kernel_bn254_pairing(102)
     // stack:                py, -3px^2, qx_,  qy, qy_ 
     PUSH 2  
     MULFP254
@@ -227,15 +227,15 @@ after_add:
     DUP4  
     MULFP254
     // stack:           (2py)qy, -3px^2, qx_, 2py, qy_ 
-    %mstore_kernel_general(108)
+    %mstore_kernel_bn254_pairing(108)
     // stack:                    -3px^2, qx_, 2py, qy_ 
     MULFP254
     // stack:                   (-3px^2)*qx_, 2py, qy_ 
-    %mstore_kernel_general(103)
+    %mstore_kernel_bn254_pairing(103)
     // stack:                                 2py, qy_ 
     MULFP254
     // stack:                                (2py)*qy_ 
-    %mstore_kernel_general(109)
+    %mstore_kernel_bn254_pairing(109)
 %endmacro
 
 /// def cord(p1x, p1y, p2x, p2y, qx, qy):
@@ -257,7 +257,7 @@ after_add:
     // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SUBFP254
     // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    %mstore_kernel_general(100)
+    %mstore_kernel_bn254_pairing(100)
     // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SWAP3
     // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
@@ -272,20 +272,20 @@ after_add:
     DUP5
     MULFP254
     // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    %mstore_kernel_general(108)
+    %mstore_kernel_bn254_pairing(108)
     // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
     SWAP1
     // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
     DUP2
     MULFP254
     // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
-    %mstore_kernel_general(102)
+    %mstore_kernel_bn254_pairing(102)
     // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
     MULFP254
     // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
-    %mstore_kernel_general(103)
+    %mstore_kernel_bn254_pairing(103)
     // stack:                                            p1x - p2x, qy_
     MULFP254
     // stack:                                           (p1x - p2x)*qy_
-    %mstore_kernel_general(109)
+    %mstore_kernel_bn254_pairing(109)
 %endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index 3d384380..9b3e67cf 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -209,7 +209,7 @@ global mul_fp254_12_sparse:
     // stack:                           f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     DUP13 
     // stack:                      inB, f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                      g0 , f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %scale_re_fp254_6
     // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
@@ -238,7 +238,7 @@ global mul_fp254_12_sparse:
     // stack: out, G1 * sh(f) + G2 * sh2(f') + g0 * f, inB, f', out, f, inB, f', inA, inB, out
     %store_fp254_6
     // stack:                                          inB, f', out, f, inB, f', inA, inB, out
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                                          g0 , f', out, f, inB, f', inA, inB, out
     %scale_re_fp254_6
     // stack:                                          g0 * f', out, f, inB, f', inA, inB, out
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 9071f396..72ca051b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -11,7 +11,8 @@
     // stack:        x
     PROVER_INPUT(ff::bn254_base::inverse)
     // stack: x^-1 , x
-    SWAP1  DUP2
+    SWAP1  
+    DUP2
     // stack: x^-1 , x, x^-1
     MULFP254
     // stack: x^-1 * x, x^-1
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
index e1f043f1..af074714 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm
@@ -1,10 +1,32 @@
 // Load a single value from bn254 pairings memory.
-%macro mload_bn254_pairings
+%macro mload_kernel_bn254_pairing
     // stack: offset
-    %mload_kernel(@SEGMENT_BN254_PAIRINGS)
+    %mload_kernel(@SEGMENT_KERNEL_BN_PAIRING)
     // stack: value
 %endmacro
 
+%macro mload_kernel_bn254_pairing(offset)
+    // stack:
+    PUSH $offset
+    // stack: offset
+    %mload_kernel(@SEGMENT_KERNEL_BN_PAIRING)
+    // stack: value
+%endmacro
+
+// Store a single value to bn254 pairings memory.
+%macro mstore_kernel_bn254_pairing
+    // stack: offset, value
+    %mstore_kernel(@SEGMENT_KERNEL_BN_PAIRING)
+    // stack:
+%endmacro
+
+%macro mstore_kernel_bn254_pairing(offset)
+    // stack: value
+    PUSH $offset
+    // stack: offset, value
+    %mstore_kernel(@SEGMENT_KERNEL_BN_PAIRING)
+    // stack:
+%endmacro
 
 // fp254_2 macros
 
@@ -13,11 +35,11 @@
     DUP1  
     %add_const(1)
     // stack: ind1, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:   x1, ptr
     SWAP1
     // stack: ind0, x1
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:   x0, x1
 %endmacro 
 
@@ -101,31 +123,31 @@
     DUP1  
     %add_const(4)
     // stack:                   ind4, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                     x4, ptr
     DUP2  
     %add_const(3)
     // stack:               ind3, x4, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                 x3, x4, ptr
     DUP3  
     %add_const(2)
     // stack:           ind2, x3, x4, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:             x2, x3, x4, ptr
     DUP4  
     %add_const(1)
     // stack:       ind1, x2, x3, x4, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x1, x2, x3, x4, ptr
     DUP5  
     %add_const(5)
     // stack:   ind5, x1, x2, x3, x4, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:     x5, x1, x2, x3, x4, ptr
     SWAP5
     // stack:   ind0, x1, x2, x3, x4, x5
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:     x0, x1, x2, x3, x4, x5
 %endmacro
 
@@ -135,31 +157,31 @@
     PUSH $ptr  
     %add_const(5)
     // stack:                     ind5
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                       x5
     PUSH $ptr  
     %add_const(4)
     // stack:                 ind4, x5
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                   x4, x5
     PUSH $ptr  
     %add_const(3)
     // stack:             ind3, x4, x5
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:               x3, x4, x5
     PUSH $ptr  
     %add_const(2)
     // stack:         ind2, x3, x4, x5
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:           x2, x3, x4, x5
     PUSH $ptr  
     %add_const(1)
     // stack:     ind1, x2, x3, x4, x5
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:       x1, x2, x3, x4, x5
     PUSH $ptr
     // stack: ind0, x1, x2, x3, x4, x5
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:   x0, x1, x2, x3, x4, x5
 %endmacro
 
@@ -171,30 +193,30 @@
     DUP6  
     %add_const(4)
     // stack: ind4, x4, x0, x1, x2, x3, ptr, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:           x0, x1, x2, x3, ptr, x5
     DUP5
     // stack:     ind0, x0, x1, x2, x3, ptr, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:               x1, x2, x3, ptr, x5
     DUP4  
     %add_const(1)
     // stack:         ind1, x1, x2, x3, ptr, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                   x2, x3, ptr, x5
     DUP3  
     %add_const(2)
     // stack:             ind2, x2, x3, ptr, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                       x3, ptr, x5
     DUP2  
     %add_const(3)
     // stack:                 ind3, x3, ptr, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                           ptr, x5
     %add_const(5)
     // stack:                          ind5, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:
 %endmacro
 
@@ -209,14 +231,14 @@
     DUP7  
     %add_const(5)
     // stack: ind5, 2*x5, x0, x1, x2, x3, x4, ptr
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:             x0, x1, x2, x3, x4, ptr
     PUSH 2  
     MULFP254
     // stack:           2*x0, x1, x2, x3, x4, ptr
     DUP6
     // stack:     ind0, 2*x0, x1, x2, x3, x4, ptr
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                 x1, x2, x3, x4, ptr
     PUSH 2  
     MULFP254
@@ -224,7 +246,7 @@
     DUP5  
     %add_const(1)
     // stack:         ind1, 2*x1, x2, x3, x4, ptr
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                     x2, x3, x4, ptr
     PUSH 2  
     MULFP254
@@ -232,7 +254,7 @@
     DUP4  
     %add_const(2)
     // stack:             ind2, 2*x2, x3, x4, ptr
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                         x3, x4, ptr
     PUSH 2 
     MULFP254
@@ -240,7 +262,7 @@
     DUP3  
     %add_const(3)
     // stack:                 ind3, 2*x3, x4, ptr
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                             x4, ptr
     PUSH 2  
     MULFP254
@@ -249,7 +271,7 @@
     // stack:                           ptr, 2*x4
     %add_const(4)
     // stack:                          ind4, 2*x4
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:
 %endmacro
 
@@ -258,32 +280,32 @@
     // stack:       x0, x1, x2, x3, x4, x5
     PUSH $ptr
     // stack: ind0, x0, x1, x2, x3, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:           x1, x2, x3, x4, x5
     PUSH $ptr  
     %add_const(1)
     // stack:     ind1, x1, x2, x3, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:               x2, x3, x4, x5
     PUSH $ptr  
     %add_const(2)
     // stack:         ind2, x2, x3, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                   x3, x4, x5
     PUSH $ptr  
     %add_const(3)
     // stack:             ind3, x3, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                       x4, x5
     PUSH $ptr  
     %add_const(4)
     // stack:                 ind4, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                           x5
     PUSH $ptr  
     %add_const(5)
     // stack:                     ind5, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:
 %endmacro
 
@@ -293,33 +315,33 @@
     PUSH $ptr  
     %add_const(2)
     // stack: ind2, x0, x1, x2, x3, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:           x1, x2, x3, x4, x5
     PUSH $ptr  
     %add_const(3)
     // stack:     ind3, x1, x2, x3, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:               x2, x3, x4, x5
     PUSH $ptr  
     %add_const(4)
     // stack:         ind4, x2, x3, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                   x3, x4, x5
     PUSH $ptr  
     %add_const(5)
     // stack:             ind5, x3, x4, x5
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                       x4, x5
     %i9
     // stack:                       y5, y4
     PUSH $ptr  
     %add_const(1)
     // stack:                 ind1, y5, y4
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                           y4
     PUSH $ptr
     // stack:                     ind0, y4
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:
 %endmacro
 
@@ -795,61 +817,61 @@
     DUP1  
     %add_const(10)
     // stack:                                                   ind10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                                                     x10, ptr
     DUP2  
     %add_const(9)
     // stack:                                              ind09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                                                x09, x10, ptr
     DUP3  
     %add_const(8)
     // stack:                                         ind08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                                           x08, x09, x10, ptr
     DUP4  
     %add_const(7)
     // stack:                                    ind07, x08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                                      x07, x08, x09, x10, ptr
     DUP5  
     %add_const(6)
     // stack:                               ind06, x07, x08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                                 x06, x07, x08, x09, x10, ptr
     DUP6  
     %add_const(5)
     // stack:                          ind05, x06, x07, x08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                            x05, x06, x07, x08, x09, x10, ptr
     DUP7  
     %add_const(4)
     // stack:                     ind04, x05, x06, x07, x08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                       x04, x05, x06, x07, x08, x09, x10, ptr
     DUP8  
     %add_const(3)
     // stack:                ind03, x04, x05, x06, x07, x08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:                  x03, x04, x05, x06, x07, x08, x09, x10, ptr
     DUP9  
     %add_const(2)
     // stack:           ind02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:             x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
     DUP10  
     %add_const(1)
     // stack:      ind01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:        x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
     DUP11  
     %add_const(11)
     // stack: ind11, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:   x11, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, ptr
     SWAP11
     // stack: ind00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:   x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11
 %endmacro
 
@@ -860,60 +882,60 @@
     DUP12  
     %add_const(10)
     // stack: ind10, x10, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:             x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     DUP11
     // stack:      ind00, x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                  x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     DUP10  
     %add_const(01)
     // stack:           ind01, x01, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                       x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
     DUP9   
     %add_const(02)
     // stack:                ind02, x02, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                            x03, x04, x05, x06, x07, x08, x09, ptr, x11
     DUP8   
     %add_const(03)
     // stack:                     ind03, x03, x04, x05, x06, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                                 x04, x05, x06, x07, x08, x09, ptr, x11
     DUP7   
     %add_const(04)
     // stack:                          ind04, x04, x05, x06, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                                      x05, x06, x07, x08, x09, ptr, x11
     DUP6   
     %add_const(05)
     // stack:                               ind05, x05, x06, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                                           x06, x07, x08, x09, ptr, x11
     DUP5   
     %add_const(06)
     // stack:                                    ind06, x06, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                                                x07, x08, x09, ptr, x11
     DUP4   
     %add_const(07)
     // stack:                                         ind07, x07, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                                                     x08, x09, ptr, x11
     DUP3   
     %add_const(08)
     // stack:                                              ind08, x08, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                                                          x09, ptr, x11
     DUP2   
     %add_const(09)
     // stack:                                                   ind09, x09, ptr, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                                                               ptr, x11
     %add_const(11)
     // stack:                                                             ind11, x11
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:                                                            
 %endmacro
 
@@ -923,120 +945,120 @@
     // stack:              src, dest
     DUP1  
     // stack:       ind00, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x00, src, dest
     DUP3
     // stack: ind00', x00, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(1)
     // stack:       ind01, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x01, src, dest
     DUP3  
     %add_const(1)
     // stack: ind01', x01, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(2)
     // stack:       ind02, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x02, src, dest
     DUP3  
     %add_const(2)
     // stack: ind02', x02, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(3)
     // stack:       ind03, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x03, src, dest
     DUP3  
     %add_const(3)
     // stack: ind03', x03, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(4)
     // stack:       ind04, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x04, src, dest
     DUP3 
     %add_const(4)
     // stack: ind04', x04, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(5)
     // stack:       ind05, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x05, src, dest
     DUP3  
     %add_const(5)
     // stack: ind05', x05, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(6)
     // stack:       ind06, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x06, src, dest
     DUP3  
     %add_const(6)
     // stack: ind06', x06, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(7)
     // stack:       ind07, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x07, src, dest
     DUP3  
     %add_const(7)
     // stack: ind07', x07, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(8)
     // stack:       ind08, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x08, src, dest
     DUP3  
     %add_const(8)
     // stack: ind08', x08, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1 
     %add_const(9)
     // stack:       ind09, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x09, src, dest
     DUP3  
     %add_const(9)
     // stack: ind09', x09, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     DUP1  
     %add_const(10)
     // stack:       ind10, src, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:         x10, src, dest
     DUP3  
     %add_const(10)
     // stack: ind10', x10, src, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
     // stack:              src, dest
     %add_const(11)
     // stack:            ind11, dest
-    %mload_kernel_general
+    %mload_kernel_bn254_pairing
     // stack:              x11, dest
     DUP2  
     %add_const(11)
     // stack:      ind11', x11, dest
-    %mstore_kernel_general
+    %mstore_kernel_bn254_pairing
 %endmacro
 
 %macro assert_eq_unit_fp254_12
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 3b6734c3..6e618e9b 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -28,7 +28,7 @@ impl InterpreterSetup {
         for (pointer, data) in self.memory {
             for (i, term) in data.iter().enumerate() {
                 interpreter.generation_state.memory.set(
-                    MemoryAddress::new(0, Segment::KernelGeneral, pointer + i),
+                    MemoryAddress::new(0, Segment::BnPairing, pointer + i),
                     *term,
                 )
             }
@@ -43,7 +43,7 @@ fn extract_kernel_memory(range: Range<usize>, interpreter: Interpreter<'static>)
     for i in range {
         let term = interpreter.generation_state.memory.get(MemoryAddress::new(
             0,
-            Segment::KernelGeneral,
+            Segment::BnPairing,
             i,
         ));
         output.push(term);
diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs
index 4ae1afa4..c6ded3dd 100644
--- a/evm/src/memory/segments.rs
+++ b/evm/src/memory/segments.rs
@@ -43,10 +43,11 @@ pub(crate) enum Segment {
     BnWnafA = 19,
     BnWnafB = 20,
     BnTableQ = 21,
+    BnPairing = 22,
 }
 
 impl Segment {
-    pub(crate) const COUNT: usize = 22;
+    pub(crate) const COUNT: usize = 23;
 
     pub(crate) fn all() -> [Self; Self::COUNT] {
         [
@@ -72,6 +73,7 @@ impl Segment {
             Self::BnWnafA,
             Self::BnWnafB,
             Self::BnTableQ,
+            Self::BnPairing,
         ]
     }
 
@@ -100,6 +102,8 @@ impl Segment {
             Segment::BnWnafA => "SEGMENT_KERNEL_BN_WNAF_A",
             Segment::BnWnafB => "SEGMENT_KERNEL_BN_WNAF_B",
             Segment::BnTableQ => "SEGMENT_KERNEL_BN_TABLE_Q",
+            Segment::BnPairing => "SEGMENT_KERNEL_BN_PAIRING",
+
         }
     }
 
@@ -128,6 +132,7 @@ impl Segment {
             Segment::BnWnafA => 8,
             Segment::BnWnafB => 8,
             Segment::BnTableQ => 256,
+            Segment::BnPairing => 256,
         }
     }
 }
diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs
index fdf554a9..f026abbd 100644
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@@ -39,11 +39,11 @@ pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Opti
     )))
 }
 
-/// Peek at the kernel general item at address `i`
+/// Peek at the SEGMENT_KERNEL_BN_PAIRING item at address `i`
 pub(crate) fn kernel_general_peek<F: Field>(state: &GenerationState<F>, i: usize) -> U256 {
     state.memory.get(MemoryAddress::new(
         state.registers.context,
-        Segment::KernelGeneral,
+        Segment::BnPairing,
         i,
     ))
 }

From d320fbfbca4b1eef67773f8aa79c01ec11b4f73a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 13 Feb 2023 13:21:47 -0800
Subject: [PATCH 189/201] update curve add with ops

---
 .../bn254/curve_arithmetic/curve_add.asm      | 190 +++++++-----------
 .../bn254/curve_arithmetic/tate_pairing.asm   |   1 +
 evm/src/cpu/kernel/tests/bn254.rs             |  10 +-
 evm/src/memory/segments.rs                    |   1 -
 4 files changed, 82 insertions(+), 120 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index e85838ea..f562f7ad 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -1,16 +1,6 @@
-// #define N 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 // BN254 base field order
-
 // BN254 elliptic curve addition.
 // Uses the standard affine addition formula.
 global bn_add:
-    // Uncomment for test inputs.
-    // PUSH 0xdeadbeef
-    // PUSH 2
-    // PUSH 1
-    // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121
-    // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770
-    // stack: x0, y0, x1, y1, retdest
-
     // Check if points are valid BN254 points.
     DUP2
     // stack: y0, x0, y0, x1, y1, retdest
@@ -46,7 +36,7 @@ global bn_add_valid_points:
     // stack: x0, y0, x0, y0, x1, y1, retdest
     %ec_isidentity
     // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest
-    %jumpi(bn_add_first_zero)
+    %jumpi(bn_add_fst_zero)
     // stack: x0, y0, x1, y1, retdest
 
     // Check if the second point is the identity.
@@ -75,13 +65,13 @@ global bn_add_valid_points:
     // stack: y1, x0, y0, x1, y1, retdest
     DUP3
     // stack: y0, y1, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
     // stack: y0 - y1, x0, y0, x1, y1, retdest
     DUP4
     // stack: x1, y0 - y1, x0, y0, x1, y1, retdest
     DUP3
     // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
     // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest
     %divr_fp254
     // stack: lambda, x0, y0, x1, y1, retdest
@@ -89,7 +79,7 @@ global bn_add_valid_points:
 
 // BN254 elliptic curve addition.
 // Assumption: (x0,y0) == (0,0)
-bn_add_first_zero:
+bn_add_fst_zero:
     // stack: x0, y0, x1, y1, retdest
     // Just return (x1,y1)
     %stack (x0, y0, x1, y1, retdest) -> (retdest, x1, y1)
@@ -114,37 +104,33 @@ bn_add_valid_points_with_lambda:
     // stack: x0, lambda, x0, y0, x1, y1, retdest
     DUP5
     // stack: x1, x0, lambda, x0, y0, x1, y1, retdest
-    %bn_base
-    // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
+    DUP3
+    // stack: lambda, x1, x0, lambda, x0, y0, x1, y1, retdest
     DUP1
-    // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest
-    MULMOD
+    // stack: lambda, lambda, x1, x0, lambda, x0, y0, x1, y1, retdest
+    MULFP254
     // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
     // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
     // stack: x2, lambda, x0, y0, x1, y1, retdest
 
     // Compute y2 = lambda*(x1 - x2) - y1
-    %bn_base
-    // stack: N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP2
-    // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP7
-    // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    %submod
-    // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    DUP4
-    // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest
-    MULMOD
+    DUP1
+    // stack: x2, x2, lambda, x0, y0, x1, y1, retdest
+    DUP6
+    // stack: x1, x2, x2, lambda, x0, y0, x1, y1, retdest
+    SUBFP254
+    // stack: x1 - x2, x2, lambda, x0, y0, x1, y1, retdest
+    DUP3
+    // stack: lambda, x1 - x2, x2, lambda, x0, y0, x1, y1, retdest
+    MULFP254
     // stack: lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
     DUP7
     // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest
     SWAP1
     // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest
-    %submod
+    SUBFP254
     // stack: y2, x2, lambda, x0, y0, x1, y1, retdest
 
     // Return x2,y2
@@ -185,19 +171,15 @@ bn_add_equal_points:
     // stack: x0, y0, x1, y1, retdest
 
     // Compute lambda = 3/2 * x0^2 / y0
-    %bn_base
-    // stack: N, x0, y0, x1, y1, retdest
-    %bn_base
-    // stack: N, N, x0, y0, x1, y1, retdest
-    DUP3
-    // stack: x0, N, N, x0, y0, x1, y1, retdest
     DUP1
-    // stack: x0, x0, N, N, x0, y0, x1, y1, retdest
-    MULMOD
-    // stack: x0^2, N, x0, y0, x1, y1, retdest with
+    // stack: x0, x0, y0, x1, y1, retdest
+    DUP1
+    // stack: x0, x0, x0, y0, x1, y1, retdest
+    MULFP254
+    // stack: x0^2, x0, y0, x1, y1, retdest
     PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field
-    // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest
-    MULMOD
+    // stack: 3/2, x0^2, x0, y0, x1, y1, retdest
+    MULFP254
     // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest
     DUP3
     // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest
@@ -217,79 +199,59 @@ global bn_double:
     // stack: x, y, x, y, retdest
     %jump(bn_add_equal_points)
 
-// Push the order of the BN254 base field.
-%macro bn_base
-    PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47
-%endmacro
-
-// Assumption: x, y < N and 2N < 2^256.
-// Note: Doesn't hold for Secp256k1 base field.
-%macro submod
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    ADD
-    // stack: N + x, y // Doesn't overflow since 2N < 2^256
-    SUB
-    // stack: N + x - y // Doesn't underflow since y < N
-    %bn_base
-    // stack: N, N + x - y
-    SWAP1
-    // stack: N + x - y, N
-    MOD
-    // stack: (N + x - y) % N = (x-y) % N
-%endmacro
-
 // Check if (x,y) is a valid curve point.
-// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack.
+// Returns (range & curve) || ident
+// where
+//     range = (x < N) & (y < N) 
+//     curve = y^2 == (x^3 + 3) 
+//     ident = (x,y) == (0,0)
 %macro bn_check
-    // stack: x, y
-    %bn_base
-    // stack: N, x, y
-    DUP2
-    // stack: x, N, x, y
-    LT
-    // stack: x < N, x, y
-    %bn_base
-    // stack: N, x < N, x, y
-    DUP4
-    // stack: y, N, x < N, x, y
-    LT
-    // stack: y < N, x < N, x, y
-    AND
-    // stack: (y < N) & (x < N), x, y
-    %stack (b, x, y) -> (x, x, @BN_BASE, x, @BN_BASE, @BN_BASE, x, y, b)
-    // stack: x, x, N, x, N, N, x, y, b
-    MULMOD
-    // stack: x^2 % N, x, N, N, x, y, b
-    MULMOD
-    // stack: x^3 % N, N, x, y, b
-    PUSH 3
-    // stack: 3, x^3 % N, N, x, y, b
-    ADDMOD
-    // stack: (x^3 + 3) % N, x, y, b
-    DUP3
-    // stack: y, (x^3 + 3) % N, x, y, b
-    %bn_base
-    // stack: N, y, (x^3 + 3) % N, x, y, b
-    SWAP1
-    // stack: y, N, (x^3 + 3) % N, x, y, b
+    // stack:                       x, y
     DUP1
-    // stack: y, y, N, (x^3 + 3) % N, x, y, b
-    MULMOD
-    // stack: y^2 % N, (x^3 + 3) % N, x, y, b
-    EQ
-    // stack: y^2 % N == (x^3 + 3) % N, x, y, b
+    // stack:                    x, x, y
+    PUSH @BN_BASE
+    // stack:                N , x, x, y
+    DUP1
+    // stack:             N, N , x, x, y
+    DUP5
+    // stack:         y , N, N , x, x, y
+    LT  
+    // stack:         y < N, N , x, x, y
     SWAP2
-    // stack: y, x, y^2 % N == (x^3 + 3) % N, b
-    %ec_isidentity
-    // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b
-    SWAP2
-    // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0)
+    // stack:         x , N, y < N, x, y
+    LT
+    // stack:         x < N, y < N, x, y
     AND
-    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0)
+    // stack:                range, x, y
+    SWAP2
+    // stack:                y, x, range
+    DUP2 
+    // stack:           x  , y, x, range
+    DUP1 
+    DUP1
+    MULFP254
+    MULFP254
+    // stack:           x^3, y, x, range
+    PUSH 3
+    ADDFP254
+    // stack:       3 + x^3, y, x, range
+    DUP2
+    // stack:  y  , 3 + x^3, y, x, range
+    DUP1
+    MULFP254
+    // stack:  y^2, 3 + x^3, y, x, range
+    EQ
+    // stack:         curve, y, x, range
+    SWAP2
+    // stack:         x, y, curve, range
+    %ec_isidentity
+    // stack:       ident , curve, range
+    SWAP2
+    // stack:       range , curve, ident
+    AND
+    // stack:       range & curve, ident
     OR
-    // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0)
+    // stack:                   is_valid
 %endmacro
 
 // Return (u256::MAX, u256::MAX) which is used to indicate the input was invalid.
@@ -297,9 +259,9 @@ global bn_double:
     // stack: retdest
     PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
     // stack: u256::MAX, retdest
-    PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+    DUP1
     // stack: u256::MAX, u256::MAX, retdest
     SWAP2
     // stack: retdest, u256::MAX, u256::MAX
     JUMP
-%endmacro
\ No newline at end of file
+%endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index 375f9bc4..a4cebd72 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -33,6 +33,7 @@ global bn254_tate:
 /// def miller_zero():
 ///     0xnm -= 1
 ///     mul_tangent()
+
 global bn254_miller:
     // stack:         ptr, out, retdest
     %stack (ptr, out) -> (out, 1, ptr, out)
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 6e618e9b..a4c81ab2 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -41,11 +41,11 @@ impl InterpreterSetup {
 fn extract_kernel_memory(range: Range<usize>, interpreter: Interpreter<'static>) -> Vec<U256> {
     let mut output: Vec<U256> = vec![];
     for i in range {
-        let term = interpreter.generation_state.memory.get(MemoryAddress::new(
-            0,
-            Segment::BnPairing,
-            i,
-        ));
+        let term =
+            interpreter
+                .generation_state
+                .memory
+                .get(MemoryAddress::new(0, Segment::BnPairing, i));
         output.push(term);
     }
     output
diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs
index c6ded3dd..6793089f 100644
--- a/evm/src/memory/segments.rs
+++ b/evm/src/memory/segments.rs
@@ -103,7 +103,6 @@ impl Segment {
             Segment::BnWnafB => "SEGMENT_KERNEL_BN_WNAF_B",
             Segment::BnTableQ => "SEGMENT_KERNEL_BN_TABLE_Q",
             Segment::BnPairing => "SEGMENT_KERNEL_BN_PAIRING",
-
         }
     }
 

From 80e49caae1ec82a76ae197b56b2c7422a2551ebd Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 13 Feb 2023 14:04:43 -0800
Subject: [PATCH 190/201] segment virts

---
 .../curve_arithmetic/invariant_exponent.asm   | 236 +++++++++---------
 .../bn254/curve_arithmetic/tate_pairing.asm   |  60 ++---
 .../bn254/field_arithmetic/degree_12_mul.asm  |  76 +++---
 .../curve/bn254/field_arithmetic/inverse.asm  |   6 +-
 evm/src/cpu/kernel/tests/bn254.rs             |  18 +-
 5 files changed, 198 insertions(+), 198 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
index c74db1af..9d72767d 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
@@ -13,15 +13,15 @@ global bn254_invariant_exponent:
 ///     def first_exp(y):
 ///         return y.frob(6) / y
     // stack:                      out, retdest  {out: y}
-    %stack (out) -> (out, 100, first_exp, out)         
-    // stack: out, 100, first_exp, out, retdest  {out: y}
+    %stack (out) -> (out, 0, first_exp, out)         
+    // stack: out, 0, first_exp, out, retdest  {out: y}
     %jump(inv_fp254_12)
 first_exp:
-    // stack:                             out, retdest  {out: y  , 100: y^-1}
+    // stack:                             out, retdest  {out: y  , 0: y^-1}
     %frob_fp254_12_6
-    // stack:                             out, retdest  {out: y_6, 100: y^-1}
-    %stack (out) -> (out, 100, out, second_exp, out)
-    // stack:  out, 100, out, second_exp, out, retdest  {out: y_6, 100: y^-1}
+    // stack:                             out, retdest  {out: y_6, 0: y^-1}
+    %stack (out) -> (out, 0, out, second_exp, out)
+    // stack:  out, 0, out, second_exp, out, retdest  {out: y_6, 0: y^-1}
     %jump(mul_fp254_12)
 
 /// second, exponentiate by (p^2 + 1) via 
@@ -29,10 +29,10 @@ first_exp:
 ///         return y.frob(2) * y
 second_exp:
     // stack:                                out, retdest  {out: y}
-    %stack (out) -> (out, 100, out, out, final_exp, out)
-    // stack: out, 100, out, out, final_exp, out, retdest  {out: y}
+    %stack (out) -> (out, 0, out, out, final_exp, out)
+    // stack: out, 0, out, out, final_exp, out, retdest  {out: y}
     %frob_fp254_12_2_
-    // stack:      100, out, out, final_exp, out, retdest  {out: y, 100: y_2}
+    // stack:      0, out, out, final_exp, out, retdest  {out: y, 0: y_2}
     %jump(mul_fp254_12)
 
 /// Finally, we must exponentiate by (p^4 - p^2 + 1)/N
@@ -52,19 +52,19 @@ second_exp:
 ///     final_power()
 
 final_exp:
-    // stack:                  val, retdest
-    %stack (val) -> (val, 300, val)
-    // stack:        val, 300, val, retdest
+    // stack:                 val, retdest
+    %stack (val) -> (val, 60, val)
+    // stack:        val, 60, val, retdest
     %move_fp254_12
-    // stack:             300, val, retdest
+    // stack:             60, val, retdest
     %stack () -> (1, 1, 1)
-    // stack:    1, 1, 1, 300, val, retdest
-    %mstore_kernel_bn254_pairing(200)  
-    %mstore_kernel_bn254_pairing(224)  
-    %mstore_kernel_bn254_pairing(212)
-    // stack:             300, val, retdest  {200: y0, 212: y2, 224: y4}
+    // stack:    1, 1, 1, 60, val, retdest
+    %mstore_kernel_bn254_pairing(12)  
+    %mstore_kernel_bn254_pairing(36)  
+    %mstore_kernel_bn254_pairing(24)
+    // stack:             60, val, retdest  {12: y0, 24: y2, 36: y4}
     %stack () -> (64, 62, 65)
-    // stack: 64, 62, 65, 300, val, retdest  {200: y0, 212: y2, 224: y4}
+    // stack: 64, 62, 65, 60, val, retdest  {12: y0, 24: y2, 36: y4}
     %jump(power_loop_4)
 
 /// After computing the powers 
@@ -87,49 +87,49 @@ final_exp:
 ///     return y * y2 * y1 * y0
 
 custom_powers:
-    // stack:                             val, retdest  {200: y0, 212: y2, 224: y4}
-    %stack () -> (200, 236, make_term_1)
-    // stack:      200, 236, make_term_1, val, retdest  {200: y0, 212: y2, 224: y4}
+    // stack:                          val, retdest  {12: y0, 24: y2, 36: y4}
+    %stack () -> (12, 48, make_term_1)
+    // stack:     12, 48, make_term_1, val, retdest  {12: y0, 24: y2, 36: y4}
     %jump(inv_fp254_12)
 make_term_1:
-    // stack:                             val, retdest  {212: y2, 224: y4, 236: y0^-1}
-    %stack () -> (212, 224, 224, make_term_2)
-    // stack: 212, 224, 224, make_term_2, val, retdest  {212: y2, 224: y4, 236: y0^-1}
+    // stack:                          val, retdest  {24: y2, 36: y4, 48: y0^-1}
+    %stack () -> (24, 36, 36, make_term_2)
+    // stack: 24, 36, 36, make_term_2, val, retdest  {24: y2, 36: y4, 48: y0^-1}
     %jump(mul_fp254_12)
 make_term_2:
-    // stack:                             val, retdest  {212: y2, 224: y4 * y2, 236: y0^-1}
-    %stack () -> (212, 224, 224, make_term_3)
-    // stack: 212, 224, 224, make_term_3, val, retdest  {212: y2, 224: y4 * y2, 236: y0^-1}
+    // stack:                          val, retdest  {24: y2, 36: y4 * y2, 48: y0^-1}
+    %stack () -> (24, 36, 36, make_term_3)
+    // stack: 24, 36, 36, make_term_3, val, retdest  {24: y2, 36: y4 * y2, 48: y0^-1}
     %jump(mul_fp254_12)
 make_term_3:
-    // stack:                             val, retdest  {212: y2, 224: y4 * y2^2, 236: y0^-1}
-    %stack () -> (236, 224, 224, final_power)
-    // stack: 236, 224, 224, final_power, val, retdest  {212: y2, 224: y4 * y2^2, 236: y0^-1}
+    // stack:                          val, retdest  {24: y2, 36: y4 * y2^2, 48: y0^-1}
+    %stack () -> (48, 36, 36, final_power)
+    // stack: 48, 36, 36, final_power, val, retdest  {24: y2, 36: y4 * y2^2, 48: y0^-1}
     %jump(mul_fp254_12)
 final_power:
-    // stack:                            val, retdest  {val: y  , 212:  y^a2   , 224:  y^a1   , 236: y^a0}
+    // stack:                           val, retdest  {val: y  , 24:  y^a2   , 36:  y^a1   , 48: y^a0}
     %frob_fp254_12_3
-    // stack:                            val, retdest  {val: y_3, 212:  y^a2   , 224:  y^a1   , 236: y^a0}
-    %stack () -> (212, 212)
+    // stack:                           val, retdest  {val: y_3, 24:  y^a2   , 36:  y^a1   , 48: y^a0}
+    %stack () -> (24, 24)
     %frob_fp254_12_2_
     POP
-    // stack:                            val, retdest  {val: y_3, 212: (y^a2)_2, 224:  y^a1   , 236: y^a0}
-    PUSH 224
+    // stack:                           val, retdest  {val: y_3, 24: (y^a2)_2, 36:  y^a1   , 48: y^a0}
+    PUSH 36
     %frob_fp254_12_1
     POP
-    // stack:                            val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
-    %stack (val) -> (212, val, val, penult_mul, val)
-    // stack: 212, val, val, penult_mul, val, retdest  {val: y_3, 212: (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    // stack:                           val, retdest  {val: y_3, 24: (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
+    %stack (val) -> (24, val, val, penult_mul, val)
+    // stack: 24, val, val, penult_mul, val, retdest  {val: y_3, 24: (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
     %jump(mul_fp254_12)
 penult_mul:
-    // stack:                            val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
-    %stack (val) -> (224, val, val, final_mul, val)
-    // stack:  224, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 224: (y^a1)_1, 236: y^a0}
+    // stack:                          val, retdest  {val: y_3 * (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
+    %stack (val) -> (36, val, val, final_mul, val)
+    // stack: 36, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
     %jump(mul_fp254_12)
 final_mul: 
-    // stack:                            val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
-    %stack (val) -> (236, val, val)
-    // stack:                  236, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 236: y^a0}
+    // stack:                          val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 48: y^a0}
+    %stack (val) -> (48, val, val)
+    // stack:                 48, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 48: y^a0}
     %jump(mul_fp254_12)
 
 
@@ -164,156 +164,156 @@ final_mul:
 ///     y0 *= acc
 
 power_loop_4:
-    // stack:                                     i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                     i  , j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     ISZERO
-    // stack:                             break?, i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                             break?, i  , j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_4_end)
-    // stack:                                     i  , j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                     i  , j, k, sqr  {12: y0, 24: y2, 36: y4}
     %sub_const(1)
-    // stack:                                     i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                     i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     %mload_kernel_code(power_data_4)
-    // stack:                                abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                abc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     %lt_const(100)
-    // stack:                         skip?, abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                         skip?, abc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_4_b)
-    // stack:                                abc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                abc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %sub_const(100)
-    // stack:                                 bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %stack () -> (224, 224, power_loop_4_b)
-    // stack:      224, 224, power_loop_4_b,  bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                 bc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    %stack () -> (36, 36, power_loop_4_b)
+    // stack:      36, 36, power_loop_4_b,  bc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP8
-    // stack: sqr, 224, 224, power_loop_4_b,  bc, i-1, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 36, 36, power_loop_4_b,  bc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)
 power_loop_4_b:
-    // stack:                               bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                               bc, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     %lt_const(10)
-    // stack:                        skip?, bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                        skip?, bc, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_4_c)
-    // stack:                               bc, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                               bc, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %sub_const(10)
-    // stack:                                c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %stack () -> (212, 212, power_loop_4_c)
-    // stack:      212, 212, power_loop_4_c, c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                c, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    %stack () -> (24, 24, power_loop_4_c)
+    // stack:      24, 24, power_loop_4_c, c, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP8
-    // stack: sqr, 212, 212, power_loop_4_c, c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 24, 24, power_loop_4_c, c, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)
 power_loop_4_c:
-    // stack:                              c, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                              c, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     ISZERO
-    // stack:                          skip?, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                          skip?, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_4_sq)
-    // stack:                                 i, j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %stack () -> (200, 200, power_loop_4_sq)
-    // stack:      200, 200, power_loop_4_sq, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                 i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    %stack () -> (12, 12, power_loop_4_sq)
+    // stack:      12, 12, power_loop_4_sq, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP7
-    // stack: sqr, 200, 200, power_loop_4_sq, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 12, 12, power_loop_4_sq, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)
 power_loop_4_sq:
-    // stack:                         i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                         i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     PUSH power_loop_4  
-    // stack:           power_loop_4, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:           power_loop_4, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP5  
     DUP1
-    // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(square_fp254_12)
 power_loop_4_end:
-    // stack:                           0, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                           0, j, k, sqr  {12: y0, 24: y2, 36: y4}
     POP  
-    // stack:                              j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %stack () -> (224, 224, power_loop_2) 
-    // stack:      224, 224, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                              j, k, sqr  {12: y0, 24: y2, 36: y4}
+    %stack () -> (36, 36, power_loop_2) 
+    // stack:      36, 36, power_loop_2, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP6
-    // stack: sqr, 224, 224, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 36, 36, power_loop_2, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)
 
 power_loop_2:
-    // stack:                                   j  , k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                   j  , k, sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     ISZERO
-    // stack:                           break?, j  , k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                           break?, j  , k, sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_2_end)
-    // stack:                                   j  , k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                   j  , k, sqr  {12: y0, 24: y2, 36: y4}
     %sub_const(1)
-    // stack:                                   j-1, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                   j-1, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     %mload_kernel_code(power_data_2)
-    // stack:                               ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                               ab, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     %lt_const(10)
-    // stack:                        skip?, ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                        skip?, ab, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_2_b)
-    // stack:                               ab, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                               ab, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
     %sub_const(10)
-    // stack:                                b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
-    %stack () -> (212, 212, power_loop_2_b) 
-    // stack:      212, 212, power_loop_2_b, b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                b, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
+    %stack () -> (24, 24, power_loop_2_b) 
+    // stack:      24, 24, power_loop_2_b, b, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP7
-    // stack: sqr, 212, 212, power_loop_2_b, b, j-1, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 24, 24, power_loop_2_b, b, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)
 power_loop_2_b:
-    // stack:                              b, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                              b, j, k, sqr  {12: y0, 24: y2, 36: y4}
     ISZERO
-    // stack:                          skip?, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                          skip?, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_2_sq)
-    // stack:                                 j, k, sqr  {200: y0, 212: y2, 224: y4}
-    %stack () -> (200, 200, power_loop_2_sq) 
-    // stack:      200, 200, power_loop_2_sq, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                 j, k, sqr  {12: y0, 24: y2, 36: y4}
+    %stack () -> (12, 12, power_loop_2_sq) 
+    // stack:      12, 12, power_loop_2_sq, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP6
-    // stack: sqr, 200, 200, power_loop_2_sq, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 12, 12, power_loop_2_sq, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)
 power_loop_2_sq:
-    // stack:                         j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                         j, k, sqr  {12: y0, 24: y2, 36: y4}
     PUSH power_loop_2  
-    // stack:           power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:           power_loop_2, j, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP4  
     DUP1
-    // stack: sqr, sqr, power_loop_2, j, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, sqr, power_loop_2, j, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(square_fp254_12)
 power_loop_2_end:
-    // stack:                           0, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                           0, k, sqr  {12: y0, 24: y2, 36: y4}
     POP  
-    // stack:                              k, sqr  {200: y0, 212: y2, 224: y4}
-    %stack () -> (212, 212, power_loop_0)
-    // stack:      212, 212, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                              k, sqr  {12: y0, 24: y2, 36: y4}
+    %stack () -> (24, 24, power_loop_0)
+    // stack:      24, 24, power_loop_0, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP5
-    // stack: sqr, 212, 212, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 24, 24, power_loop_0, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)
 
 power_loop_0:
-    // stack:                                 k  , sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                 k  , sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     ISZERO
-    // stack:                         break?, k  , sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                         break?, k  , sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_0_end)
-    // stack:                                 k  , sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                 k  , sqr  {12: y0, 24: y2, 36: y4}
     %sub_const(1)
-    // stack:                                 k-1, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                 k-1, sqr  {12: y0, 24: y2, 36: y4}
     DUP1  
     %mload_kernel_code(power_data_0)
-    // stack:                              a, k-1, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                              a, k-1, sqr  {12: y0, 24: y2, 36: y4}
     ISZERO
-    // stack:                          skip?, k-1, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                          skip?, k-1, sqr  {12: y0, 24: y2, 36: y4}
     %jumpi(power_loop_0_sq)
-    // stack:                                 k-1, sqr  {200: y0, 212: y2, 224: y4}
-    %stack () -> (200, 200, power_loop_0_sq)  
-    // stack:      200, 200, power_loop_0_sq, k-1, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                                 k-1, sqr  {12: y0, 24: y2, 36: y4}
+    %stack () -> (12, 12, power_loop_0_sq)  
+    // stack:      12, 12, power_loop_0_sq, k-1, sqr  {12: y0, 24: y2, 36: y4}
     DUP5
-    // stack: sqr, 200, 200, power_loop_0_sq, k-1, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, 12, 12, power_loop_0_sq, k-1, sqr  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)
 power_loop_0_sq:
-    // stack:                         k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:                         k, sqr  {12: y0, 24: y2, 36: y4}
     PUSH power_loop_0  
-    // stack:           power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack:           power_loop_0, k, sqr  {12: y0, 24: y2, 36: y4}
     DUP3  
     DUP1
-    // stack: sqr, sqr, power_loop_0, k, sqr  {200: y0, 212: y2, 224: y4}
+    // stack: sqr, sqr, power_loop_0, k, sqr  {12: y0, 24: y2, 36: y4}
     %jump(square_fp254_12)
 power_loop_0_end:
-    // stack:                         0, sqr  {200: y0, 212: y2, 224: y4}
-    %stack (i, sqr) -> (200, sqr, 200, custom_powers)
-    // stack:   200, sqr, 200, custom_powers  {200: y0, 212: y2, 224: y4}
+    // stack:                         0, sqr  {12: y0, 24: y2, 36: y4}
+    %stack (i, sqr) -> (12, sqr, 12, custom_powers)
+    // stack:   12, sqr, 12, custom_powers  {12: y0, 24: y2, 36: y4}
     %jump(mul_fp254_12)    
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
index a4cebd72..f09684bd 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm
@@ -109,35 +109,35 @@ mul_tangent:
     // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %jump(square_fp254_12)
 mul_tangent_1:
-    // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // stack:         out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     DUP13
     DUP13
     DUP13
     DUP13
-    // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // stack:      Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     DUP11  
     DUP11
-    // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
+    // stack:   O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %tangent
-    // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
-    %stack (out) -> (out, 100, out)
-    // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
+    // stack:         out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {0: line}
+    %stack (out) -> (out, 0, out)
+    // stack: out, 0, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {0: line}
     %jump(mul_fp254_12_sparse)
 mul_tangent_2:
-    // stack:                  retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    // stack:                  retdest, 0xnm, times,   O, P, Q, out  {0: line}
     PUSH after_double
-    // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {0: line}
     DUP6  
     DUP6
-    // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {0: line}
     %jump(bn_double)
 after_double:
-    // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
+    // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {0: line}
     SWAP5
     POP
     SWAP5
     POP
-    // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
+    // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {0: line}
     JUMP
 
 /// def mul_cord()
@@ -146,26 +146,26 @@ after_double:
 ///     O += P
 
 mul_cord:
-    // stack:                            0xnm, times, O, P, Q, out
+    // stack:                          0xnm, times, O, P, Q, out
     PUSH mul_cord_1
-    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:              mul_cord_1, 0xnm, times, O, P, Q, out
     DUP11  
     DUP11  
     DUP11  
     DUP11
-    // stack:             Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:           Q, mul_cord_1, 0xnm, times, O, P, Q, out
     DUP9  
     DUP9
-    // stack:          O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:        O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
     DUP13  
     DUP13
-    // stack:       P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
+    // stack:     P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
     %cord 
-    // stack:                mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
+    // stack:              mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
     DUP12
-    // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    %stack (out) -> (out, 100, out)
-    // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
+    // stack:         out, mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
+    %stack (out) -> (out, 0, out)
+    // stack: out, 0, out, mul_cord_1, 0xnm, times, O, P, Q, out  {0: line}
     %jump(mul_fp254_12_sparse)
 mul_cord_1:
     // stack:                   0xnm, times, O  , P, Q, out
@@ -202,7 +202,7 @@ after_add:
     // stack:      py^2 , 9, px, py, qx, qx_,  qy, qy_
     SUBFP254
     // stack:      py^2 - 9, px, py, qx, qx_,  qy, qy_
-    %mstore_kernel_bn254_pairing(100)
+    %mstore_kernel_bn254_pairing(0)
     // stack:                px, py, qx, qx_,  qy, qy_
     DUP1  
     MULFP254
@@ -218,7 +218,7 @@ after_add:
     DUP3  
     MULFP254
     // stack:   (-3*px^2)qx, py, -3px^2, qx_,  qy, qy_ 
-    %mstore_kernel_bn254_pairing(102)
+    %mstore_kernel_bn254_pairing(2)
     // stack:                py, -3px^2, qx_,  qy, qy_ 
     PUSH 2  
     MULFP254
@@ -228,15 +228,15 @@ after_add:
     DUP4  
     MULFP254
     // stack:           (2py)qy, -3px^2, qx_, 2py, qy_ 
-    %mstore_kernel_bn254_pairing(108)
+    %mstore_kernel_bn254_pairing(8)
     // stack:                    -3px^2, qx_, 2py, qy_ 
     MULFP254
     // stack:                   (-3px^2)*qx_, 2py, qy_ 
-    %mstore_kernel_bn254_pairing(103)
+    %mstore_kernel_bn254_pairing(3)
     // stack:                                 2py, qy_ 
     MULFP254
     // stack:                                (2py)*qy_ 
-    %mstore_kernel_bn254_pairing(109)
+    %mstore_kernel_bn254_pairing(9)
 %endmacro
 
 /// def cord(p1x, p1y, p2x, p2y, qx, qy):
@@ -258,7 +258,7 @@ after_add:
     // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SUBFP254
     // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    %mstore_kernel_bn254_pairing(100)
+    %mstore_kernel_bn254_pairing(0)
     // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SWAP3
     // stack:                    p2y , p1y, p2x , p1x, qx, qx_, qy, qy_
@@ -273,20 +273,20 @@ after_add:
     DUP5
     MULFP254
     // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    %mstore_kernel_bn254_pairing(108)
+    %mstore_kernel_bn254_pairing(8)
     // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
     SWAP1
     // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
     DUP2
     MULFP254
     // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
-    %mstore_kernel_bn254_pairing(102)
+    %mstore_kernel_bn254_pairing(2)
     // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
     MULFP254
     // stack:                            (p2y - p1y)qx_, p1x - p2x, qy_
-    %mstore_kernel_bn254_pairing(103)
+    %mstore_kernel_bn254_pairing(3)
     // stack:                                            p1x - p2x, qy_
     MULFP254
     // stack:                                           (p1x - p2x)*qy_
-    %mstore_kernel_bn254_pairing(109)
+    %mstore_kernel_bn254_pairing(9)
 %endmacro
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
index 9b3e67cf..5fd47e80 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm
@@ -66,73 +66,73 @@ mul_fp254_12_1:
     // stack:                   f'g', g'  , f', inA, inB, out 
     %dup_fp254_6_0
     // stack:             f'g', f'g', g'  , f', inA, inB, out 
-    %store_fp254_6_sh(0)                                    
-    // stack:                   f'g', g'  , f', inA, inB, out  {0: sh(f'g')}
-    %store_fp254_6(6)
-    // stack:                         g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    %store_fp254_6_sh(84)                                    
+    // stack:                   f'g', g'  , f', inA, inB, out  {84: sh(f'g')}
+    %store_fp254_6(90)
+    // stack:                         g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     DUP13
-    // stack:                    inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                    inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     DUP15  
-    // stack:               inB, inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:               inB, inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     %load_fp254_6
-    // stack:                g , inA, g'  , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                g , inA, g'  , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     %stack (f: 6, x, g: 6) -> (g, x, f)
-    // stack:                g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     %dup_fp254_6_7
-    // stack:              g,g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:              g,g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     %add_fp254_6
-    // stack:              g+g', inA, g   , f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:              g+g', inA, g   , f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     %stack (f: 6, x, g: 6) -> (g, x, f)
-    // stack:                 g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:                 g, inA, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     PUSH mul_fp254_12_2
-    // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     SWAP7
-    // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     %load_fp254_6
-    // stack:   f, g, mul_fp254_12_2, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
+    // stack:   f, g, mul_fp254_12_2, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
     %jump(mul_fp254_6)
 mul_fp254_12_2:    
-    // stack:                     fg, g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g'}
-    %store_fp254_6(12)
-    // stack:                         g+g', f', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                     fg, g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g'}
+    %store_fp254_6(96)
+    // stack:                         g+g', f', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %stack (x: 6, y: 6) -> (y, x)
-    // stack:                         f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                         f', g+g', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     PUSH mul_fp254_12_3
-    // stack:         mul_fp254_12_3, f', g+g', inA, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:         mul_fp254_12_3, f', g+g', inA, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     SWAP13
-    // stack:         inA, f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:         inA, f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %load_fp254_6
-    // stack:            f,f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:            f,f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %add_fp254_6
-    // stack:            f+f', g+g', mul_fp254_12_3, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:            f+f', g+g', mul_fp254_12_3, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %jump(mul_fp254_6)
 mul_fp254_12_3:
-    // stack:                          (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %load_fp254_6(12)
-    // stack:                      fg, (f+f')(g+g'), inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                          (f+f')(g+g'), inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %load_fp254_6(96)
+    // stack:                      fg, (f+f')(g+g'), inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %stack (x: 6, y: 6) -> (y, x)
-    // stack:                      (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                      (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %dup_fp254_6_6
-    // stack:                  fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %load_fp254_6(6)
-    // stack:             f'g',fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                  fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %load_fp254_6(90)
+    // stack:             f'g',fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %add_fp254_6
-    // stack:             f'g'+fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:             f'g'+fg, (f+f')(g+g'), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %subr_fp254_6
-    // stack:          (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
+    // stack:          (f+f')(g+g') - (f'g'+fg), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}   
     DUP14  
     %add_const(6) 
-    // stack:    out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
+    // stack:    out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}   
     %store_fp254_6
-    // stack:                                    fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %load_fp254_6(0)
-    // stack:                         sh(f'g') , fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                                    fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
+    %load_fp254_6(84)
+    // stack:                         sh(f'g') , fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %add_fp254_6
-    // stack:                         sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                         sh(f'g') + fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     DUP8
-    // stack:                    out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                    out, sh(f'g') + fg, inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %store_fp254_6
-    // stack:                                        inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
+    // stack:                                        inB, out  {84: sh(f'g'), 90: f'g', 96: fg}
     %pop2  
     JUMP
 
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 72ca051b..8e821f82 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -29,12 +29,12 @@ global inv_fp254_12:
     // stack:              out, f^-1, inp, out, retdest
     %store_fp254_12
     // stack:                         inp, out, retdest
-    %stack (inp, out) -> (inp, out, 50, check_inv_fp254_12)
-    // stack: inp, out, 50, check_inv_fp254_12, retdest 
+    %stack (inp, out) -> (inp, out, 72, check_inv_fp254_12)
+    // stack: inp, out, 72, check_inv_fp254_12, retdest 
     %jump(mul_fp254_12)
 check_inv_fp254_12:
     // stack:        retdest
-    PUSH 50  
+    PUSH 72  
     %load_fp254_12
     // stack: unit?, retdest
     %assert_eq_unit_fp254_12
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index a4c81ab2..4643351a 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -94,8 +94,8 @@ fn test_mul_fp6() -> Result<()> {
 }
 
 fn setup_mul_fp12_test(out: usize, f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
-    let in0: usize = 64;
-    let in1: usize = 76;
+    let in0: usize = 200;
+    let in1: usize = 212;
 
     let mut stack = vec![
         U256::from(in0),
@@ -115,7 +115,7 @@ fn setup_mul_fp12_test(out: usize, f: Fp12, g: Fp12, label: &str) -> Interpreter
 
 #[test]
 fn test_mul_fp12() -> Result<()> {
-    let out: usize = 88;
+    let out: usize = 224;
 
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
@@ -191,7 +191,7 @@ fn setup_frob_fp12_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
 
 #[test]
 fn test_frob_fp12() -> Result<()> {
-    let ptr: usize = 100;
+    let ptr: usize = 200;
 
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
@@ -226,8 +226,8 @@ fn test_frob_fp12() -> Result<()> {
 
 #[test]
 fn test_inv_fp12() -> Result<()> {
-    let ptr: usize = 100;
-    let inv: usize = 112;
+    let ptr: usize = 200;
+    let inv: usize = 212;
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
 
@@ -247,7 +247,7 @@ fn test_inv_fp12() -> Result<()> {
 
 #[test]
 fn test_invariant_exponent() -> Result<()> {
-    let ptr: usize = 400;
+    let ptr: usize = 200;
 
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
@@ -348,8 +348,8 @@ fn test_miller() -> Result<()> {
 
 #[test]
 fn test_tate() -> Result<()> {
-    let ptr: usize = 300;
-    let out: usize = 400;
+    let ptr: usize = 200;
+    let out: usize = 206;
     let inputs: Vec<U256> = vec![
         CURVE_GENERATOR.x.val,
         CURVE_GENERATOR.y.val,

From 444da8f760c8798458660ea1aca37ae369c01949 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 13 Feb 2023 14:31:53 -0800
Subject: [PATCH 191/201] better comments

---
 .../curve_arithmetic/invariant_exponent.asm   | 222 +++++++++---------
 1 file changed, 111 insertions(+), 111 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
index 9d72767d..2fcd5d2b 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm
@@ -12,14 +12,14 @@ global bn254_invariant_exponent:
 /// first, exponentiate by (p^6 - 1) via
 ///     def first_exp(y):
 ///         return y.frob(6) / y
-    // stack:                      out, retdest  {out: y}
+    // stack:                    out, retdest  {out: y}
     %stack (out) -> (out, 0, first_exp, out)         
     // stack: out, 0, first_exp, out, retdest  {out: y}
     %jump(inv_fp254_12)
 first_exp:
-    // stack:                             out, retdest  {out: y  , 0: y^-1}
+    // stack:                           out, retdest  {out: y  , 0: y^-1}
     %frob_fp254_12_6
-    // stack:                             out, retdest  {out: y_6, 0: y^-1}
+    // stack:                           out, retdest  {out: y_6, 0: y^-1}
     %stack (out) -> (out, 0, out, second_exp, out)
     // stack:  out, 0, out, second_exp, out, retdest  {out: y_6, 0: y^-1}
     %jump(mul_fp254_12)
@@ -28,7 +28,7 @@ first_exp:
 ///     def second_exp(y):
 ///         return y.frob(2) * y
 second_exp:
-    // stack:                                out, retdest  {out: y}
+    // stack:                              out, retdest  {out: y}
     %stack (out) -> (out, 0, out, out, final_exp, out)
     // stack: out, 0, out, out, final_exp, out, retdest  {out: y}
     %frob_fp254_12_2_
@@ -53,18 +53,18 @@ second_exp:
 
 final_exp:
     // stack:                 val, retdest
-    %stack (val) -> (val, 60, val)
-    // stack:        val, 60, val, retdest
+    %stack (val) -> (val, 12, val)
+    // stack:        val, 12, val, retdest
     %move_fp254_12
-    // stack:             60, val, retdest
+    // stack:             12, val, retdest  {12: sqr}
     %stack () -> (1, 1, 1)
-    // stack:    1, 1, 1, 60, val, retdest
-    %mstore_kernel_bn254_pairing(12)  
+    // stack:    1, 1, 1, 12, val, retdest
+    %mstore_kernel_bn254_pairing(24)  
     %mstore_kernel_bn254_pairing(36)  
-    %mstore_kernel_bn254_pairing(24)
-    // stack:             60, val, retdest  {12: y0, 24: y2, 36: y4}
+    %mstore_kernel_bn254_pairing(48)
+    // stack:             12, val, retdest  {12: sqr, 24: y0, 36: y2, 48: y4}
     %stack () -> (64, 62, 65)
-    // stack: 64, 62, 65, 60, val, retdest  {12: y0, 24: y2, 36: y4}
+    // stack: 64, 62, 65, 12, val, retdest  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(power_loop_4)
 
 /// After computing the powers 
@@ -87,49 +87,49 @@ final_exp:
 ///     return y * y2 * y1 * y0
 
 custom_powers:
-    // stack:                          val, retdest  {12: y0, 24: y2, 36: y4}
-    %stack () -> (12, 48, make_term_1)
-    // stack:     12, 48, make_term_1, val, retdest  {12: y0, 24: y2, 36: y4}
+    // stack:                           val, retdest  {24: y0, 36: y2, 48: y4}
+    %stack () -> (24, 60, make_term_1)
+    // stack:      24, 60, make_term_1, val, retdest  {24: y0, 36: y2, 48: y4}
     %jump(inv_fp254_12)
 make_term_1:
-    // stack:                          val, retdest  {24: y2, 36: y4, 48: y0^-1}
-    %stack () -> (24, 36, 36, make_term_2)
-    // stack: 24, 36, 36, make_term_2, val, retdest  {24: y2, 36: y4, 48: y0^-1}
+    // stack:                           val, retdest  {36: y2, 48: y4, 60: y0^-1}
+    %stack () -> (36, 48, 48, make_term_2)
+    // stack:  36, 48, 48, make_term_2, val, retdest  {36: y2, 48: y4, 60: y0^-1}
     %jump(mul_fp254_12)
 make_term_2:
-    // stack:                          val, retdest  {24: y2, 36: y4 * y2, 48: y0^-1}
-    %stack () -> (24, 36, 36, make_term_3)
-    // stack: 24, 36, 36, make_term_3, val, retdest  {24: y2, 36: y4 * y2, 48: y0^-1}
+    // stack:                           val, retdest  {36: y2, 48: y4 * y2, 60: y0^-1}
+    %stack () -> (36, 48, 48, make_term_3)
+    // stack:  36, 48, 48, make_term_3, val, retdest  {36: y2, 48: y4 * y2, 60: y0^-1}
     %jump(mul_fp254_12)
 make_term_3:
-    // stack:                          val, retdest  {24: y2, 36: y4 * y2^2, 48: y0^-1}
-    %stack () -> (48, 36, 36, final_power)
-    // stack: 48, 36, 36, final_power, val, retdest  {24: y2, 36: y4 * y2^2, 48: y0^-1}
+    // stack:                           val, retdest  {36: y2, 48: y4 * y2^2, 60: y0^-1}
+    %stack () -> (60, 48, 48, final_power)
+    // stack:  60, 48, 48, final_power, val, retdest  {36: y2, 48: y4 * y2^2, 60: y0^-1}
     %jump(mul_fp254_12)
 final_power:
-    // stack:                           val, retdest  {val: y  , 24:  y^a2   , 36:  y^a1   , 48: y^a0}
+    // stack:                           val, retdest  {val: y  , 36:  y^a2   , 48:  y^a1   , 60: y^a0}
     %frob_fp254_12_3
-    // stack:                           val, retdest  {val: y_3, 24:  y^a2   , 36:  y^a1   , 48: y^a0}
-    %stack () -> (24, 24)
+    // stack:                           val, retdest  {val: y_3, 36:  y^a2   , 48:  y^a1   , 60: y^a0}
+    %stack () -> (36, 36)
     %frob_fp254_12_2_
     POP
-    // stack:                           val, retdest  {val: y_3, 24: (y^a2)_2, 36:  y^a1   , 48: y^a0}
-    PUSH 36
+    // stack:                           val, retdest  {val: y_3, 36: (y^a2)_2, 48:  y^a1   , 60: y^a0}
+    PUSH 48
     %frob_fp254_12_1
     POP
-    // stack:                           val, retdest  {val: y_3, 24: (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
-    %stack (val) -> (24, val, val, penult_mul, val)
-    // stack: 24, val, val, penult_mul, val, retdest  {val: y_3, 24: (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
+    // stack:                           val, retdest  {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
+    %stack (val) -> (36, val, val, penult_mul, val)
+    // stack: 36, val, val, penult_mul, val, retdest  {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
     %jump(mul_fp254_12)
 penult_mul:
-    // stack:                          val, retdest  {val: y_3 * (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
-    %stack (val) -> (36, val, val, final_mul, val)
-    // stack: 36, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 36: (y^a1)_1, 48: y^a0}
+    // stack:                           val, retdest  {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
+    %stack (val) -> (48, val, val, final_mul, val)
+    // stack:  48, val, val, final_mul, val, retdest  {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0}
     %jump(mul_fp254_12)
 final_mul: 
-    // stack:                          val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 48: y^a0}
-    %stack (val) -> (48, val, val)
-    // stack:                 48, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 48: y^a0}
+    // stack:                           val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0}
+    %stack (val) -> (60, val, val)
+    // stack:                  60, val, val, retdest  {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0}
     %jump(mul_fp254_12)
 
 
@@ -164,156 +164,156 @@ final_mul:
 ///     y0 *= acc
 
 power_loop_4:
-    // stack:                                     i  , j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                                   i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     ISZERO
-    // stack:                             break?, i  , j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                           break?, i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_4_end)
-    // stack:                                     i  , j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                                   i  , j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %sub_const(1)
-    // stack:                                     i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                                   i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     %mload_kernel_code(power_data_4)
-    // stack:                                abc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                              abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     %lt_const(100)
-    // stack:                         skip?, abc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                       skip?, abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_4_b)
-    // stack:                                abc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                              abc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %sub_const(100)
-    // stack:                                 bc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
-    %stack () -> (36, 36, power_loop_4_b)
-    // stack:      36, 36, power_loop_4_b,  bc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                               bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (48, 48, power_loop_4_b)
+    // stack:      48, 48, power_loop_4_b,  bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP8
-    // stack: sqr, 36, 36, power_loop_4_b,  bc, i-1, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, 48, 48, power_loop_4_b,  bc, i-1, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)
 power_loop_4_b:
-    // stack:                               bc, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                             bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     %lt_const(10)
-    // stack:                        skip?, bc, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                      skip?, bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_4_c)
-    // stack:                               bc, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                             bc, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %sub_const(10)
-    // stack:                                c, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
-    %stack () -> (24, 24, power_loop_4_c)
-    // stack:      24, 24, power_loop_4_c, c, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                              c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (36, 36, power_loop_4_c)
+    // stack:      36, 36, power_loop_4_c, c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP8
-    // stack: sqr, 24, 24, power_loop_4_c, c, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, 36, 36, power_loop_4_c, c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)
 power_loop_4_c:
-    // stack:                              c, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                            c, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     ISZERO
-    // stack:                          skip?, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                        skip?, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_4_sq)
-    // stack:                                 i, j, k, sqr  {12: y0, 24: y2, 36: y4}
-    %stack () -> (12, 12, power_loop_4_sq)
-    // stack:      12, 12, power_loop_4_sq, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                               i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (24, 24, power_loop_4_sq)
+    // stack:      24, 24, power_loop_4_sq, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP7
-    // stack: sqr, 12, 12, power_loop_4_sq, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, 24, 24, power_loop_4_sq, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)
 power_loop_4_sq:
-    // stack:                         i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                         i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     PUSH power_loop_4  
-    // stack:           power_loop_4, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:           power_loop_4, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP5  
     DUP1
-    // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, sqr, power_loop_4, i, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(square_fp254_12)
 power_loop_4_end:
-    // stack:                           0, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                         0, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     POP  
-    // stack:                              j, k, sqr  {12: y0, 24: y2, 36: y4}
-    %stack () -> (36, 36, power_loop_2) 
-    // stack:      36, 36, power_loop_2, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                            j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (48, 48, power_loop_2) 
+    // stack:      48, 48, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP6
-    // stack: sqr, 36, 36, power_loop_2, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, 48, 48, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)
 
 power_loop_2:
-    // stack:                                   j  , k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                                   j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     ISZERO
-    // stack:                           break?, j  , k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                         break?, j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_2_end)
-    // stack:                                   j  , k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                                 j  , k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %sub_const(1)
-    // stack:                                   j-1, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                                 j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     %mload_kernel_code(power_data_2)
-    // stack:                               ab, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                             ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     %lt_const(10)
-    // stack:                        skip?, ab, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                      skip?, ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_2_b)
-    // stack:                               ab, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                             ab, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %sub_const(10)
-    // stack:                                b, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
-    %stack () -> (24, 24, power_loop_2_b) 
-    // stack:      24, 24, power_loop_2_b, b, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                              b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (36, 36, power_loop_2_b) 
+    // stack:      36, 36, power_loop_2_b, b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP7
-    // stack: sqr, 24, 24, power_loop_2_b, b, j-1, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, 36, 36, power_loop_2_b, b, j-1, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)
 power_loop_2_b:
-    // stack:                              b, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                            b, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     ISZERO
-    // stack:                          skip?, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                        skip?, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_2_sq)
-    // stack:                                 j, k, sqr  {12: y0, 24: y2, 36: y4}
-    %stack () -> (12, 12, power_loop_2_sq) 
-    // stack:      12, 12, power_loop_2_sq, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                               j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (24, 24, power_loop_2_sq) 
+    // stack:      24, 24, power_loop_2_sq, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP6
-    // stack: sqr, 12, 12, power_loop_2_sq, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, 24, 24, power_loop_2_sq, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)
 power_loop_2_sq:
-    // stack:                         j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                         j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     PUSH power_loop_2  
-    // stack:           power_loop_2, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:           power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP4  
     DUP1
-    // stack: sqr, sqr, power_loop_2, j, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, sqr, power_loop_2, j, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(square_fp254_12)
 power_loop_2_end:
-    // stack:                           0, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                         0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     POP  
-    // stack:                              k, sqr  {12: y0, 24: y2, 36: y4}
-    %stack () -> (24, 24, power_loop_0)
-    // stack:      24, 24, power_loop_0, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                            k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (36, 36, power_loop_0)
+    // stack:      36, 36, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP5
-    // stack: sqr, 24, 24, power_loop_0, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, 36, 36, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)
 
 power_loop_0:
-    // stack:                                 k  , sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                               k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     ISZERO
-    // stack:                         break?, k  , sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                       break?, k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_0_end)
-    // stack:                                 k  , sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                               k  , sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %sub_const(1)
-    // stack:                                 k-1, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                               k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP1  
     %mload_kernel_code(power_data_0)
-    // stack:                              a, k-1, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                            a, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     ISZERO
-    // stack:                          skip?, k-1, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                        skip?, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jumpi(power_loop_0_sq)
-    // stack:                                 k-1, sqr  {12: y0, 24: y2, 36: y4}
-    %stack () -> (12, 12, power_loop_0_sq)  
-    // stack:      12, 12, power_loop_0_sq, k-1, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                               k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack () -> (24, 24, power_loop_0_sq)  
+    // stack:      24, 24, power_loop_0_sq, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP5
-    // stack: sqr, 12, 12, power_loop_0_sq, k-1, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, 24, 24, power_loop_0_sq, k-1, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)
 power_loop_0_sq:
-    // stack:                         k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:                         k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     PUSH power_loop_0  
-    // stack:           power_loop_0, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack:           power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     DUP3  
     DUP1
-    // stack: sqr, sqr, power_loop_0, k, sqr  {12: y0, 24: y2, 36: y4}
+    // stack: sqr, sqr, power_loop_0, k, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(square_fp254_12)
 power_loop_0_end:
-    // stack:                         0, sqr  {12: y0, 24: y2, 36: y4}
-    %stack (i, sqr) -> (12, sqr, 12, custom_powers)
-    // stack:   12, sqr, 12, custom_powers  {12: y0, 24: y2, 36: y4}
+    // stack:                       0, sqr  {12: sqr, 24: y0, 36: y2, 48: y4}
+    %stack (i, sqr) -> (24, sqr, 24, custom_powers)
+    // stack:   24, sqr, 24, custom_powers  {12: sqr, 24: y0, 36: y2, 48: y4}
     %jump(mul_fp254_12)    

From 63f1fbfaea76acc9e77045e336e6889b98f1cd3b Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 16 Feb 2023 19:45:33 -0800
Subject: [PATCH 192/201] fmt

---
 evm/src/cpu/kernel/tests/bn254.rs | 131 ++++++++++++++----------------
 1 file changed, 63 insertions(+), 68 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 4643351a..e7597ab5 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -8,63 +8,43 @@ use crate::bn254_arithmetic::{Fp, Fp12, Fp2, Fp6};
 use crate::bn254_pairing::{
     gen_fp12_sparse, invariant_exponent, miller_loop, tate, Curve, TwistedCurve,
 };
-use crate::cpu::kernel::aggregator::KERNEL;
-use crate::cpu::kernel::interpreter::Interpreter;
-use crate::memory::segments::Segment;
+use crate::cpu::kernel::interpreter::{
+    run_interpreter_with_memory, Interpreter, InterpreterMemoryInitialization,
+};
+use crate::memory::segments::Segment::BnPairing;
 use crate::witness::memory::MemoryAddress;
 
-struct InterpreterSetup {
-    label: String,
-    stack: Vec<U256>,
-    memory: Vec<(usize, Vec<U256>)>,
-}
-
-impl InterpreterSetup {
-    fn run(self) -> Result<Interpreter<'static>> {
-        let label = KERNEL.global_labels[&self.label];
-        let mut stack = self.stack;
-        stack.reverse();
-        let mut interpreter = Interpreter::new_with_kernel(label, stack);
-        for (pointer, data) in self.memory {
-            for (i, term) in data.iter().enumerate() {
-                interpreter.generation_state.memory.set(
-                    MemoryAddress::new(0, Segment::BnPairing, pointer + i),
-                    *term,
-                )
-            }
-        }
-        interpreter.run()?;
-        Ok(interpreter)
-    }
-}
-
 fn extract_kernel_memory(range: Range<usize>, interpreter: Interpreter<'static>) -> Vec<U256> {
     let mut output: Vec<U256> = vec![];
     for i in range {
-        let term =
-            interpreter
-                .generation_state
-                .memory
-                .get(MemoryAddress::new(0, Segment::BnPairing, i));
+        let term = interpreter
+            .generation_state
+            .memory
+            .get(MemoryAddress::new(0, BnPairing, i));
         output.push(term);
     }
     output
 }
 
 fn extract_stack(interpreter: Interpreter<'static>) -> Vec<U256> {
-    let stack = interpreter.stack();
-    stack.iter().rev().cloned().collect::<Vec<U256>>()
+    interpreter
+        .stack()
+        .iter()
+        .rev()
+        .cloned()
+        .collect::<Vec<U256>>()
 }
 
-fn setup_mul_fp6_test(f: Fp6, g: Fp6, label: &str) -> InterpreterSetup {
+fn setup_mul_fp6_test(f: Fp6, g: Fp6, label: &str) -> InterpreterMemoryInitialization {
     let mut stack = f.on_stack();
     if label == "mul_fp254_6" {
         stack.extend(g.on_stack());
     }
     stack.push(U256::from(0xdeadbeefu32));
-    InterpreterSetup {
+    InterpreterMemoryInitialization {
         label: label.to_string(),
         stack,
+        segment: BnPairing,
         memory: vec![],
     }
 }
@@ -75,11 +55,11 @@ fn test_mul_fp6() -> Result<()> {
     let f: Fp6 = rng.gen::<Fp6>();
     let g: Fp6 = rng.gen::<Fp6>();
 
-    let setup_normal: InterpreterSetup = setup_mul_fp6_test(f, g, "mul_fp254_6");
-    let setup_square: InterpreterSetup = setup_mul_fp6_test(f, f, "square_fp254_6");
+    let setup_normal: InterpreterMemoryInitialization = setup_mul_fp6_test(f, g, "mul_fp254_6");
+    let setup_square: InterpreterMemoryInitialization = setup_mul_fp6_test(f, f, "square_fp254_6");
 
-    let intrptr_normal: Interpreter = setup_normal.run().unwrap();
-    let intrptr_square: Interpreter = setup_square.run().unwrap();
+    let intrptr_normal: Interpreter = run_interpreter_with_memory(setup_normal).unwrap();
+    let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap();
 
     let out_normal: Vec<U256> = extract_stack(intrptr_normal);
     let out_square: Vec<U256> = extract_stack(intrptr_square);
@@ -93,7 +73,12 @@ fn test_mul_fp6() -> Result<()> {
     Ok(())
 }
 
-fn setup_mul_fp12_test(out: usize, f: Fp12, g: Fp12, label: &str) -> InterpreterSetup {
+fn setup_mul_fp12_test(
+    out: usize,
+    f: Fp12,
+    g: Fp12,
+    label: &str,
+) -> InterpreterMemoryInitialization {
     let in0: usize = 200;
     let in1: usize = 212;
 
@@ -106,9 +91,10 @@ fn setup_mul_fp12_test(out: usize, f: Fp12, g: Fp12, label: &str) -> Interpreter
     if label == "square_fp254_12" {
         stack.remove(0);
     }
-    InterpreterSetup {
+    InterpreterMemoryInitialization {
         label: label.to_string(),
         stack,
+        segment: BnPairing,
         memory: vec![(in0, f.on_stack()), (in1, g.on_stack())],
     }
 }
@@ -122,13 +108,16 @@ fn test_mul_fp12() -> Result<()> {
     let g: Fp12 = rng.gen::<Fp12>();
     let h: Fp12 = gen_fp12_sparse(&mut rng);
 
-    let setup_normal: InterpreterSetup = setup_mul_fp12_test(out, f, g, "mul_fp254_12");
-    let setup_sparse: InterpreterSetup = setup_mul_fp12_test(out, f, h, "mul_fp254_12_sparse");
-    let setup_square: InterpreterSetup = setup_mul_fp12_test(out, f, f, "square_fp254_12");
+    let setup_normal: InterpreterMemoryInitialization =
+        setup_mul_fp12_test(out, f, g, "mul_fp254_12");
+    let setup_sparse: InterpreterMemoryInitialization =
+        setup_mul_fp12_test(out, f, h, "mul_fp254_12_sparse");
+    let setup_square: InterpreterMemoryInitialization =
+        setup_mul_fp12_test(out, f, f, "square_fp254_12");
 
-    let intrptr_normal: Interpreter = setup_normal.run().unwrap();
-    let intrptr_sparse: Interpreter = setup_sparse.run().unwrap();
-    let intrptr_square: Interpreter = setup_square.run().unwrap();
+    let intrptr_normal: Interpreter = run_interpreter_with_memory(setup_normal).unwrap();
+    let intrptr_sparse: Interpreter = run_interpreter_with_memory(setup_sparse).unwrap();
+    let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap();
 
     let out_normal: Vec<U256> = extract_kernel_memory(out..out + 12, intrptr_normal);
     let out_sparse: Vec<U256> = extract_kernel_memory(out..out + 12, intrptr_sparse);
@@ -145,10 +134,11 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-fn setup_frob_fp6_test(f: Fp6, label: &str) -> InterpreterSetup {
-    InterpreterSetup {
+fn setup_frob_fp6_test(f: Fp6, label: &str) -> InterpreterMemoryInitialization {
+    InterpreterMemoryInitialization {
         label: label.to_string(),
         stack: f.on_stack(),
+        segment: BnPairing,
         memory: vec![],
     }
 }
@@ -162,9 +152,9 @@ fn test_frob_fp6() -> Result<()> {
     let setup_frob_2 = setup_frob_fp6_test(f, "test_frob_fp254_6_2");
     let setup_frob_3 = setup_frob_fp6_test(f, "test_frob_fp254_6_3");
 
-    let intrptr_frob_1: Interpreter = setup_frob_1.run().unwrap();
-    let intrptr_frob_2: Interpreter = setup_frob_2.run().unwrap();
-    let intrptr_frob_3: Interpreter = setup_frob_3.run().unwrap();
+    let intrptr_frob_1: Interpreter = run_interpreter_with_memory(setup_frob_1).unwrap();
+    let intrptr_frob_2: Interpreter = run_interpreter_with_memory(setup_frob_2).unwrap();
+    let intrptr_frob_3: Interpreter = run_interpreter_with_memory(setup_frob_3).unwrap();
 
     let out_frob_1: Vec<U256> = extract_stack(intrptr_frob_1);
     let out_frob_2: Vec<U256> = extract_stack(intrptr_frob_2);
@@ -181,10 +171,11 @@ fn test_frob_fp6() -> Result<()> {
     Ok(())
 }
 
-fn setup_frob_fp12_test(ptr: usize, f: Fp12, label: &str) -> InterpreterSetup {
-    InterpreterSetup {
+fn setup_frob_fp12_test(ptr: usize, f: Fp12, label: &str) -> InterpreterMemoryInitialization {
+    InterpreterMemoryInitialization {
         label: label.to_string(),
         stack: vec![U256::from(ptr)],
+        segment: BnPairing,
         memory: vec![(ptr, f.on_stack())],
     }
 }
@@ -201,10 +192,10 @@ fn test_frob_fp12() -> Result<()> {
     let setup_frob_3 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_3");
     let setup_frob_6 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_6");
 
-    let intrptr_frob_1: Interpreter = setup_frob_1.run().unwrap();
-    let intrptr_frob_2: Interpreter = setup_frob_2.run().unwrap();
-    let intrptr_frob_3: Interpreter = setup_frob_3.run().unwrap();
-    let intrptr_frob_6: Interpreter = setup_frob_6.run().unwrap();
+    let intrptr_frob_1: Interpreter = run_interpreter_with_memory(setup_frob_1).unwrap();
+    let intrptr_frob_2: Interpreter = run_interpreter_with_memory(setup_frob_2).unwrap();
+    let intrptr_frob_3: Interpreter = run_interpreter_with_memory(setup_frob_3).unwrap();
+    let intrptr_frob_6: Interpreter = run_interpreter_with_memory(setup_frob_6).unwrap();
 
     let out_frob_1: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_1);
     let out_frob_2: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_2);
@@ -231,12 +222,13 @@ fn test_inv_fp12() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
 
-    let setup = InterpreterSetup {
+    let setup = InterpreterMemoryInitialization {
         label: "inv_fp254_12".to_string(),
         stack: vec![U256::from(ptr), U256::from(inv), U256::from(0xdeadbeefu32)],
+        segment: BnPairing,
         memory: vec![(ptr, f.on_stack())],
     };
-    let interpreter: Interpreter = setup.run().unwrap();
+    let interpreter: Interpreter = run_interpreter_with_memory(setup).unwrap();
     let output: Vec<U256> = extract_kernel_memory(inv..inv + 12, interpreter);
     let expected: Vec<U256> = f.inv().on_stack();
 
@@ -252,13 +244,14 @@ fn test_invariant_exponent() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
 
-    let setup = InterpreterSetup {
+    let setup = InterpreterMemoryInitialization {
         label: "bn254_invariant_exponent".to_string(),
         stack: vec![U256::from(ptr), U256::from(0xdeadbeefu32)],
+        segment: BnPairing,
         memory: vec![(ptr, f.on_stack())],
     };
 
-    let interpreter: Interpreter = setup.run().unwrap();
+    let interpreter: Interpreter = run_interpreter_with_memory(setup).unwrap();
     let output: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, interpreter);
     let expected: Vec<U256> = invariant_exponent(f).on_stack();
 
@@ -332,12 +325,13 @@ fn test_miller() -> Result<()> {
         TWISTED_GENERATOR.y.im.val,
     ];
 
-    let setup = InterpreterSetup {
+    let setup = InterpreterMemoryInitialization {
         label: "bn254_miller".to_string(),
         stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
+        segment: BnPairing,
         memory: vec![(ptr, inputs)],
     };
-    let interpreter = setup.run().unwrap();
+    let interpreter = run_interpreter_with_memory(setup).unwrap();
     let output: Vec<U256> = extract_kernel_memory(out..out + 12, interpreter);
     let expected = miller_loop(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
 
@@ -359,12 +353,13 @@ fn test_tate() -> Result<()> {
         TWISTED_GENERATOR.y.im.val,
     ];
 
-    let setup = InterpreterSetup {
+    let setup = InterpreterMemoryInitialization {
         label: "bn254_tate".to_string(),
         stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)],
+        segment: BnPairing,
         memory: vec![(ptr, inputs)],
     };
-    let interpreter = setup.run().unwrap();
+    let interpreter = run_interpreter_with_memory(setup).unwrap();
     let output: Vec<U256> = extract_kernel_memory(out..out + 12, interpreter);
     let expected = tate(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
 

From e8865130dee5b68e3ae0d485fcaae8155635ac9d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Thu, 16 Feb 2023 20:00:39 -0800
Subject: [PATCH 193/201] put extract in interpreter

---
 evm/src/cpu/kernel/interpreter.rs | 13 +++++++++++
 evm/src/cpu/kernel/tests/bn254.rs | 37 +++++++++----------------------
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index d8e2aff0..52876c97 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -1,6 +1,7 @@
 //! An EVM interpreter for testing and debugging purposes.
 
 use std::collections::HashMap;
+use std::ops::Range;
 
 use anyhow::{anyhow, bail, ensure};
 use ethereum_types::{U256, U512};
@@ -242,6 +243,18 @@ impl<'a> Interpreter<'a> {
             .content
     }
 
+    /// Reads a contiguous run of words from one segment of context 0.
+    ///
+    /// Returns the value stored at each address of `range` within `segment`,
+    /// in ascending address order. Takes `self` by value, so the interpreter
+    /// is consumed by the call.
+    pub fn extract_kernel_memory(self, segment: Segment, range: Range<usize>) -> Vec<U256> {
+        let memory = &self.generation_state.memory;
+        range
+            .map(|virt| memory.get(MemoryAddress::new(0, segment, virt)))
+            .collect()
+    }
+
     pub(crate) fn push(&mut self, x: U256) {
         self.stack_mut().push(x);
         self.generation_state.registers.stack_len += 1;
diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index e7597ab5..ec4d4198 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -1,5 +1,3 @@
-use std::ops::Range;
-
 use anyhow::Result;
 use ethereum_types::U256;
 use rand::Rng;
@@ -12,19 +10,6 @@ use crate::cpu::kernel::interpreter::{
     run_interpreter_with_memory, Interpreter, InterpreterMemoryInitialization,
 };
 use crate::memory::segments::Segment::BnPairing;
-use crate::witness::memory::MemoryAddress;
-
-fn extract_kernel_memory(range: Range<usize>, interpreter: Interpreter<'static>) -> Vec<U256> {
-    let mut output: Vec<U256> = vec![];
-    for i in range {
-        let term = interpreter
-            .generation_state
-            .memory
-            .get(MemoryAddress::new(0, BnPairing, i));
-        output.push(term);
-    }
-    output
-}
 
 fn extract_stack(interpreter: Interpreter<'static>) -> Vec<U256> {
     interpreter
@@ -119,9 +104,9 @@ fn test_mul_fp12() -> Result<()> {
     let intrptr_sparse: Interpreter = run_interpreter_with_memory(setup_sparse).unwrap();
     let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap();
 
-    let out_normal: Vec<U256> = extract_kernel_memory(out..out + 12, intrptr_normal);
-    let out_sparse: Vec<U256> = extract_kernel_memory(out..out + 12, intrptr_sparse);
-    let out_square: Vec<U256> = extract_kernel_memory(out..out + 12, intrptr_square);
+    let out_normal: Vec<U256> = intrptr_normal.extract_kernel_memory(BnPairing, out..out + 12);
+    let out_sparse: Vec<U256> = intrptr_sparse.extract_kernel_memory(BnPairing, out..out + 12);
+    let out_square: Vec<U256> = intrptr_square.extract_kernel_memory(BnPairing, out..out + 12);
 
     let exp_normal: Vec<U256> = (f * g).on_stack();
     let exp_sparse: Vec<U256> = (f * h).on_stack();
@@ -197,10 +182,10 @@ fn test_frob_fp12() -> Result<()> {
     let intrptr_frob_3: Interpreter = run_interpreter_with_memory(setup_frob_3).unwrap();
     let intrptr_frob_6: Interpreter = run_interpreter_with_memory(setup_frob_6).unwrap();
 
-    let out_frob_1: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_1);
-    let out_frob_2: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_2);
-    let out_frob_3: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_3);
-    let out_frob_6: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, intrptr_frob_6);
+    let out_frob_1: Vec<U256> = intrptr_frob_1.extract_kernel_memory(BnPairing, ptr..ptr + 12);
+    let out_frob_2: Vec<U256> = intrptr_frob_2.extract_kernel_memory(BnPairing, ptr..ptr + 12);
+    let out_frob_3: Vec<U256> = intrptr_frob_3.extract_kernel_memory(BnPairing, ptr..ptr + 12);
+    let out_frob_6: Vec<U256> = intrptr_frob_6.extract_kernel_memory(BnPairing, ptr..ptr + 12);
 
     let exp_frob_1: Vec<U256> = f.frob(1).on_stack();
     let exp_frob_2: Vec<U256> = f.frob(2).on_stack();
@@ -229,7 +214,7 @@ fn test_inv_fp12() -> Result<()> {
         memory: vec![(ptr, f.on_stack())],
     };
     let interpreter: Interpreter = run_interpreter_with_memory(setup).unwrap();
-    let output: Vec<U256> = extract_kernel_memory(inv..inv + 12, interpreter);
+    let output: Vec<U256> = interpreter.extract_kernel_memory(BnPairing, inv..inv + 12);
     let expected: Vec<U256> = f.inv().on_stack();
 
     assert_eq!(output, expected);
@@ -252,7 +237,7 @@ fn test_invariant_exponent() -> Result<()> {
     };
 
     let interpreter: Interpreter = run_interpreter_with_memory(setup).unwrap();
-    let output: Vec<U256> = extract_kernel_memory(ptr..ptr + 12, interpreter);
+    let output: Vec<U256> = interpreter.extract_kernel_memory(BnPairing, ptr..ptr + 12);
     let expected: Vec<U256> = invariant_exponent(f).on_stack();
 
     assert_eq!(output, expected);
@@ -332,7 +317,7 @@ fn test_miller() -> Result<()> {
         memory: vec![(ptr, inputs)],
     };
     let interpreter = run_interpreter_with_memory(setup).unwrap();
-    let output: Vec<U256> = extract_kernel_memory(out..out + 12, interpreter);
+    let output: Vec<U256> = interpreter.extract_kernel_memory(BnPairing, out..out + 12);
     let expected = miller_loop(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
 
     assert_eq!(output, expected);
@@ -360,7 +345,7 @@ fn test_tate() -> Result<()> {
         memory: vec![(ptr, inputs)],
     };
     let interpreter = run_interpreter_with_memory(setup).unwrap();
-    let output: Vec<U256> = extract_kernel_memory(out..out + 12, interpreter);
+    let output: Vec<U256> = interpreter.extract_kernel_memory(BnPairing, out..out + 12);
     let expected = tate(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack();
 
     assert_eq!(output, expected);

From b89e668b5523f659e65355916f6d07fd3286f51b Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Fri, 17 Feb 2023 13:32:20 -0800
Subject: [PATCH 194/201] minor

---
 evm/src/cpu/kernel/tests/bn254.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index ec4d4198..7227e0e9 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -299,8 +299,8 @@ pub const TWISTED_GENERATOR: TwistedCurve = {
 
 #[test]
 fn test_miller() -> Result<()> {
-    let ptr: usize = 300;
-    let out: usize = 400;
+    let ptr: usize = 200;
+    let out: usize = 206;
     let inputs: Vec<U256> = vec![
         CURVE_GENERATOR.x.val,
         CURVE_GENERATOR.y.val,

From 5e3e40a094f6af1795662a5c2b62d05d368f1a54 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sun, 19 Feb 2023 16:05:39 -0800
Subject: [PATCH 195/201] more general kernel peek

---
 evm/src/generation/prover_input.rs |  5 +++--
 evm/src/witness/util.rs            | 14 ++++++++------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index dacf0423..993f306c 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -11,7 +11,8 @@ use crate::generation::prover_input::EvmField::{
 };
 use crate::generation::prover_input::FieldOp::{Inverse, Sqrt};
 use crate::generation::state::GenerationState;
-use crate::witness::util::{kernel_general_peek, stack_peek};
+use crate::memory::segments::Segment::BnPairing;
+use crate::witness::util::{kernel_peek, stack_peek};
 
 /// Prover input function represented as a scoped function name.
 /// Example: `PROVER_INPUT(ff::bn254_base::inverse)` is represented as `ProverInputFn([ff, bn254_base, inverse])`.
@@ -71,7 +72,7 @@ impl<F: Field> GenerationState<F> {
             Bn254Base => {
                 let mut f: [U256; 12] = [U256::zero(); 12];
                 for i in 0..12 {
-                    f[i] = kernel_general_peek(self, ptr + i);
+                    f[i] = kernel_peek(self, BnPairing, ptr + i);
                 }
                 f
             }
diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs
index f026abbd..5c36b529 100644
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@@ -40,12 +40,14 @@ pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Opti
 }
 
 /// Peek at the SEGMENT_KERNEL_BN_PAIRING item at address `i`
-pub(crate) fn kernel_general_peek<F: Field>(state: &GenerationState<F>, i: usize) -> U256 {
-    state.memory.get(MemoryAddress::new(
-        state.registers.context,
-        Segment::BnPairing,
-        i,
-    ))
+pub(crate) fn kernel_peek<F: Field>(
+    state: &GenerationState<F>,
+    segment: Segment,
+    virt: usize,
+) -> U256 {
+    // Read from whichever context `state.registers` currently records.
+    let address = MemoryAddress::new(state.registers.context, segment, virt);
+    state.memory.get(address)
 }
 
 pub(crate) fn mem_read_with_log<F: Field>(

From 2a9d4b1a9a654bf50513654359fb96059c10221d Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 20 Feb 2023 15:19:01 -0800
Subject: [PATCH 196/201] minor

---
 evm/src/cpu/kernel/asm/util/basic_macros.asm | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
index cac33d58..17003bd8 100644
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@@ -289,7 +289,7 @@
 
 // given u32 bytestring abcd return dcba
 %macro reverse_bytes_u32
-    // stack:                   abcd
+    // stack:              abcd
     DUP1
     PUSH 28
     BYTE
@@ -308,11 +308,11 @@
     PUSH 31
     BYTE
     %shl_const(24)
-    // stack:       d000, b0, a, c00
+    // stack:  d000, b0, a, c00
     OR 
     OR
     OR
-    // stack:                   dcba
+    // stack:              dcba
 %endmacro
 
 %macro reverse_bytes_u64

From 2ea3e5e3ca325938ef92b646fb380482dbaa4098 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 25 Feb 2023 10:35:33 -0800
Subject: [PATCH 197/201] minor changes

---
 .../cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm   | 2 +-
 evm/src/witness/util.rs                                         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index f562f7ad..aacb7d3a 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -200,7 +200,7 @@ global bn_double:
     %jump(bn_add_equal_points)
 
 // Check if (x,y) is a valid curve point.
-// Returns range & curve || is_identity
+// Returns (range & curve) || is_identity
 // where
 //     range = (x < N) & (y < N) 
 //     curve = y^2 == (x^3 + 3) 
diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs
index 5c36b529..a5ebf2ac 100644
--- a/evm/src/witness/util.rs
+++ b/evm/src/witness/util.rs
@@ -39,7 +39,7 @@ pub(crate) fn stack_peek<F: Field>(state: &GenerationState<F>, i: usize) -> Opti
     )))
 }
 
-/// Peek at the SEGMENT_KERNEL_BN_PAIRING item at address `i`
+/// Peek at the memory word at the given segment and virtual address, in the context recorded in `state.registers`
 pub(crate) fn kernel_peek<F: Field>(
     state: &GenerationState<F>,
     segment: Segment,

From 13a8d670d04ae12f815236adec3a82b7b09edb83 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 25 Feb 2023 10:55:18 -0800
Subject: [PATCH 198/201] loop test

---
 evm/src/cpu/kernel/tests/bn254.rs | 71 ++++++++-----------------------
 1 file changed, 18 insertions(+), 53 deletions(-)

diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs
index 7227e0e9..badfb472 100644
--- a/evm/src/cpu/kernel/tests/bn254.rs
+++ b/evm/src/cpu/kernel/tests/bn254.rs
@@ -119,9 +119,9 @@ fn test_mul_fp12() -> Result<()> {
     Ok(())
 }
 
-fn setup_frob_fp6_test(f: Fp6, label: &str) -> InterpreterMemoryInitialization {
+fn setup_frob_fp6_test(f: Fp6, n: usize) -> InterpreterMemoryInitialization {
     InterpreterMemoryInitialization {
-        label: label.to_string(),
+        label: String::from("test_frob_fp254_6_") + &(n.to_string()),
         stack: f.on_stack(),
         segment: BnPairing,
         memory: vec![],
@@ -132,33 +132,19 @@ fn setup_frob_fp6_test(f: Fp6, label: &str) -> InterpreterMemoryInitialization {
 fn test_frob_fp6() -> Result<()> {
     let mut rng = rand::thread_rng();
     let f: Fp6 = rng.gen::<Fp6>();
-
-    let setup_frob_1 = setup_frob_fp6_test(f, "test_frob_fp254_6_1");
-    let setup_frob_2 = setup_frob_fp6_test(f, "test_frob_fp254_6_2");
-    let setup_frob_3 = setup_frob_fp6_test(f, "test_frob_fp254_6_3");
-
-    let intrptr_frob_1: Interpreter = run_interpreter_with_memory(setup_frob_1).unwrap();
-    let intrptr_frob_2: Interpreter = run_interpreter_with_memory(setup_frob_2).unwrap();
-    let intrptr_frob_3: Interpreter = run_interpreter_with_memory(setup_frob_3).unwrap();
-
-    let out_frob_1: Vec<U256> = extract_stack(intrptr_frob_1);
-    let out_frob_2: Vec<U256> = extract_stack(intrptr_frob_2);
-    let out_frob_3: Vec<U256> = extract_stack(intrptr_frob_3);
-
-    let exp_frob_1: Vec<U256> = f.frob(1).on_stack();
-    let exp_frob_2: Vec<U256> = f.frob(2).on_stack();
-    let exp_frob_3: Vec<U256> = f.frob(3).on_stack();
-
-    assert_eq!(out_frob_1, exp_frob_1);
-    assert_eq!(out_frob_2, exp_frob_2);
-    assert_eq!(out_frob_3, exp_frob_3);
-
+    for n in 1..4 {
+        let setup_frob = setup_frob_fp6_test(f, n);
+        let intrptr_frob: Interpreter = run_interpreter_with_memory(setup_frob).unwrap();
+        let out_frob: Vec<U256> = extract_stack(intrptr_frob);
+        let exp_frob: Vec<U256> = f.frob(n).on_stack();
+        assert_eq!(out_frob, exp_frob);
+    }
     Ok(())
 }
 
-fn setup_frob_fp12_test(ptr: usize, f: Fp12, label: &str) -> InterpreterMemoryInitialization {
+fn setup_frob_fp12_test(ptr: usize, f: Fp12, n: usize) -> InterpreterMemoryInitialization {
     InterpreterMemoryInitialization {
-        label: label.to_string(),
+        label: String::from("test_frob_fp254_12_") + &(n.to_string()),
         stack: vec![U256::from(ptr)],
         segment: BnPairing,
         memory: vec![(ptr, f.on_stack())],
@@ -168,35 +154,15 @@ fn setup_frob_fp12_test(ptr: usize, f: Fp12, label: &str) -> InterpreterMemoryIn
 #[test]
 fn test_frob_fp12() -> Result<()> {
     let ptr: usize = 200;
-
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
-
-    let setup_frob_1 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_1");
-    let setup_frob_2 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_2");
-    let setup_frob_3 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_3");
-    let setup_frob_6 = setup_frob_fp12_test(ptr, f, "test_frob_fp254_12_6");
-
-    let intrptr_frob_1: Interpreter = run_interpreter_with_memory(setup_frob_1).unwrap();
-    let intrptr_frob_2: Interpreter = run_interpreter_with_memory(setup_frob_2).unwrap();
-    let intrptr_frob_3: Interpreter = run_interpreter_with_memory(setup_frob_3).unwrap();
-    let intrptr_frob_6: Interpreter = run_interpreter_with_memory(setup_frob_6).unwrap();
-
-    let out_frob_1: Vec<U256> = intrptr_frob_1.extract_kernel_memory(BnPairing, ptr..ptr + 12);
-    let out_frob_2: Vec<U256> = intrptr_frob_2.extract_kernel_memory(BnPairing, ptr..ptr + 12);
-    let out_frob_3: Vec<U256> = intrptr_frob_3.extract_kernel_memory(BnPairing, ptr..ptr + 12);
-    let out_frob_6: Vec<U256> = intrptr_frob_6.extract_kernel_memory(BnPairing, ptr..ptr + 12);
-
-    let exp_frob_1: Vec<U256> = f.frob(1).on_stack();
-    let exp_frob_2: Vec<U256> = f.frob(2).on_stack();
-    let exp_frob_3: Vec<U256> = f.frob(3).on_stack();
-    let exp_frob_6: Vec<U256> = f.frob(6).on_stack();
-
-    assert_eq!(out_frob_1, exp_frob_1);
-    assert_eq!(out_frob_2, exp_frob_2);
-    assert_eq!(out_frob_3, exp_frob_3);
-    assert_eq!(out_frob_6, exp_frob_6);
-
+    for n in [1, 2, 3, 6] {
+        let setup_frob = setup_frob_fp12_test(ptr, f, n);
+        let intrptr_frob: Interpreter = run_interpreter_with_memory(setup_frob).unwrap();
+        let out_frob: Vec<U256> = intrptr_frob.extract_kernel_memory(BnPairing, ptr..ptr + 12);
+        let exp_frob: Vec<U256> = f.frob(n).on_stack();
+        assert_eq!(out_frob, exp_frob);
+    }
     Ok(())
 }
 
@@ -225,7 +191,6 @@ fn test_inv_fp12() -> Result<()> {
 #[test]
 fn test_invariant_exponent() -> Result<()> {
     let ptr: usize = 200;
-
     let mut rng = rand::thread_rng();
     let f: Fp12 = rng.gen::<Fp12>();
 

From 1d94756e1a430c9ff36c642ff73ef6bdec149bbd Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 25 Feb 2023 18:07:10 -0800
Subject: [PATCH 199/201] add inverse doc

---
 .../cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm  | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
index 8e821f82..6214f385 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm
@@ -6,7 +6,10 @@
     MULFP254
 %endmacro
 
-// Non-deterministically provide the inverse modulo N.
+// Non-deterministically provide the inverse x^-1 of x modulo N.
+// If x === 0 mod N, this function panics.
+// Although the official prover provides the unique inverse <N
+// this macro only checks that x * x^-1 === 1 mod N
 %macro inv_fp254
     // stack:        x
     PROVER_INPUT(ff::bn254_base::inverse)

From e8c94632680c1d4286adeaaf960bc9d1252ccf9a Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Sat, 25 Feb 2023 18:22:55 -0800
Subject: [PATCH 200/201] comment

---
 .../cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
index ba0631fd..20882c05 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm
@@ -30,7 +30,7 @@ global miller_data:
 ///     EXPS2 = [       (a2[i], a0[i]) for i in len(a4)..len(a2)]
 ///     EXPS0 = [               a0[i]  for i in len(a2)..len(a0)]
 /// power_data_n is simply a reverse-order byte encoding of EXPSn
-///     where (i,j,k) is sent to (0b100)i + (0b10)j + k
+///     where (i,j,k) is sent to (100)i + (10)j + k
 
 global power_data_4:
     BYTES 111, 010, 011, 111

From 95e5fb5910d192fa1fb4dcc69a92dfcb95e23653 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Mon, 27 Feb 2023 22:40:39 -0800
Subject: [PATCH 201/201] cleaner rand

---
 evm/src/bn254_arithmetic.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/evm/src/bn254_arithmetic.rs b/evm/src/bn254_arithmetic.rs
index d1050560..c2f1e3d4 100644
--- a/evm/src/bn254_arithmetic.rs
+++ b/evm/src/bn254_arithmetic.rs
@@ -27,9 +27,9 @@ impl Fp {
 
 impl Distribution<Fp> for Standard {
     fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Fp {
-        let (x0, x1, x2, x3) = rng.gen::<(u64, u64, u64, u64)>();
+        let xs = rng.gen::<[u64; 4]>();
         Fp {
-            val: U256([x0, x1, x2, x3]) % BN_BASE,
+            val: U256(xs) % BN_BASE,
         }
     }
 }