From f0a6ec953522b688ef9c83aa33573c2da4f1cad2 Mon Sep 17 00:00:00 2001
From: Dmitry Vagner <dmitry.vagner@gmail.com>
Date: Wed, 25 Jan 2023 14:42:30 +0700
Subject: [PATCH] clean BN254 asm: one opcode per line, use %stack macros

---
 .../bn254/curve_arithmetic/curve_add.asm      |  47 ++++--
 .../bn254/curve_arithmetic/miller_loop.asm    | 146 +++++++++++-------
 .../curve/bn254/field_arithmetic/fp12_mul.asm |  43 ++++--
 3 files changed, 148 insertions(+), 88 deletions(-)

diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
index e090e4e9..0ac947da 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm
@@ -4,11 +4,13 @@ global ec_add:
     // stack:                                    x0, y0, x1, y1, retdest
 
     // Check if points are valid BN254 points.
-    DUP2  DUP2    
+    DUP2
+    DUP2
     // stack:                            x0, y0, x0, y0, x1, y1, retdest
     %ec_check
     // stack:                   isValid(x0, y0), x0, y0, x1, y1, retdest
-    DUP5  DUP5    
+    DUP5
+    DUP5
     // stack:         x1, y1  , isValid(x0, y0), x0, y0, x1, y1, retdest
     %ec_check
     // stack: isValid(x1, y1) , isValid(x0, y0), x0, y0, x1, y1, retdest
@@ -28,7 +30,8 @@ global ec_add_valid_points:
     // stack:                   x0, y0, x1, y1, retdest
 
     // Check if the first point is the identity.
-    DUP2  DUP2
+    DUP2
+    DUP2
     // stack:           x0,y0 , x0, y0, x1, y1, retdest
     %ec_isidentity
     // stack:   (0,0)==(x0,y0), x0, y0, x1, y1, retdest
@@ -36,7 +39,8 @@ global ec_add_valid_points:
     // stack:                   x0, y0, x1, y1, retdest
 
     // Check if the second point is the identity.
-    DUP4  DUP4    
+    DUP4
+    DUP4
     // stack:           x1,y1 , x0, y0, x1, y1, retdest
     %ec_isidentity
     // stack:   (0,0)==(x1,y1), x0, y0, x1, y1, retdest
@@ -44,7 +48,8 @@ global ec_add_valid_points:
     // stack:                   x0, y0, x1, y1, retdest
 
     // Check if both points have the same x-coordinate.
-    DUP3  DUP2    
+    DUP3
+    DUP2
     // stack:         x0 ,  x1, x0, y0, x1, y1, retdest
     EQ
     // stack:         x0 == x1, x0, y0, x1, y1, retdest
@@ -54,11 +59,13 @@ global ec_add_valid_points:
     // stack:                   x0, y0, x1, y1, retdest
     // Otherwise, we can use the standard formula.
     // Compute lambda = (y0 - y1)/(x0 - x1)
-    DUP4  DUP3
+    DUP4
+    DUP3
     // stack:          y0 , y1, x0, y0, x1, y1, retdest
     SUBFP254
     // stack:          y0 - y1, x0, y0, x1, y1, retdest
-    DUP4  DUP3
+    DUP4
+    DUP3
     // stack: x0 , x1, y0 - y1, x0, y0, x1, y1, retdest
     SUBFP254
     // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest
@@ -88,11 +95,13 @@ ec_add_valid_points_with_lambda:
     // stack:                             lambda, x0, y0, x1, y1, retdest
 
     // Compute x2 = lambda^2 - x1 - x0
-    DUP2  DUP5
+    DUP2
+    DUP5
     // stack:                     x1, x0, lambda, x0, y0, x1, y1, retdest
     DUP3
     // stack:          lambda   , x1, x0, lambda, x0, y0, x1, y1, retdest
-    DUP1  MULFP254
+    DUP1
+    MULFP254
     // stack:          lambda^2 , x1, x0, lambda, x0, y0, x1, y1, retdest
     SUBFP254
     // stack:          lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest
@@ -127,7 +136,8 @@ ec_add_equal_first_coord:
     // stack:           x0, y0, x1, y1, retdest with x0 == x1
 
     // Check if the points are equal
-    DUP2  DUP5
+    DUP2
+    DUP5
     // stack: y1  , y0, x0, y0, x1, y1, retdest
     EQ
     // stack: y1 == y0, x0, y0, x1, y1, retdest
@@ -153,7 +163,8 @@ ec_add_equal_points:
 
     DUP1
     // stack:           x0  , x0, y0, x1, y1, retdest
-    DUP1  MULFP254
+    DUP1
+    MULFP254
     // stack:           x0^2, x0, y0, x1, y1, retdest
     %bn_3_over_2
     // stack:     3/2 , x0^2, x0, y0, x1, y1, retdest
@@ -170,7 +181,8 @@ ec_add_equal_points:
 // Standard doubling formula.
 global ec_double:
     // stack:         x0, y0, retdest
-    DUP2  DUP2    
+    DUP2
+    DUP2
     // stack: x0, y0, x0, y0, retdest
     %jump(ec_add_equal_points)
 
@@ -213,13 +225,18 @@ global ec_double:
     // stack:                y, x, range
     DUP2 
     // stack:           x  , y, x, range
-    DUP1  DUP1  MULFP254  MULFP254
+    DUP1
+    DUP1
+    MULFP254
+    MULFP254
     // stack:           x^3, y, x, range
-    PUSH 3  ADDFP254
+    PUSH 3
+    ADDFP254
     // stack:       3 + x^3, y, x, range
     DUP2
     // stack:  y  , 3 + x^3, y, x, range
-    DUP1  MULFP254
+    DUP1
+    MULFP254
     // stack:  y^2, 3 + x^3, y, x, range
     EQ
     // stack:         curve, y, x, range
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
index 0c92143b..cd13f80e 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm
@@ -27,23 +27,20 @@
 
 global miller:
     // stack:         ptr, out, retdest
-    PUSH 1
-    // stack:      1, ptr, out, retdest
-    DUP3
+    %stack (ptr, out) -> (out, 1, ptr, out)
     // stack: out, 1, ptr, out, retdest
     %mstore_kernel_general
     // stack:         ptr, out, retdest
     %load_fp6
     // stack:        P, Q, out, retdest
-    DUP2  DUP2
-    // stack:     O, P, Q, out, retdest
-    PUSH 53
-    // stack: 53, O, P, Q, out, retdest
-    PUSH 0 // this placeholder lets miller_loop start with POP
+    %stack (P: 2) -> (0, 53, P, P)
+    // stack: 0, 53, O, P, Q, out, retdest
+    // the head 0 lets miller_loop start with POP
 global miller_loop:
     POP
     // stack:          times  , O, P, Q, out, retdest
-    DUP1  ISZERO
+    DUP1
+    ISZERO
     // stack:  break?, times  , O, P, Q, out, retdest
     %jumpi(miller_return)
     // stack:          times  , O, P, Q, out, retdest
@@ -56,13 +53,14 @@ global miller_loop:
     %jump(miller_one)
 miller_return:
     // stack: times, O, P, Q, out, retdest
-    POP  %pop2  %pop2  %pop4  POP
+    %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest)
     // stack:                      retdest
     JUMP 
 
 miller_one:
     // stack:               0xnm, times, O, P, Q, out, retdest
-    DUP1  %lt_const(0x20) 
+    DUP1
+    %lt_const(0x20)
     // stack:        skip?, 0xnm, times, O, P, Q, out, retdest
     %jumpi(miller_zero)
     // stack:               0xnm, times, O, P, Q, out, retdest
@@ -74,7 +72,8 @@ miller_one:
 
 miller_zero:
     // stack:              m  , times, O, P, Q, out, retdest
-    DUP1  ISZERO
+    DUP1
+    ISZERO
     // stack:       skip?, m  , times, O, P, Q, out, retdest
     %jumpi(miller_loop)
     // stack:              m  , times, O, P, Q, out, retdest
@@ -93,32 +92,42 @@ miller_zero:
 
 mul_tangent:
     // stack:                                              retdest, 0xnm, times, O, P, Q, out
-    PUSH mul_tangent_2  DUP13  PUSH mul_tangent_1
+    PUSH mul_tangent_2
+    DUP13
+    PUSH mul_tangent_1
     // stack:           mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP2  DUP1
+    %stack (mul_tangent_1, out) -> (out, out, mul_tangent_1, out)
     // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %jump(square_fp12)
 mul_tangent_1:
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP13  DUP13  DUP13  DUP13
+    DUP13
+    DUP13
+    DUP13
+    DUP13
     // stack:        Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
-    DUP11  DUP11
+    DUP11
+    DUP11
     // stack:     O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out
     %tangent
     // stack:           out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
-    PUSH 100  DUP2
+    %stack (out) -> (out, 100, out)
     // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out  {100: line}
     %jump(mul_fp12_sparse)
 mul_tangent_2:
     // stack:                  retdest, 0xnm, times,   O, P, Q, out  {100: line}
     PUSH after_double
     // stack:    after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    DUP6  DUP6
+    DUP6
+    DUP6
     // stack: O, after_double, retdest, 0xnm, times,   O, P, Q, out  {100: line}
     %jump(ec_double)
 after_double:
     // stack:             2*O, retdest, 0xnm, times,   O, P, Q, out  {100: line}
-    SWAP5  POP  SWAP5  POP
+    SWAP5
+    POP
+    SWAP5
+    POP
     // stack:                  retdest, 0xnm, times, 2*O, P, Q, out  {100: line}
     JUMP
 
@@ -131,31 +140,40 @@ mul_cord:
     // stack:                            0xnm, times, O, P, Q, out
     PUSH mul_cord_1
     // stack:                mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP11  DUP11  DUP11  DUP11
+    DUP11
+    DUP11
+    DUP11
+    DUP11
     // stack:             Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP9  DUP9
+    DUP9
+    DUP9
     // stack:          O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
-    DUP13  DUP13
+    DUP13
+    DUP13
     // stack:       P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out
     %cord 
     // stack:                mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     DUP12
     // stack:           out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    PUSH 100
-    // stack:      100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
-    DUP2
+    %stack (out) -> (out, 100, out)
     // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out  {100: line}
     %jump(mul_fp12_sparse)
 mul_cord_1:
     // stack:                   0xnm, times, O  , P, Q, out
     PUSH after_add
     // stack:        after_add, 0xnm, times, O  , P, Q, out
-    DUP7  DUP7  DUP7  DUP7
+    DUP7
+    DUP7
+    DUP7
+    DUP7
     // stack: O , P, after_add, 0xnm, times, O  , P, Q, out
     %jump(ec_add_valid_points)
 after_add:
     // stack:            O + P, 0xnm, times, O  , P, Q, out
-    SWAP4  POP  SWAP4  POP
+    SWAP4
+    POP
+    SWAP4
+    POP
     // stack:                   0xnm, times, O+P, P, Q, out
     %jump(miller_one)
 
@@ -169,38 +187,42 @@ after_add:
 
 %macro tangent
     // stack:                px, py, qx, qx_,  qy, qy_
-    PUSH 9
-    // stack:             9, px, py, qx, qx_,  qy, qy_
-    DUP3
-    // stack:        py , 9, px, py, qx, qx_,  qy, qy_
-    DUP1  MULFP254
-    // stack:     py**2 , 9, px, py, qx, qx_,  qy, qy_
+    %stack (px, py) -> (py, py, 9, px, py)
+    // stack:    py, py , 9, px, py, qx, qx_,  qy, qy_
+    MULFP254
+    // stack:      py^2 , 9, px, py, qx, qx_,  qy, qy_
     SUBFP254
-    // stack:     py**2 - 9, px, py, qx, qx_,  qy, qy_
+    // stack:      py^2 - 9, px, py, qx, qx_,  qy, qy_
     %mstore_kernel_general(100)
     // stack:                px, py, qx, qx_,  qy, qy_
-    DUP1  MULFP254
-    // stack:             px**2, py, qx, qx_,  qy, qy_
-    PUSH 3  MULFP254
-    // stack:           3*px**2, py, qx, qx_,  qy, qy_
-    PUSH 0  SUBFP254
-    // stack:          -3*px**2, py, qx, qx_,  qy, qy_
-    SWAP2
-    // stack:           qx, py, -3px**2, qx_,  qy, qy_
-    DUP3  MULFP254
-    // stack: (-3*px**2)qx, py, -3px**2, qx_,  qy, qy_ 
-    %mstore_kernel_general(102)
-    // stack:               py, -3px**2, qx_,  qy, qy_ 
-    PUSH 2  MULFP254
-    // stack:              2py, -3px**2, qx_,  qy, qy_ 
-    SWAP3 
-    // stack:               qy, -3px**2, qx_, 2py, qy_ 
-    DUP4  MULFP254
-    // stack:          (2py)qy, -3px**2, qx_, 2py, qy_ 
-    %mstore_kernel_general(108)
-    // stack:                   -3px**2, qx_, 2py, qy_ 
+    DUP1
     MULFP254
-    // stack:                  (-3px**2)*qx_, 2py, qy_ 
+    // stack:              px^2, py, qx, qx_,  qy, qy_
+    PUSH 3
+    MULFP254
+    // stack:            3*px^2, py, qx, qx_,  qy, qy_
+    PUSH 0
+    SUBFP254
+    // stack:           -3*px^2, py, qx, qx_,  qy, qy_
+    SWAP2
+    // stack:            qx, py, -3px^2, qx_,  qy, qy_
+    DUP3
+    MULFP254
+    // stack:   (-3*px^2)qx, py, -3px^2, qx_,  qy, qy_
+    %mstore_kernel_general(102)
+    // stack:                py, -3px^2, qx_,  qy, qy_
+    PUSH 2
+    MULFP254
+    // stack:               2py, -3px^2, qx_,  qy, qy_
+    SWAP3
+    // stack:                qy, -3px^2, qx_, 2py, qy_
+    DUP4
+    MULFP254
+    // stack:           (2py)qy, -3px^2, qx_, 2py, qy_
+    %mstore_kernel_general(108)
+    // stack:                    -3px^2, qx_, 2py, qy_
+    MULFP254
+    // stack:                   (-3px^2)*qx_, 2py, qy_
     %mstore_kernel_general(103)
     // stack:                                 2py, qy_ 
     MULFP254
@@ -217,9 +239,13 @@ after_add:
 
 %macro cord
     // stack:                    p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    DUP1  DUP5  MULFP254
+    DUP1
+    DUP5
+    MULFP254
     // stack:           p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
-    DUP3  DUP5  MULFP254
+    DUP3
+    DUP5
+    MULFP254
     // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
     SUBFP254
     // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_
@@ -235,13 +261,15 @@ after_add:
     // stack:                    p1x - p2x, p2y - p1y, qx, qx_, qy, qy_
     SWAP4
     // stack:                    qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
-    DUP5  MULFP254
+    DUP5
+    MULFP254
     // stack:         (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_
     %mstore_kernel_general(108)
     // stack:                        p2y - p1y, qx, qx_, p1x - p2x, qy_
     SWAP1
     // stack:                        qx, p2y - p1y, qx_, p1x - p2x, qy_
-    DUP2  MULFP254
+    DUP2
+    MULFP254
     // stack:             (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_
     %mstore_kernel_general(102)
     // stack:                            p2y - p1y, qx_, p1x - p2x, qy_
diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
index ee8804c7..3069107f 100644
--- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
+++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm
@@ -45,11 +45,13 @@
 
 global mul_fp12:
     // stack:                                inA, inB, out 
-    DUP1  %offset_fp6 
+    DUP1
+    %offset_fp6
     // stack:                          inA', inA, inB, out 
     %load_fp6
     // stack:                            f', inA, inB, out 
-    DUP8  %offset_fp6
+    DUP8
+    %offset_fp6
     // stack:                      inB', f', inA, inB, out 
     %load_fp6
     // stack:                        g', f', inA, inB, out 
@@ -118,7 +120,8 @@ mul_fp12_3:
     // stack:          f'g'+fg, (f+f')(g+g'), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %subr_fp6
     // stack:       (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
-    DUP14  %offset_fp6 
+    DUP14
+    %offset_fp6
     // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}   
     %store_fp6
     // stack:                                 fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
@@ -130,7 +133,8 @@ mul_fp12_3:
     // stack:                 out, sh(f'g') + fg, inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
     %store_fp6
     // stack:                                     inB, out  {0: sh(f'g'), 6: f'g', 12: fg}
-    %pop2  JUMP
+    %pop2
+    JUMP
 
 
 //////////////////////////////////////
@@ -178,7 +182,8 @@ mul_fp12_3:
 
 global mul_fp12_sparse:
     // stack:                                                                    inA, inB, out
-    DUP1  %offset_fp6
+    DUP1
+    %offset_fp6
     // stack:                                                              inA', inA, inB, out
     %load_fp6
     // stack:                                                                f', inA, inB, out
@@ -210,7 +215,8 @@ global mul_fp12_sparse:
     // stack:                      g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out
     %swap_fp6
     // stack:                    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
-    DUP13  %add_const(8)
+    DUP13
+    %add_const(8)
     // stack:           inB2,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
     %load_fp2
     // stack:           G2  ,    f'  , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out
@@ -238,7 +244,8 @@ global mul_fp12_sparse:
     // stack:                                          g0 * f', out, f, inB, f', inA, inB, out
     %swap_fp6_hole
     // stack:                                        f  , out, g0 * f', inB, f', inA, inB, out
-    DUP14  %add_const(8)
+    DUP14
+    %add_const(8)
     // stack:                               inB2,    f  , out, g0 * f', inB, f', inA, inB, out
     %load_fp2
     // stack:                                G2 ,    f  , out, g0 * f', inB, f', inA, inB, out
@@ -248,7 +255,8 @@ global mul_fp12_sparse:
     // stack:                                     G2 * sh(f) + g0 * f', inB, f', inA, inB, out
     %swap_fp6_hole
     // stack:                                    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
-    DUP7  %add_const(2)
+    DUP7
+    %add_const(2)
     // stack:                           inB1,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
     %load_fp2
     // stack:                            G1 ,    f' , inB, G2 * sh(f) + g0 * f', inA, inB, out
@@ -256,11 +264,13 @@ global mul_fp12_sparse:
     // stack:                            G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out
     %add_fp6_hole
     // stack:                                G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
-    DUP9  %offset_fp6
+    DUP9
+    %offset_fp6
     // stack:                          out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out
     %store_fp6
     // stack:                                                                    inA, inB, out
-    %pop3  JUMP
+    %pop3
+    JUMP
 
 
 /////////////////////////
@@ -324,11 +334,13 @@ global square_fp12:
     // stack:                                  square_fp12_2, inp, f, square_fp12_3, out 
     %dup_fp6_2
     // stack:                              f , square_fp12_2, inp, f, square_fp12_3, out
-    DUP16  %offset_fp6
+    DUP16
+    %offset_fp6
     // stack:                        out', f , square_fp12_2, inp, f, square_fp12_3, out
     PUSH square_fp12_1
     // stack:         square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
-    DUP10  %offset_fp6
+    DUP10
+    %offset_fp6
     // stack:   inp', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
     %load_fp6
     // stack:     f', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out
@@ -352,7 +364,9 @@ square_fp12_2:
     // stack:                                       sh(f'f'), inp, f, square_fp12_3, out
     %swap_fp6_hole
     // stack:                                       f, inp, sh(f'f'), square_fp12_3, out
-    SWAP6  SWAP13  SWAP6
+    SWAP6
+    SWAP13
+    SWAP6
     // stack:                                       f, square_fp12_3, sh(f'f'), inp, out
     %jump(square_fp6)
 square_fp12_3:
@@ -363,4 +377,5 @@ square_fp12_3:
     // stack:                                               out, ff + sh(f'f'), inp, out
     %store_fp6
     // stack:                                                                   inp, out
-    %pop2  JUMP
+    %pop2
+    JUMP