From f0a6ec953522b688ef9c83aa33573c2da4f1cad2 Mon Sep 17 00:00:00 2001 From: Dmitry Vagner Date: Wed, 25 Jan 2023 14:42:30 +0700 Subject: [PATCH] clean asm --- .../bn254/curve_arithmetic/curve_add.asm | 47 ++++-- .../bn254/curve_arithmetic/miller_loop.asm | 146 +++++++++++------- .../curve/bn254/field_arithmetic/fp12_mul.asm | 43 ++++-- 3 files changed, 148 insertions(+), 88 deletions(-) diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm index e090e4e9..0ac947da 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm @@ -4,11 +4,13 @@ global ec_add: // stack: x0, y0, x1, y1, retdest // Check if points are valid BN254 points. - DUP2 DUP2 + DUP2 + DUP2 // stack: x0, y0, x0, y0, x1, y1, retdest %ec_check // stack: isValid(x0, y0), x0, y0, x1, y1, retdest - DUP5 DUP5 + DUP5 + DUP5 // stack: x1, y1 , isValid(x0, y0), x0, y0, x1, y1, retdest %ec_check // stack: isValid(x1, y1) , isValid(x0, y0), x0, y0, x1, y1, retdest @@ -28,7 +30,8 @@ global ec_add_valid_points: // stack: x0, y0, x1, y1, retdest // Check if the first point is the identity. - DUP2 DUP2 + DUP2 + DUP2 // stack: x0,y0 , x0, y0, x1, y1, retdest %ec_isidentity // stack: (0,0)==(x0,y0), x0, y0, x1, y1, retdest @@ -36,7 +39,8 @@ global ec_add_valid_points: // stack: x0, y0, x1, y1, retdest // Check if the second point is the identity. - DUP4 DUP4 + DUP4 + DUP4 // stack: x1,y1 , x0, y0, x1, y1, retdest %ec_isidentity // stack: (0,0)==(x1,y1), x0, y0, x1, y1, retdest @@ -44,7 +48,8 @@ global ec_add_valid_points: // stack: x0, y0, x1, y1, retdest // Check if both points have the same x-coordinate. - DUP3 DUP2 + DUP3 + DUP2 // stack: x0 , x1, x0, y0, x1, y1, retdest EQ // stack: x0 == x1, x0, y0, x1, y1, retdest @@ -54,11 +59,13 @@ global ec_add_valid_points: // stack: x0, y0, x1, y1, retdest // Otherwise, we can use the standard formula. // Compute lambda = (y0 - y1)/(x0 - x1) - DUP4 DUP3 + DUP4 + DUP3 // stack: y0 , y1, x0, y0, x1, y1, retdest SUBFP254 // stack: y0 - y1, x0, y0, x1, y1, retdest - DUP4 DUP3 + DUP4 + DUP3 // stack: x0 , x1, y0 - y1, x0, y0, x1, y1, retdest SUBFP254 // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest @@ -88,11 +95,13 @@ ec_add_valid_points_with_lambda: // stack: lambda, x0, y0, x1, y1, retdest // Compute x2 = lambda^2 - x1 - x0 - DUP2 DUP5 + DUP2 + DUP5 // stack: x1, x0, lambda, x0, y0, x1, y1, retdest DUP3 // stack: lambda , x1, x0, lambda, x0, y0, x1, y1, retdest - DUP1 MULFP254 + DUP1 + MULFP254 // stack: lambda^2 , x1, x0, lambda, x0, y0, x1, y1, retdest SUBFP254 // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest @@ -127,7 +136,8 @@ ec_add_equal_first_coord: // stack: x0, y0, x1, y1, retdest with x0 == x1 // Check if the points are equal - DUP2 DUP5 + DUP2 + DUP5 // stack: y1 , y0, x0, y0, x1, y1, retdest EQ // stack: y1 == y0, x0, y0, x1, y1, retdest @@ -153,7 +163,8 @@ ec_add_equal_points: DUP1 // stack: x0 , x0, y0, x1, y1, retdest - DUP1 MULFP254 + DUP1 + MULFP254 // stack: x0^2, x0, y0, x1, y1, retdest %bn_3_over_2 // stack: 3/2 , x0^2, x0, y0, x1, y1, retdest @@ -170,7 +181,8 @@ ec_add_equal_points: // Standard doubling formula. global ec_double: // stack: x0, y0, retdest - DUP2 DUP2 + DUP2 + DUP2 // stack: x0, y0, x0, y0, retdest %jump(ec_add_equal_points) @@ -213,13 +225,18 @@ global ec_double: // stack: y, x, range DUP2 // stack: x , y, x, range - DUP1 DUP1 MULFP254 MULFP254 + DUP1 + DUP1 + MULFP254 + MULFP254 // stack: x^3, y, x, range - PUSH 3 ADDFP254 + PUSH 3 + ADDFP254 // stack: 3 + x^3, y, x, range DUP2 // stack: y , 3 + x^3, y, x, range - DUP1 MULFP254 + DUP1 + MULFP254 // stack: y^2, 3 + x^3, y, x, range EQ // stack: curve, y, x, range diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm index 0c92143b..cd13f80e 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm @@ -27,23 +27,20 @@ global miller: // stack: ptr, out, retdest - PUSH 1 - // stack: 1, ptr, out, retdest - DUP3 + %stack (ptr, out) -> (out, 1, ptr, out) // stack: out, 1, ptr, out, retdest %mstore_kernel_general // stack: ptr, out, retdest %load_fp6 // stack: P, Q, out, retdest - DUP2 DUP2 - // stack: O, P, Q, out, retdest - PUSH 53 - // stack: 53, O, P, Q, out, retdest - PUSH 0 // this placeholder lets miller_loop start with POP + %stack (P: 2) -> (0, 53, P, P) + // stack: 0, 53, O, P, Q, out, retdest + // the head 0 lets miller_loop start with POP global miller_loop: POP // stack: times , O, P, Q, out, retdest - DUP1 ISZERO + DUP1 + ISZERO // stack: break?, times , O, P, Q, out, retdest %jumpi(miller_return) // stack: times , O, P, Q, out, retdest @@ -56,13 +53,14 @@ global miller_loop: %jump(miller_one) miller_return: // stack: times, O, P, Q, out, retdest - POP %pop2 %pop2 %pop4 POP + %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest) // stack: retdest JUMP miller_one: // stack: 0xnm, times, O, P, Q, out, retdest - DUP1 %lt_const(0x20) + DUP1 + %lt_const(0x20) // stack: skip?, 0xnm, times, O, P, Q, out, retdest %jumpi(miller_zero) // stack: 0xnm, times, O, P, Q, out, retdest @@ -74,7 +72,8 @@ miller_one: miller_zero: // stack: m , times, O, P, Q, out, retdest - DUP1 ISZERO + DUP1 + ISZERO // stack: skip?, m , times, O, P, Q, out, retdest %jumpi(miller_loop) // stack: m , times, O, P, Q, out, retdest @@ -93,32 +92,42 @@ miller_zero: mul_tangent: // stack: retdest, 0xnm, times, O, P, Q, out - PUSH mul_tangent_2 DUP13 PUSH mul_tangent_1 + PUSH mul_tangent_2 + DUP13 + PUSH mul_tangent_1 // stack: mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out - DUP2 DUP1 + %stack (mul_tangent_1, out) -> (out, out, mul_tangent_1, out) // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out %jump(square_fp12) mul_tangent_1: // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out - DUP13 DUP13 DUP13 DUP13 + DUP13 + DUP13 + DUP13 + DUP13 // stack: Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out - DUP11 DUP11 + DUP11 + DUP11 // stack: O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out %tangent // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {100: line} - PUSH 100 DUP2 + %stack (out) -> (out, 100, out) // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {100: line} %jump(mul_fp12_sparse) mul_tangent_2: // stack: retdest, 0xnm, times, O, P, Q, out {100: line} PUSH after_double // stack: after_double, retdest, 0xnm, times, O, P, Q, out {100: line} - DUP6 DUP6 + DUP6 + DUP6 // stack: O, after_double, retdest, 0xnm, times, O, P, Q, out {100: line} %jump(ec_double) after_double: // stack: 2*O, retdest, 0xnm, times, O, P, Q, out {100: line} - SWAP5 POP SWAP5 POP + SWAP5 + POP + SWAP5 + POP // stack: retdest, 0xnm, times, 2*O, P, Q, out {100: line} JUMP @@ -131,31 +140,40 @@ mul_cord: // stack: 0xnm, times, O, P, Q, out PUSH mul_cord_1 // stack: mul_cord_1, 0xnm, times, O, P, Q, out - DUP11 DUP11 DUP11 DUP11 + DUP11 + DUP11 + DUP11 + DUP11 // stack: Q, mul_cord_1, 0xnm, times, O, P, Q, out - DUP9 DUP9 + DUP9 + DUP9 // stack: O, Q, mul_cord_1, 0xnm, times, O, P, Q, out - DUP13 DUP13 + DUP13 + DUP13 // stack: P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out %cord // stack: mul_cord_1, 0xnm, times, O, P, Q, out {100: line} DUP12 // stack: out, mul_cord_1, 0xnm, times, O, P, Q, out {100: line} - PUSH 100 - // stack: 100, out, mul_cord_1, 0xnm, times, O, P, Q, out {100: line} - DUP2 + %stack (out) -> (out, 100, out) // stack: out, 100, out, mul_cord_1, 0xnm, times, O, P, Q, out {100: line} %jump(mul_fp12_sparse) mul_cord_1: // stack: 0xnm, times, O , P, Q, out PUSH after_add // stack: after_add, 0xnm, times, O , P, Q, out - DUP7 DUP7 DUP7 DUP7 + DUP7 + DUP7 + DUP7 + DUP7 // stack: O , P, after_add, 0xnm, times, O , P, Q, out %jump(ec_add_valid_points) after_add: // stack: O + P, 0xnm, times, O , P, Q, out - SWAP4 POP SWAP4 POP + SWAP4 + POP + SWAP4 + POP // stack: 0xnm, times, O+P, P, Q, out %jump(miller_one) @@ -169,38 +187,42 @@ after_add: %macro tangent // stack: px, py, qx, qx_, qy, qy_ - PUSH 9 - // stack: 9, px, py, qx, qx_, qy, qy_ - DUP3 - // stack: py , 9, px, py, qx, qx_, qy, qy_ - DUP1 MULFP254 - // stack: py**2 , 9, px, py, qx, qx_, qy, qy_ + %stack (px, py) -> (py, py , 9, px, py) + // stack: py, py , 9, px, py, qx, qx_, qy, qy_ + MULFP254 + // stack: py^2 , 9, px, py, qx, qx_, qy, qy_ SUBFP254 - // stack: py**2 - 9, px, py, qx, qx_, qy, qy_ + // stack: py^2 - 9, px, py, qx, qx_, qy, qy_ %mstore_kernel_general(100) // stack: px, py, qx, qx_, qy, qy_ - DUP1 MULFP254 - // stack: px**2, py, qx, qx_, qy, qy_ - PUSH 3 MULFP254 - // stack: 3*px**2, py, qx, qx_, qy, qy_ - PUSH 0 SUBFP254 - // stack: -3*px**2, py, qx, qx_, qy, qy_ - SWAP2 - // stack: qx, py, -3px**2, qx_, qy, qy_ - DUP3 MULFP254 - // stack: (-3*px**2)qx, py, -3px**2, qx_, qy, qy_ - %mstore_kernel_general(102) - // stack: py, -3px**2, qx_, qy, qy_ - PUSH 2 MULFP254 - // stack: 2py, -3px**2, qx_, qy, qy_ - SWAP3 - // stack: qy, -3px**2, qx_, 2py, qy_ - DUP4 MULFP254 - // stack: (2py)qy, -3px**2, qx_, 2py, qy_ - %mstore_kernel_general(108) - // stack: -3px**2, qx_, 2py, qy_ + DUP1 MULFP254 - // stack: (-3px**2)*qx_, 2py, qy_ + // stack: px^2, py, qx, qx_, qy, qy_ + PUSH 3 + MULFP254 + // stack: 3*px^2, py, qx, qx_, qy, qy_ + PUSH 0 + SUBFP254 + // stack: -3*px^2, py, qx, qx_, qy, qy_ + SWAP2 + // stack: qx, py, -3px^2, qx_, qy, qy_ + DUP3 + MULFP254 + // stack: (-3*px^2)qx, py, -3px^2, qx_, qy, qy_ + %mstore_kernel_general(102) + // stack: py, -3px^2, qx_, qy, qy_ + PUSH 2 + MULFP254 + // stack: 2py, -3px^2, qx_, qy, qy_ + SWAP3 + // stack: qy, -3px^2, qx_, 2py, qy_ + DUP4 + MULFP254 + // stack: (2py)qy, -3px^2, qx_, 2py, qy_ + %mstore_kernel_general(108) + // stack: -3px^2, qx_, 2py, qy_ + MULFP254 + // stack: (-3px^2)*qx_, 2py, qy_ %mstore_kernel_general(103) // stack: 2py, qy_ MULFP254 @@ -217,9 +239,13 @@ after_add: %macro cord // stack: p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ - DUP1 DUP5 MULFP254 + DUP1 + DUP5 + MULFP254 // stack: p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ - DUP3 DUP5 MULFP254 + DUP3 + DUP5 + MULFP254 // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ SUBFP254 // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ @@ -235,13 +261,15 @@ after_add: // stack: p1x - p2x, p2y - p1y, qx, qx_, qy, qy_ SWAP4 // stack: qy, p2y - p1y, qx, qx_, p1x - p2x, qy_ - DUP5 MULFP254 + DUP5 + MULFP254 // stack: (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_ %mstore_kernel_general(108) // stack: p2y - p1y, qx, qx_, p1x - p2x, qy_ SWAP1 // stack: qx, p2y - p1y, qx_, p1x - p2x, qy_ - DUP2 MULFP254 + DUP2 + MULFP254 // stack: (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_ %mstore_kernel_general(102) // stack: p2y - p1y, qx_, p1x - p2x, qy_ diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm index ee8804c7..3069107f 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/fp12_mul.asm @@ -45,11 +45,13 @@ global mul_fp12: // stack: inA, inB, out - DUP1 %offset_fp6 + DUP1 + %offset_fp6 // stack: inA', inA, inB, out %load_fp6 // stack: f', inA, inB, out - DUP8 %offset_fp6 + DUP8 + %offset_fp6 // stack: inB', f', inA, inB, out %load_fp6 // stack: g', f', inA, inB, out @@ -118,7 +120,8 @@ mul_fp12_3: // stack: f'g'+fg, (f+f')(g+g'), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg} %subr_fp6 // stack: (f+f')(g+g') - (f'g'+fg), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg} - DUP14 %offset_fp6 + DUP14 + %offset_fp6 // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg} %store_fp6 // stack: fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg} @@ -130,7 +133,8 @@ mul_fp12_3: // stack: out, sh(f'g') + fg, inB, out {0: sh(f'g'), 6: f'g', 12: fg} %store_fp6 // stack: inB, out {0: sh(f'g'), 6: f'g', 12: fg} - %pop2 JUMP + %pop2 + JUMP ////////////////////////////////////// @@ -178,7 +182,8 @@ mul_fp12_3: global mul_fp12_sparse: // stack: inA, inB, out - DUP1 %offset_fp6 + DUP1 + %offset_fp6 // stack: inA', inA, inB, out %load_fp6 // stack: f', inA, inB, out @@ -210,7 +215,8 @@ global mul_fp12_sparse: // stack: g0 * f, f', inB, f, inB, f', out, f, inB, f', inA, inB, out %swap_fp6 // stack: f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out - DUP13 %add_const(8) + DUP13 + %add_const(8) // stack: inB2, f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out %load_fp2 // stack: G2 , f' , g0 * f, inB, f, inB, f', out, f, inB, f', inA, inB, out @@ -238,7 +244,8 @@ global mul_fp12_sparse: // stack: g0 * f', out, f, inB, f', inA, inB, out %swap_fp6_hole // stack: f , out, g0 * f', inB, f', inA, inB, out - DUP14 %add_const(8) + DUP14 + %add_const(8) // stack: inB2, f , out, g0 * f', inB, f', inA, inB, out %load_fp2 // stack: G2 , f , out, g0 * f', inB, f', inA, inB, out @@ -248,7 +255,8 @@ global mul_fp12_sparse: // stack: G2 * sh(f) + g0 * f', inB, f', inA, inB, out %swap_fp6_hole // stack: f' , inB, G2 * sh(f) + g0 * f', inA, inB, out - DUP7 %add_const(2) + DUP7 + %add_const(2) // stack: inB1, f' , inB, G2 * sh(f) + g0 * f', inA, inB, out %load_fp2 // stack: G1 , f' , inB, G2 * sh(f) + g0 * f', inA, inB, out @@ -256,11 +264,13 @@ global mul_fp12_sparse: // stack: G1 * sh(f'), inB, G2 * sh(f) + g0 * f', inA, inB, out %add_fp6_hole // stack: G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out - DUP9 %offset_fp6 + DUP9 + %offset_fp6 // stack: out', G1 * sh(f') + G2 * sh(f) + g0 * f', inA, inB, out %store_fp6 // stack: inA, inB, out - %pop3 JUMP + %pop3 + JUMP ///////////////////////// @@ -324,11 +334,13 @@ global square_fp12: // stack: square_fp12_2, inp, f, square_fp12_3, out %dup_fp6_2 // stack: f , square_fp12_2, inp, f, square_fp12_3, out - DUP16 %offset_fp6 + DUP16 + %offset_fp6 // stack: out', f , square_fp12_2, inp, f, square_fp12_3, out PUSH square_fp12_1 // stack: square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out - DUP10 %offset_fp6 + DUP10 + %offset_fp6 // stack: inp', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out %load_fp6 // stack: f', square_fp12_1, out', f , square_fp12_2, inp, f, square_fp12_3, out @@ -352,7 +364,9 @@ square_fp12_2: // stack: sh(f'f'), inp, f, square_fp12_3, out %swap_fp6_hole // stack: f, inp, sh(f'f'), square_fp12_3, out - SWAP6 SWAP13 SWAP6 + SWAP6 + SWAP13 + SWAP6 // stack: f, square_fp12_3, sh(f'f'), inp, out %jump(square_fp6) square_fp12_3: @@ -363,4 +377,5 @@ square_fp12_3: // stack: out, ff + sh(f'f'), inp, out %store_fp6 // stack: inp, out - %pop2 JUMP + %pop2 + JUMP