diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index bb2dce92..0080b351 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -27,6 +27,7 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"), include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"), include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/lines.asm"), include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"), include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"), include_str!("asm/curve/bn254/field_arithmetic/moddiv.asm"), diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm index 13807c41..573d4c04 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/constants.asm @@ -1,9 +1,9 @@ global miller_data: - BYTES 0x21, 0x13, 0x11, 0x61, 0x52, 0x24, 0x21, 0x21, - BYTES 0x11, 0x25, 0x13, 0x15, 0x44, 0x12, 0x21, 0x13, - BYTES 0x11, 0x11, 0x32, 0x33, 0x14, 0x21, 0x11, 0x13, - BYTES 0x12, 0x11, 0x11, 0x21, 0x11, 0x46, 0x11, 0x22, - BYTES 0x31, 0x11, 0x24, 0x11, 0x11, 0x26, 0x16, 0x21, - BYTES 0x21, 0x21, 0x11, 0x13, 0x15, 0x11, 0x34, 0x21, - BYTES 0x12, 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11, + BYTES 0x21, 0x13, 0x11, 0x61, 0x52, 0x24, 0x21, 0x21 + BYTES 0x11, 0x25, 0x13, 0x15, 0x44, 0x12, 0x21, 0x13 + BYTES 0x11, 0x11, 0x32, 0x33, 0x14, 0x21, 0x11, 0x13 + BYTES 0x12, 0x11, 0x11, 0x21, 0x11, 0x46, 0x11, 0x22 + BYTES 0x31, 0x11, 0x24, 0x11, 0x11, 0x26, 0x16, 0x21 + BYTES 0x21, 0x21, 0x11, 0x13, 0x15, 0x11, 0x34, 0x21 + BYTES 0x12, 0x11, 0x17, 0x21, 0x23, 0x12, 0x34, 0x11 BYTES 0x32, 0x32, 0x12, 0x13, 0x22, 0x15 \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm 
b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm index a9297076..f4e8bed9 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/lines.asm @@ -6,9 +6,14 @@ /// (-3*px**2) * qx, /// (2*py) * qy, +%macro tangent +%endmacro + /// def cord(p1x, p1y, p2x, p2y, qx, qy): /// return /// p1y*p2x - p2y*p1x, /// (p2y - p1y) * qx, /// (p1x - p2x) * qy, - \ No newline at end of file + +%macro cord +%endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm index 5ff74c6f..463e9573 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm @@ -114,7 +114,7 @@ mul_tangent_1: // stack: O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out %tangent // stack: line, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out - %sparse_store(100) + %sparse_store // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {100: line} PUSH 100 DUP2 // stack: out, 100, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {100: line} @@ -123,7 +123,7 @@ mul_tangent_2: // stack: out, retdest, 0xnm, times, O, P, Q, out {100: line} POP DUP5 DUP5 // stack: O, retdest, 0xnm, times, O, P, Q, out {100: line} - %ec_double_bn254 + // %ec_double_bn254 // stack: 2*O, retdest, 0xnm, times, O, P, Q, out {100: line} SWAP5 SWAP1 SWAP6 SWAP1 // stack: 2*O, retdest, 0xnm, times, 2*O, P, Q, out {100: line} @@ -145,7 +145,7 @@ mul_cord: // stack: O, P, Q, mul_cord_1, 0xnm, times, O, P, Q, out %cord // stack: line, mul_cord_1, 0xnm, times, O, P, Q, out - %sparse_store(100) + %sparse_store // stack: mul_cord_1, 0xnm, times, O, P, Q, out DUP12 // stack: out, mul_cord_1, 0xnm, times, O, P, Q, out @@ -158,8 +158,20 @@ mul_cord_1: // stack: 0xnm, times, O , P, Q, out DUP6 DUP6 DUP6 DUP6 // 
stack: O , P, 0xnm, times, O , P, Q, out - %ec_add_bn254 + // %ec_add_bn254 // stack: O + P, 0xnm, times, O , P, Q, out SWAP4 SWAP1 SWAP5 SWAP1 // stack: 0xnm, times, O+P, P, Q, out %jump(miller_one) + + +%macro sparse_store + // stack: g0, G1, G1' + PUSH 100 %mstore_kernel_general + // stack: G1, G1' + PUSH 102 %mstore_kernel_general + PUSH 103 %mstore_kernel_general + // stack: G1' + PUSH 108 %mstore_kernel_general + PUSH 109 %mstore_kernel_general +%endmacro diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm index 93ba4d6e..1065fd3a 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm @@ -31,7 +31,7 @@ global post_mllr: // stack: 100, out, tate_mul1, tate_mul2, tate_mul3, retdest DUP2 // stack: out, 100, out, tate_mul1, tate_mul2, tate_mul3, retdest - %inverse_fp12 + // %inverse_fp12 // stack: 100, out, tate_mul1, tate_mul2, tate_mul3, retdest {100: inv} DUP2 // stack: out, 100, out, tate_mul1, tate_mul2, tate_mul3, retdest {100: inv} @@ -57,7 +57,7 @@ tate_mul2: // stack: 100, post_pow, out, tate_mul3, retdest {100: acc} DUP3 // stack: out, 100, post_pow, out, tate_mul3, retdest {100: acc} - %jump(power) + // %jump(power) post_pow: // stack: 100, out, tate_mul3, retdest {100: pow} DUP2 diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm index 73736194..a76ed2ae 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/field_macros.asm @@ -588,13 +588,103 @@ // stack: c * f0, c * f1, c * f2, c * f3, c * f4, c * f5 %endmacro +/// cost: 16 dups + 14 swaps + 12 muls + 6 adds/subs = 48 +/// +/// G0 + G1t + G2t^2 = (a+bi) * (F0 + F1t + F2t^2) +/// = (a+bi)F0 + (a+bi)F1t + (a+bi)F2t^2 +/// +/// G0 = (a+bi)(f0+f0_i) = (af0 - bf0_) + 
(bf0 + af0_)i +/// G1 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i +/// G2 = (a+bi)(f2+f2_i) = (af2 - bf2_) + (bf2 + af2_)i + +%macro mul_fp2_fp6 + // stack: a, b, f0, f0_, f1, f1_, f2, f2_ + DUP2 + DUP5 + MULFP254 + // stack: bf0_, a, b, f0, f0_, f1, f1_, f2, f2_ + DUP2 + DUP5 + MULFP254 + // stack: af0, bf0_, a, b, f0, f0_, f1, f1_, f2, f2_ + SUBFP254 + // stack: g0, a, b, f0, f0_, f1, f1_, f2, f2_ + SWAP3 + // stack: f0, a, b, g0, f0_, f1, f1_, f2, f2_ + DUP3 + MULFP254 + // stack: bf0, a, b, g0, f0_, f1, f1_, f2, f2_ + SWAP1 + SWAP4 + // stack: f0_, bf0, b, g0, a, f1, f1_, f2, f2_ + DUP5 + MULFP254 + // stack: af0_, bf0, b, g0, a, f1, f1_, f2, f2_ + ADDFP254 + // stack: g0_, b, g0, a, f1, f1_, f2, f2_ + SWAP3 + // stack: a, b, g0, g0_, f1, f1_, f2, f2_ + DUP2 + DUP7 + MULFP254 + // stack: bf1_, a, b, g0, g0_, f1, f1_, f2, f2_ + DUP2 + DUP7 + MULFP254 + // stack: af1, bf1_, a, b, g0, g0_, f1, f1_, f2, f2_ + SUBFP254 + // stack: g1, a, b, g0, g0_, f1, f1_, f2, f2_ + SWAP5 + // stack: f1, a, b, g0, g0_, g1, f1_, f2, f2_ + DUP3 + MULFP254 + // stack: bf1, a, b, g0, g0_, g1, f1_, f2, f2_ + SWAP1 + SWAP6 + // stack: f1_, bf1, b, g0, g0_, g1, a, f2, f2_ + DUP7 + MULFP254 + // stack: af1_, bf1, b, g0, g0_, g1, a, f2, f2_ + ADDFP254 + // stack: g1_, b, g0, g0_, g1, a, f2, f2_ + SWAP5 + // stack: a, b, g0, g0_, g1, g1_, f2, f2_ + DUP2 + DUP9 + MULFP254 + // stack: bf2_, a, b, g0, g0_, g1, g1_, f2, f2_ + DUP2 + DUP9 + MULFP254 + // stack: af2, bf2_, a, b, g0, g0_, g1, g1_, f2, f2_ + SUBFP254 + // stack: g2, a, b, g0, g0_, g1, g1_, f2, f2_ + SWAP7 + // stack: f2, a, b, g0, g0_, g1, g1_, g2, f2_ + SWAP8 + // stack: f2_, a, b, g0, g0_, g1, g1_, g2, f2 + MULFP254 + // stack: af2_, b, g0, g0_, g1, g1_, g2, f2 + SWAP7 + // stack: f2, b, g0, g0_, g1, g1_, g2, af2_ + MULFP254 + // stack: bf2, g0, g0_, g1, g1_, g2, af2_ + SWAP1 + SWAP6 + // stack: af2_, bf2, g0_, g1, g1_, g2, g0 + ADDFP254 + // stack: g2_, g0_, g1, g1_, g2, g0 + SWAP5 + // stack: g0, g0_, g1, g1_, g2, g2_ 
+%endmacro + /// cost: 1 i9 (9) + 16 dups + 15 swaps + 12 muls + 6 adds/subs = 58 /// /// G0 + G1t + G2t^2 = (a+bi)t * (F0 + F1t + F2t^2) /// = (c+di)F2 + (a+bi)F0t + (a+bi)F1t^2 /// where c+di = (a+bi)(9+i) = (9a-b) + (a+9b)i /// -/// G0 = (c+di)(f0+f0_i) = (cf2 - df2_) + (df2 + cf2_)i +/// G0 = (c+di)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i /// G1 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i /// G2 = (a+bi)(f1+f1_i) = (af1 - bf1_) + (bf1 + af1_)i @@ -688,9 +778,9 @@ /// = (c+di)F1 + (c+di)F2t + (a+bi)F0t^2 /// where c+di = (a+bi)(9+i) = (9a-b) + (a+9b)i /// -/// G0 = (c+di)(f0+f0_i) = (cf1 - df1_) + (df1 + cf1_)i -/// G1 = (a+bi)(f0+f0_i) = (cf2 - df2_) + (df2 + cf2_)i -/// G2 = (a+bi)(f1+f1_i) = (af0 - bf0_) + (bf0 + af0_)i +/// G0 = (c+di)(f1+f1_i) = (cf1 - df1_) + (df1 + cf1_)i +/// G1 = (c+di)(f2+f2_i) = (cf2 - df2_) + (df2 + cf2_)i +/// G2 = (a+bi)(f0+f0_i) = (af0 - bf0_) + (bf0 + af0_)i %macro mul_fp2_fp6_sh2 // stack: a, b, f0, f0_, f1, f1_, f2, f2_ diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm index 6bab7ab0..37845f4b 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/frobenius.asm @@ -143,7 +143,7 @@ // stack: ptr %endmacro -%macro frob_fp12_6: +%macro frob_fp12_6 // stack: ptr DUP1 %offset_fp6 // stack: ptr', ptr