diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 62021123..32aeb8f6 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -55,15 +55,17 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/curve/bn254/curve_arithmetic/constants.asm"), include_str!("asm/curve/bn254/curve_arithmetic/curve_add.asm"), include_str!("asm/curve/bn254/curve_arithmetic/curve_mul.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/final_exponent.asm"), include_str!("asm/curve/bn254/curve_arithmetic/glv.asm"), - include_str!("asm/curve/bn254/curve_arithmetic/invariant_exponent.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/miller_loop.asm"), include_str!("asm/curve/bn254/curve_arithmetic/msm.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/pairing.asm"), include_str!("asm/curve/bn254/curve_arithmetic/precomputation.asm"), - include_str!("asm/curve/bn254/curve_arithmetic/tate_pairing.asm"), - include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"), + include_str!("asm/curve/bn254/curve_arithmetic/twisted_curve.asm"), include_str!("asm/curve/bn254/field_arithmetic/degree_6_mul.asm"), include_str!("asm/curve/bn254/field_arithmetic/degree_12_mul.asm"), include_str!("asm/curve/bn254/field_arithmetic/frobenius.asm"), + include_str!("asm/curve/bn254/field_arithmetic/inverse.asm"), include_str!("asm/curve/bn254/field_arithmetic/util.asm"), include_str!("asm/curve/common.asm"), include_str!("asm/curve/secp256k1/curve_add.asm"), diff --git a/evm/src/cpu/kernel/asm/core/precompiles/snarkv.asm b/evm/src/cpu/kernel/asm/core/precompiles/snarkv.asm index fbb46670..433186b5 100644 --- a/evm/src/cpu/kernel/asm/core/precompiles/snarkv.asm +++ b/evm/src/cpu/kernel/asm/core/precompiles/snarkv.asm @@ -1,3 +1,118 @@ global precompile_snarkv: - // TODO - PANIC + // stack: address, retdest, new_ctx, (old stack) + %pop2 + // stack: new_ctx, (old stack) + DUP1 + SET_CONTEXT + // stack: (empty) + PUSH 
0x100000000 // = 2^32 (is_kernel = true) + // stack: kexit_info + + PUSH 192 %calldatasize DUP2 DUP2 + // stack: calldata_size, 192, calldata_size, 192, kexit_info + MOD %jumpi(fault_exception) // calldata_size should be a multiple of 192 + DIV + // stack: k, kexit_info + DUP1 %mul_const(@SNARKV_DYNAMIC_GAS) %add_const(@SNARKV_STATIC_GAS) + %stack (gas, k, kexit_info) -> (gas, kexit_info, k) + %charge_gas + SWAP1 + // stack: k, kexit_info + PUSH 0 +loading_loop: + // stack: i, k, kexit_info + DUP2 DUP2 EQ %jumpi(loading_done) + // stack: i, k, kexit_info + DUP1 %mul_const(192) + // stack: px, i, k, kexit_info + GET_CONTEXT + %stack (ctx, px) -> (ctx, @SEGMENT_CALLDATA, px, 32, loading_loop_contd, px) + %jump(mload_packing) +loading_loop_contd: + // stack: x, px, i, k, kexit_info + SWAP1 %add_const(32) + GET_CONTEXT + %stack (ctx, py) -> (ctx, @SEGMENT_CALLDATA, py, 32, loading_loop_contd2, py) + %jump(mload_packing) +loading_loop_contd2: + // stack: y, py, x, i, k, kexit_info + SWAP1 %add_const(32) + GET_CONTEXT + %stack (ctx, px_im) -> (ctx, @SEGMENT_CALLDATA, px_im, 32, loading_loop_contd3, px_im) + %jump(mload_packing) +loading_loop_contd3: + // stack: x_im, px_im, y, x, i, k, kexit_info + SWAP1 %add_const(32) + // stack: px_re, x_im, y, x, i, k, kexit_info + GET_CONTEXT + %stack (ctx, px_re) -> (ctx, @SEGMENT_CALLDATA, px_re, 32, loading_loop_contd4, px_re) + %jump(mload_packing) +loading_loop_contd4: + // stack: x_re, px_re, x_im, y, x, i, k, kexit_info + SWAP1 %add_const(32) + // stack: py_im, x_re, x_im, y, x, i, k, kexit_info + GET_CONTEXT + %stack (ctx, py_im) -> (ctx, @SEGMENT_CALLDATA, py_im, 32, loading_loop_contd5, py_im) + %jump(mload_packing) +loading_loop_contd5: + // stack: y_im, py_im, x_re, x_im, y, x, i, k, kexit_info + SWAP1 %add_const(32) + // stack: py_re, y_im, x_re, x_im, y, x, i, k, kexit_info + GET_CONTEXT + %stack (ctx, py_re) -> (ctx, @SEGMENT_CALLDATA, py_re, 32, loading_loop_contd6) + %jump(mload_packing) +loading_loop_contd6: + // 
stack: y_re, y_im, x_re, x_im, y, x, i, k, kexit_info + SWAP1 // the EVM serializes the imaginary part first + // stack: y_im, y_re, x_re, x_im, y, x, i, k, kexit_info + DUP7 + // stack: i, y_im, y_re, x_re, x_im, y, x, i, k, kexit_info + %mul_const(6) %add_const(@SNARKV_INP) + %add_const(5) + %mstore_kernel_bn254_pairing + // stack: y_re, x_re, x_im, y, x, i, k, kexit_info + DUP6 + // stack: i, y_re, x_re, x_im, y, x, i, k, kexit_info + %mul_const(6) %add_const(@SNARKV_INP) + %add_const(4) + %mstore_kernel_bn254_pairing + SWAP1 // the EVM serializes the imaginary part first + // stack: x_im, x_re, y, x, i, k, kexit_info + DUP5 + // stack: i, x_im, x_re, y, x, i, k, kexit_info + %mul_const(6) %add_const(@SNARKV_INP) + %add_const(3) + %mstore_kernel_bn254_pairing + // stack: x_re, y, x, i, k, kexit_info + DUP4 + // stack: i, x_re, y, x, i, k, kexit_info + %mul_const(6) %add_const(@SNARKV_INP) + %add_const(2) + %mstore_kernel_bn254_pairing + // stack: y, x, i, k, kexit_info + DUP3 + // stack: i, y, x, i, k, kexit_info + %mul_const(6) %add_const(@SNARKV_INP) + %add_const(1) + %mstore_kernel_bn254_pairing + // stack: x, i, k, kexit_info + DUP2 + // stack: i, x, i, k, kexit_info + %mul_const(6) %add_const(@SNARKV_INP) + %mstore_kernel_bn254_pairing + // stack: i, k, kexit_info + %increment + %jump(loading_loop) + +loading_done: + %stack (i, k) -> (k, @SNARKV_INP, @SNARKV_OUT, got_result) + %jump(bn254_pairing) +got_result: + // stack: result, kexit_info + DUP1 %eq_const(@U256_MAX) %jumpi(fault_exception) + // stack: result, kexit_info + // Store the result bool (repr. by a U256) to the parent's return data using `mstore_unpacking`. 
+ %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32) + %mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT) + %stack (parent_ctx, address) -> (parent_ctx, @SEGMENT_RETURNDATA, 0, address, 32, pop_and_return_success) + %jump(mstore_unpacking) diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm index 499d88d5..a43c4047 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/curve_add.asm @@ -201,8 +201,8 @@ global bn_double: %jump(bn_add_equal_points) // Check if (x,y) is a valid curve point. -// Returns (range & curve) || is_identity -// where +// Returns (range & curve) || ident +// where // range = (x < N) & (y < N) // curve = y^2 == (x^3 + 3) // ident = (x,y) == (0,0) diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_exponent.asm new file mode 100644 index 00000000..f8e48807 --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/final_exponent.asm @@ -0,0 +1,319 @@ +/// To make the Tate pairing an invariant, the final step is to exponentiate by +/// (p^12 - 1)/N = (p^6 - 1) * (p^2 + 1) * (p^4 - p^2 + 1)/N +/// and thus we can exponentiate by each factor sequentially. 
+/// +/// def bn254_final_exponent(y: Fp12): +/// y = first_exp(y) +/// y = second_exp(y) +/// return final_exp(y) + +global bn254_final_exponent: + +/// first, exponentiate by (p^6 - 1) via +/// def first_exp(y): +/// return y.frob(6) / y + // stack: k, inp, out, retdest {out: y} + %stack (k, inp, out) -> (out, 0, first_exp, out) + // stack: out, 0, first_exp, out, retdest {out: y} + %jump(inv_fp254_12) +first_exp: + // stack: out, retdest {out: y , 0: y^-1} + %frob_fp254_12_6 + // stack: out, retdest {out: y_6, 0: y^-1} + %stack (out) -> (out, 0, out, second_exp, out) + // stack: out, 0, out, second_exp, out, retdest {out: y_6, 0: y^-1} + %jump(mul_fp254_12) + +/// second, exponentiate by (p^2 + 1) via +/// def second_exp(y): +/// return y.frob(2) * y +second_exp: + // stack: out, retdest {out: y} + %stack (out) -> (out, 0, out, out, final_exp, out) + // stack: out, 0, out, out, final_exp, out, retdest {out: y} + %frob_fp254_12_2_ + // stack: 0, out, out, final_exp, out, retdest {out: y, 0: y_2} + %jump(mul_fp254_12) + +/// Finally, we must exponentiate by (p^4 - p^2 + 1)/N +/// To do so efficiently, we can express this power as +/// (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0 +/// and simultaneously compute y^a4, y^a2, y^a0 where +/// a1 = a4 + 2a2 - a0 +/// We first initialize these powers as 1 and then use +/// binary algorithms for exponentiation. 
+/// +/// def final_exp(y): +/// y4, y2, y0 = 1, 1, 1 +/// power_loop_4() +/// power_loop_2() +/// power_loop_0() +/// custom_powers() +/// final_power() + +final_exp: + // stack: val, retdest + %stack (val) -> (val, 0, val) + // stack: val, 0, val, retdest + %move_fp254_12 + // stack: 0, val, retdest {0: sqr} + %stack () -> (1, 1, 1) + // stack: 1, 1, 1, 0, val, retdest + %mstore_kernel_bn254_pairing(12) + %mstore_kernel_bn254_pairing(24) + %mstore_kernel_bn254_pairing(36) + // stack: 0, val, retdest {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> (64, 62, 65) + // stack: 64, 62, 65, 0, val, retdest {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(power_loop_4) + +/// After computing the powers +/// y^a4, y^a2, y^a0 +/// we would like to transform them to +/// y^a2, y^-a1, y^-a0 +/// +/// def custom_powers() +/// y0 = y0^{-1} +/// y1 = y4 * y2^2 * y0 +/// return y2, y1, y0 +/// +/// And finally, upon doing so, compute the final power +/// y^(p^3) * (y^a2)^(p^2) * (y^-a1)^p * (y^-a0) +/// +/// def final_power() +/// y = y.frob(3) +/// y2 = y2.frob(2) +/// y1 = y1.frob(1) +/// return y * y2 * y1 * y0 + +custom_powers: + // stack: val, retdest {12: y0, 24: y2, 36: y4} + %stack () -> (12, 48, make_term_1) + // stack: 12, 48, make_term_1, val, retdest {12: y0, 24: y2, 36: y4} + %jump(inv_fp254_12) +make_term_1: + // stack: val, retdest {24: y2, 36: y4, 48: y0^-1} + %stack () -> (24, 36, 36, make_term_2) + // stack: 24, 36, 36, make_term_2, val, retdest {24: y2, 36: y4, 48: y0^-1} + %jump(mul_fp254_12) +make_term_2: + // stack: val, retdest {24: y2, 36: y4 * y2, 48: y0^-1} + %stack () -> (24, 36, 36, make_term_3) + // stack: 24, 36, 36, make_term_3, val, retdest {24: y2, 36: y4 * y2, 48: y0^-1} + %jump(mul_fp254_12) +make_term_3: + // stack: val, retdest {24: y2, 36: y4 * y2^2, 48: y0^-1} + %stack () -> (48, 36, 36, final_power) + // stack: 48, 36, 36, final_power, val, retdest {24: y2, 36: y4 * y2^2, 48: y0^-1} + %jump(mul_fp254_12) +final_power: + // stack: val, retdest {val: 
y , 24: y^a2 , 36: y^a1 , 48: y^a0} + %frob_fp254_12_3 + // stack: val, retdest {val: y_3, 24: y^a2 , 36: y^a1 , 48: y^a0} + %stack () -> (24, 24) + %frob_fp254_12_2_ + POP + // stack: val, retdest {val: y_3, 24: (y^a2)_2, 36: y^a1 , 48: y^a0} + PUSH 36 + %frob_fp254_12_1 + POP + // stack: val, retdest {val: y_3, 24: (y^a2)_2, 36: (y^a1)_1, 48: y^a0} + %stack (val) -> (24, val, val, penult_mul, val) + // stack: 24, val, val, penult_mul, val, retdest {val: y_3, 24: (y^a2)_2, 36: (y^a1)_1, 48: y^a0} + %jump(mul_fp254_12) +penult_mul: + // stack: val, retdest {val: y_3 * (y^a2)_2, 36: (y^a1)_1, 48: y^a0} + %stack (val) -> (36, val, val, final_mul, val) + // stack: 36, val, val, final_mul, val, retdest {val: y_3 * (y^a2)_2, 36: (y^a1)_1, 48: y^a0} + %jump(mul_fp254_12) +final_mul: + // stack: val, retdest {val: y_3 * (y^a2)_2 * (y^a1)_1, 48: y^a0} + %stack (val) -> (48, val, val) + // stack: 48, val, val, retdest {val: y_3 * (y^a2)_2 * (y^a1)_1, 48: y^a0} + %jump(mul_fp254_12) + + +/// def power_loop_4(): +/// for i in range(64): +/// abc = load(i, power_data_4) +/// if a: +/// y4 *= acc +/// if b: +/// y2 *= acc +/// if c: +/// y0 *= acc +/// acc = square_fp254_12(acc) +/// y4 *= acc +/// +/// def power_loop_2(): +/// for i in range(62): +/// ab = load(i, power_data_2) +/// if a: +/// y2 *= acc +/// if b: +/// y0 *= acc +/// acc = square_fp254_12(acc) +/// y2 *= acc +/// +/// def power_loop_0(): +/// for i in range(65): +/// a = load(i, power_data_0) +/// if a: +/// y0 *= acc +/// acc = square_fp254_12(acc) +/// y0 *= acc + +power_loop_4: + // stack: i , j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + ISZERO + // stack: break?, i , j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_4_end) + // stack: i , j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %sub_const(1) + // stack: i-1, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + %mload_kernel_code(power_data_4) + // stack: abc, i-1, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + %lt_const(100) + 
// stack: skip?, abc, i-1, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_4_b) + // stack: abc, i-1, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %sub_const(100) + // stack: bc, i-1, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> (36, 36, power_loop_4_b) + // stack: 36, 36, power_loop_4_b, bc, i-1, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP8 + // stack: sqr, 36, 36, power_loop_4_b, bc, i-1, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) +power_loop_4_b: + // stack: bc, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + %lt_const(10) + // stack: skip?, bc, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_4_c) + // stack: bc, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %sub_const(10) + // stack: c, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> (24, 24, power_loop_4_c) + // stack: 24, 24, power_loop_4_c, c, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP8 + // stack: sqr, 24, 24, power_loop_4_c, c, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) +power_loop_4_c: + // stack: c, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + ISZERO + // stack: skip?, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_4_sq) + // stack: i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> (12, 12, power_loop_4_sq) + // stack: 12, 12, power_loop_4_sq, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP7 + // stack: sqr, 12, 12, power_loop_4_sq, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) +power_loop_4_sq: + // stack: i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + PUSH power_loop_4 + // stack: power_loop_4, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP5 + DUP1 + // stack: sqr, sqr, power_loop_4, i, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(square_fp254_12) +power_loop_4_end: + // stack: 0, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + POP + // stack: j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> (36, 36, 
power_loop_2) + // stack: 36, 36, power_loop_2, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP6 + // stack: sqr, 36, 36, power_loop_2, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) + +power_loop_2: + // stack: j , k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + ISZERO + // stack: break?, j , k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_2_end) + // stack: j , k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %sub_const(1) + // stack: j-1, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + %mload_kernel_code(power_data_2) + // stack: ab, j-1, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + %lt_const(10) + // stack: skip?, ab, j-1, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_2_b) + // stack: ab, j-1, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %sub_const(10) + // stack: b, j-1, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> (24, 24, power_loop_2_b) + // stack: 24, 24, power_loop_2_b, b, j-1, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP7 + // stack: sqr, 24, 24, power_loop_2_b, b, j-1, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) +power_loop_2_b: + // stack: b, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + ISZERO + // stack: skip?, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_2_sq) + // stack: j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> (12, 12, power_loop_2_sq) + // stack: 12, 12, power_loop_2_sq, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP6 + // stack: sqr, 12, 12, power_loop_2_sq, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) +power_loop_2_sq: + // stack: j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + PUSH power_loop_2 + // stack: power_loop_2, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP4 + DUP1 + // stack: sqr, sqr, power_loop_2, j, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(square_fp254_12) +power_loop_2_end: + // stack: 0, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + POP + // stack: k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> 
(24, 24, power_loop_0) + // stack: 24, 24, power_loop_0, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP5 + // stack: sqr, 24, 24, power_loop_0, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) + +power_loop_0: + // stack: k , sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + ISZERO + // stack: break?, k , sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_0_end) + // stack: k , sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %sub_const(1) + // stack: k-1, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP1 + %mload_kernel_code(power_data_0) + // stack: a, k-1, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + ISZERO + // stack: skip?, k-1, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jumpi(power_loop_0_sq) + // stack: k-1, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack () -> (12, 12, power_loop_0_sq) + // stack: 12, 12, power_loop_0_sq, k-1, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP5 + // stack: sqr, 12, 12, power_loop_0_sq, k-1, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) +power_loop_0_sq: + // stack: k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + PUSH power_loop_0 + // stack: power_loop_0, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + DUP3 + DUP1 + // stack: sqr, sqr, power_loop_0, k, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(square_fp254_12) +power_loop_0_end: + // stack: 0, sqr {0: sqr, 12: y0, 24: y2, 36: y4} + %stack (i, sqr) -> (12, sqr, 12, custom_powers) + // stack: 12, sqr, 12, custom_powers {0: sqr, 12: y0, 24: y2, 36: y4} + %jump(mul_fp254_12) diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm deleted file mode 100644 index 2fcd5d2b..00000000 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/invariant_exponent.asm +++ /dev/null @@ -1,319 +0,0 @@ -/// To make the Tate pairing an invariant, the final step is to exponentiate by -/// (p^12 - 1)/N = (p^6 - 1) * (p^2 + 1) * (p^4 - p^2 + 1)/N -/// and thus we can exponentiate by each factor 
sequentially. -/// -/// def bn254_invariant_exponent(y: Fp12): -/// y = first_exp(y) -/// y = second_exp(y) -/// return final_exp(y) - -global bn254_invariant_exponent: - -/// first, exponentiate by (p^6 - 1) via -/// def first_exp(y): -/// return y.frob(6) / y - // stack: out, retdest {out: y} - %stack (out) -> (out, 0, first_exp, out) - // stack: out, 0, first_exp, out, retdest {out: y} - %jump(inv_fp254_12) -first_exp: - // stack: out, retdest {out: y , 0: y^-1} - %frob_fp254_12_6 - // stack: out, retdest {out: y_6, 0: y^-1} - %stack (out) -> (out, 0, out, second_exp, out) - // stack: out, 0, out, second_exp, out, retdest {out: y_6, 0: y^-1} - %jump(mul_fp254_12) - -/// second, exponentiate by (p^2 + 1) via -/// def second_exp(y): -/// return y.frob(2) * y -second_exp: - // stack: out, retdest {out: y} - %stack (out) -> (out, 0, out, out, final_exp, out) - // stack: out, 0, out, out, final_exp, out, retdest {out: y} - %frob_fp254_12_2_ - // stack: 0, out, out, final_exp, out, retdest {out: y, 0: y_2} - %jump(mul_fp254_12) - -/// Finally, we must exponentiate by (p^4 - p^2 + 1)/N -/// To do so efficiently, we can express this power as -/// (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0 -/// and simultaneously compute y^a4, y^a2, y^a0 where -/// a1 = a4 + 2a2 - a0 -/// We first initialize these powers as 1 and then use -/// binary algorithms for exponentiation. 
-/// -/// def final_exp(y): -/// y4, y2, y0 = 1, 1, 1 -/// power_loop_4() -/// power_loop_2() -/// power_loop_0() -/// custom_powers() -/// final_power() - -final_exp: - // stack: val, retdest - %stack (val) -> (val, 12, val) - // stack: val, 12, val, retdest - %move_fp254_12 - // stack: 12, val, retdest {12: sqr} - %stack () -> (1, 1, 1) - // stack: 1, 1, 1, 12, val, retdest - %mstore_kernel_bn254_pairing(24) - %mstore_kernel_bn254_pairing(36) - %mstore_kernel_bn254_pairing(48) - // stack: 12, val, retdest {12: sqr, 24: y0, 36: y2, 48: y4} - %stack () -> (64, 62, 65) - // stack: 64, 62, 65, 12, val, retdest {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(power_loop_4) - -/// After computing the powers -/// y^a4, y^a2, y^a0 -/// we would like to transform them to -/// y^a2, y^-a1, y^-a0 -/// -/// def custom_powers() -/// y0 = y0^{-1} -/// y1 = y4 * y2^2 * y0 -/// return y2, y1, y0 -/// -/// And finally, upon doing so, compute the final power -/// y^(p^3) * (y^a2)^(p^2) * (y^-a1)^p * (y^-a0) -/// -/// def final_power() -/// y = y.frob(3) -/// y2 = y2.frob(2) -/// y1 = y1.frob(1) -/// return y * y2 * y1 * y0 - -custom_powers: - // stack: val, retdest {24: y0, 36: y2, 48: y4} - %stack () -> (24, 60, make_term_1) - // stack: 24, 60, make_term_1, val, retdest {24: y0, 36: y2, 48: y4} - %jump(inv_fp254_12) -make_term_1: - // stack: val, retdest {36: y2, 48: y4, 60: y0^-1} - %stack () -> (36, 48, 48, make_term_2) - // stack: 36, 48, 48, make_term_2, val, retdest {36: y2, 48: y4, 60: y0^-1} - %jump(mul_fp254_12) -make_term_2: - // stack: val, retdest {36: y2, 48: y4 * y2, 60: y0^-1} - %stack () -> (36, 48, 48, make_term_3) - // stack: 36, 48, 48, make_term_3, val, retdest {36: y2, 48: y4 * y2, 60: y0^-1} - %jump(mul_fp254_12) -make_term_3: - // stack: val, retdest {36: y2, 48: y4 * y2^2, 60: y0^-1} - %stack () -> (60, 48, 48, final_power) - // stack: 60, 48, 48, final_power, val, retdest {36: y2, 48: y4 * y2^2, 60: y0^-1} - %jump(mul_fp254_12) -final_power: - // stack: val, 
retdest {val: y , 36: y^a2 , 48: y^a1 , 60: y^a0} - %frob_fp254_12_3 - // stack: val, retdest {val: y_3, 36: y^a2 , 48: y^a1 , 60: y^a0} - %stack () -> (36, 36) - %frob_fp254_12_2_ - POP - // stack: val, retdest {val: y_3, 36: (y^a2)_2, 48: y^a1 , 60: y^a0} - PUSH 48 - %frob_fp254_12_1 - POP - // stack: val, retdest {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0} - %stack (val) -> (36, val, val, penult_mul, val) - // stack: 36, val, val, penult_mul, val, retdest {val: y_3, 36: (y^a2)_2, 48: (y^a1)_1, 60: y^a0} - %jump(mul_fp254_12) -penult_mul: - // stack: val, retdest {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0} - %stack (val) -> (48, val, val, final_mul, val) - // stack: 48, val, val, final_mul, val, retdest {val: y_3 * (y^a2)_2, 48: (y^a1)_1, 60: y^a0} - %jump(mul_fp254_12) -final_mul: - // stack: val, retdest {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0} - %stack (val) -> (60, val, val) - // stack: 60, val, val, retdest {val: y_3 * (y^a2)_2 * (y^a1)_1, 60: y^a0} - %jump(mul_fp254_12) - - -/// def power_loop_4(): -/// for i in range(64): -/// abc = load(i, power_data_4) -/// if a: -/// y4 *= acc -/// if b: -/// y2 *= acc -/// if c: -/// y0 *= acc -/// acc = square_fp254_12(acc) -/// y4 *= acc -/// -/// def power_loop_2(): -/// for i in range(62): -/// ab = load(i, power_data_2) -/// if a: -/// y2 *= acc -/// if b: -/// y0 *= acc -/// acc = square_fp254_12(acc) -/// y2 *= acc -/// -/// def power_loop_0(): -/// for i in range(65): -/// a = load(i, power_data_0) -/// if a: -/// y0 *= acc -/// acc = square_fp254_12(acc) -/// y0 *= acc - -power_loop_4: - // stack: i , j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 - ISZERO - // stack: break?, i , j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_4_end) - // stack: i , j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %sub_const(1) - // stack: i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 - %mload_kernel_code(power_data_4) - // stack: abc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 
- %lt_const(100) - // stack: skip?, abc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_4_b) - // stack: abc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %sub_const(100) - // stack: bc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %stack () -> (48, 48, power_loop_4_b) - // stack: 48, 48, power_loop_4_b, bc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP8 - // stack: sqr, 48, 48, power_loop_4_b, bc, i-1, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) -power_loop_4_b: - // stack: bc, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 - %lt_const(10) - // stack: skip?, bc, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_4_c) - // stack: bc, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %sub_const(10) - // stack: c, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %stack () -> (36, 36, power_loop_4_c) - // stack: 36, 36, power_loop_4_c, c, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP8 - // stack: sqr, 36, 36, power_loop_4_c, c, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) -power_loop_4_c: - // stack: c, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - ISZERO - // stack: skip?, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_4_sq) - // stack: i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %stack () -> (24, 24, power_loop_4_sq) - // stack: 24, 24, power_loop_4_sq, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP7 - // stack: sqr, 24, 24, power_loop_4_sq, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) -power_loop_4_sq: - // stack: i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - PUSH power_loop_4 - // stack: power_loop_4, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP5 - DUP1 - // stack: sqr, sqr, power_loop_4, i, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(square_fp254_12) -power_loop_4_end: - // stack: 0, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - POP - // stack: j, k, sqr {12: sqr, 24: y0, 
36: y2, 48: y4} - %stack () -> (48, 48, power_loop_2) - // stack: 48, 48, power_loop_2, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP6 - // stack: sqr, 48, 48, power_loop_2, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) - -power_loop_2: - // stack: j , k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 - ISZERO - // stack: break?, j , k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_2_end) - // stack: j , k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %sub_const(1) - // stack: j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 - %mload_kernel_code(power_data_2) - // stack: ab, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 - %lt_const(10) - // stack: skip?, ab, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_2_b) - // stack: ab, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %sub_const(10) - // stack: b, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %stack () -> (36, 36, power_loop_2_b) - // stack: 36, 36, power_loop_2_b, b, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP7 - // stack: sqr, 36, 36, power_loop_2_b, b, j-1, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) -power_loop_2_b: - // stack: b, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - ISZERO - // stack: skip?, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_2_sq) - // stack: j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %stack () -> (24, 24, power_loop_2_sq) - // stack: 24, 24, power_loop_2_sq, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP6 - // stack: sqr, 24, 24, power_loop_2_sq, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) -power_loop_2_sq: - // stack: j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - PUSH power_loop_2 - // stack: power_loop_2, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP4 - DUP1 - // stack: sqr, sqr, power_loop_2, j, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(square_fp254_12) -power_loop_2_end: - // stack: 0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - POP - // 
stack: k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %stack () -> (36, 36, power_loop_0) - // stack: 36, 36, power_loop_0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP5 - // stack: sqr, 36, 36, power_loop_0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) - -power_loop_0: - // stack: k , sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 - ISZERO - // stack: break?, k , sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_0_end) - // stack: k , sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %sub_const(1) - // stack: k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP1 - %mload_kernel_code(power_data_0) - // stack: a, k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - ISZERO - // stack: skip?, k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jumpi(power_loop_0_sq) - // stack: k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %stack () -> (24, 24, power_loop_0_sq) - // stack: 24, 24, power_loop_0_sq, k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP5 - // stack: sqr, 24, 24, power_loop_0_sq, k-1, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) -power_loop_0_sq: - // stack: k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - PUSH power_loop_0 - // stack: power_loop_0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - DUP3 - DUP1 - // stack: sqr, sqr, power_loop_0, k, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(square_fp254_12) -power_loop_0_end: - // stack: 0, sqr {12: sqr, 24: y0, 36: y2, 48: y4} - %stack (i, sqr) -> (24, sqr, 24, custom_powers) - // stack: 24, sqr, 24, custom_powers {12: sqr, 24: y0, 36: y2, 48: y4} - %jump(mul_fp254_12) diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm similarity index 77% rename from evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm rename to evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm index f09684bd..120365af 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/tate_pairing.asm +++ 
b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/miller_loop.asm @@ -1,12 +1,3 @@ -/// def tate(P: Curve, Q: TwistedCurve) -> Fp12: -/// out = miller_loop(P, Q) -/// return bn254_invariant_exponent(P, Q) -global bn254_tate: - // stack: inp, out, retdest - %stack (inp, out) -> (inp, out, bn254_invariant_exponent, out) - // stack: inp, out, bn254_invariant_exponent, out, retdest - %jump(bn254_miller) - /// def miller(P, Q): /// miller_init() /// miller_loop() @@ -35,13 +26,13 @@ global bn254_tate: /// mul_tangent() global bn254_miller: - // stack: ptr, out, retdest - %stack (ptr, out) -> (out, 1, ptr, out) - // stack: out, 1, ptr, out, retdest - %mstore_kernel_bn254_pairing - // stack: ptr, out, retdest + // stack: ptr, out, retdest + %stack (ptr, out) -> (out, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ptr, out) + // stack: out, unit, ptr, out, retdest + %store_fp254_12 + // stack: ptr, out, retdest %load_fp254_6 - // stack: P, Q, out, retdest + // stack: P, Q, out, retdest %stack (P: 2) -> (0, 53, P, P) // stack: 0, 53, O, P, Q, out, retdest // the head 0 lets miller_loop start with POP @@ -64,6 +55,7 @@ miller_return: // stack: times, O, P, Q, out, retdest %stack (times, O: 2, P: 2, Q: 4, out, retdest) -> (retdest) // stack: retdest + %clear_line JUMP miller_one: @@ -109,35 +101,35 @@ mul_tangent: // stack: out, out, mul_tangent_1, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out %jump(square_fp254_12) mul_tangent_1: - // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out + // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out DUP13 DUP13 DUP13 DUP13 - // stack: Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out + // stack: Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out DUP11 DUP11 - // stack: O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out + // stack: O, Q, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out %tangent - // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {0: line} - %stack 
(out) -> (out, 0, out) - // stack: out, 0, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {0: line} + // stack: out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {12: line} + %stack (out) -> (out, 12, out) + // stack: out, 12, out, mul_tangent_2, retdest, 0xnm, times, O, P, Q, out {12: line} %jump(mul_fp254_12_sparse) mul_tangent_2: - // stack: retdest, 0xnm, times, O, P, Q, out {0: line} + // stack: retdest, 0xnm, times, O, P, Q, out {12: line} PUSH after_double - // stack: after_double, retdest, 0xnm, times, O, P, Q, out {0: line} + // stack: after_double, retdest, 0xnm, times, O, P, Q, out {12: line} DUP6 DUP6 - // stack: O, after_double, retdest, 0xnm, times, O, P, Q, out {0: line} + // stack: O, after_double, retdest, 0xnm, times, O, P, Q, out {12: line} %jump(bn_double) after_double: - // stack: 2*O, retdest, 0xnm, times, O, P, Q, out {0: line} + // stack: 2*O, retdest, 0xnm, times, O, P, Q, out {12: line} SWAP5 POP SWAP5 POP - // stack: retdest, 0xnm, times, 2*O, P, Q, out {0: line} + // stack: retdest, 0xnm, times, 2*O, P, Q, out {12: line} JUMP /// def mul_cord() @@ -146,26 +138,26 @@ after_double: /// O += P mul_cord: - // stack: 0xnm, times, O, P, Q, out + // stack: 0xnm, times, O, P, Q, out PUSH mul_cord_1 - // stack: mul_cord_1, 0xnm, times, O, P, Q, out + // stack: mul_cord_1, 0xnm, times, O, P, Q, out DUP11 DUP11 DUP11 DUP11 - // stack: Q, mul_cord_1, 0xnm, times, O, P, Q, out + // stack: Q, mul_cord_1, 0xnm, times, O, P, Q, out DUP9 DUP9 - // stack: O, Q, mul_cord_1, 0xnm, times, O, P, Q, out + // stack: O, Q, mul_cord_1, 0xnm, times, O, P, Q, out DUP13 DUP13 - // stack: P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out + // stack: P, O, Q, mul_cord_1, 0xnm, times, O, P, Q, out %cord - // stack: mul_cord_1, 0xnm, times, O, P, Q, out {0: line} + // stack: mul_cord_1, 0xnm, times, O, P, Q, out {12: line} DUP12 - // stack: out, mul_cord_1, 0xnm, times, O, P, Q, out {0: line} - %stack (out) -> (out, 0, out) - // stack: out, 0, out, mul_cord_1, 
0xnm, times, O, P, Q, out {0: line} + // stack: out, mul_cord_1, 0xnm, times, O, P, Q, out {12: line} + %stack (out) -> (out, 12, out) + // stack: out, 12, out, mul_cord_1, 0xnm, times, O, P, Q, out {12: line} %jump(mul_fp254_12_sparse) mul_cord_1: // stack: 0xnm, times, O , P, Q, out @@ -202,7 +194,7 @@ after_add: // stack: py^2 , 9, px, py, qx, qx_, qy, qy_ SUBFP254 // stack: py^2 - 9, px, py, qx, qx_, qy, qy_ - %mstore_kernel_bn254_pairing(0) + %mstore_kernel_bn254_pairing(12) // stack: px, py, qx, qx_, qy, qy_ DUP1 MULFP254 @@ -218,7 +210,7 @@ after_add: DUP3 MULFP254 // stack: (-3*px^2)qx, py, -3px^2, qx_, qy, qy_ - %mstore_kernel_bn254_pairing(2) + %mstore_kernel_bn254_pairing(14) // stack: py, -3px^2, qx_, qy, qy_ PUSH 2 MULFP254 @@ -228,15 +220,15 @@ after_add: DUP4 MULFP254 // stack: (2py)qy, -3px^2, qx_, 2py, qy_ - %mstore_kernel_bn254_pairing(8) + %mstore_kernel_bn254_pairing(20) // stack: -3px^2, qx_, 2py, qy_ MULFP254 // stack: (-3px^2)*qx_, 2py, qy_ - %mstore_kernel_bn254_pairing(3) + %mstore_kernel_bn254_pairing(15) // stack: 2py, qy_ MULFP254 // stack: (2py)*qy_ - %mstore_kernel_bn254_pairing(9) + %mstore_kernel_bn254_pairing(21) %endmacro /// def cord(p1x, p1y, p2x, p2y, qx, qy): @@ -258,7 +250,7 @@ after_add: // stack: p1y*p2x , p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ SUBFP254 // stack: p1y*p2x - p2y*p1x, p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ - %mstore_kernel_bn254_pairing(0) + %mstore_kernel_bn254_pairing(12) // stack: p1x , p1y, p2x , p2y, qx, qx_, qy, qy_ SWAP3 // stack: p2y , p1y, p2x , p1x, qx, qx_, qy, qy_ @@ -273,20 +265,29 @@ after_add: DUP5 MULFP254 // stack: (p1x - p2x)qy, p2y - p1y, qx, qx_, p1x - p2x, qy_ - %mstore_kernel_bn254_pairing(8) + %mstore_kernel_bn254_pairing(20) // stack: p2y - p1y, qx, qx_, p1x - p2x, qy_ SWAP1 // stack: qx, p2y - p1y, qx_, p1x - p2x, qy_ DUP2 MULFP254 // stack: (p2y - p1y)qx, p2y - p1y, qx_, p1x - p2x, qy_ - %mstore_kernel_bn254_pairing(2) + %mstore_kernel_bn254_pairing(14) // stack: p2y - p1y, 
qx_, p1x - p2x, qy_ MULFP254 // stack: (p2y - p1y)qx_, p1x - p2x, qy_ - %mstore_kernel_bn254_pairing(3) + %mstore_kernel_bn254_pairing(15) // stack: p1x - p2x, qy_ MULFP254 // stack: (p1x - p2x)*qy_ - %mstore_kernel_bn254_pairing(9) + %mstore_kernel_bn254_pairing(21) +%endmacro + +%macro clear_line + %stack () -> (0, 0, 0, 0, 0) + %mstore_kernel_bn254_pairing(12) + %mstore_kernel_bn254_pairing(14) + %mstore_kernel_bn254_pairing(15) + %mstore_kernel_bn254_pairing(20) + %mstore_kernel_bn254_pairing(21) %endmacro diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/pairing.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/pairing.asm new file mode 100644 index 00000000..4479b965 --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/pairing.asm @@ -0,0 +1,193 @@ +/// The input to the pairing script is a list of points +/// P_i = n_i*G: Curve, Q_i = m_i*H: TwistedCurve +/// where G, H are the respective generators, such that +/// sum_i n_i*m_i = 0 +/// and therefore, due to bilinearity of the pairing: +/// prod_i e(P_i, Q_i) +/// = prod_i e(n_i G, m_i H) +/// = prod_i e(G,H)^{n_i * m_i} +/// = e(G,H)^{sum_i n_i * m_i} +/// = e(G,H)^0 +/// = 1: Fp12 + +/// def bn254_pairing(pairs: List((Curve, TwistedCurve))) -> Bool: +/// +/// for P, Q in pairs: +/// if not (P.is_valid and Q.is_valid): +/// return @U256_MAX +/// +/// out = 1 +/// for P, Q in pairs: +/// if P != 0 and Q != 0: +/// out *= miller_loop(P, Q) +/// +/// result = bn254_final_exponent(out) +/// return result == unit_fp12 + +/// The following is a key to this API +/// +/// - k is the number of inputs +/// - each input given by a pair of points, one on the curve and one on the twisted curve +/// - each input consists of 6 stack terms---2 for the curve point and 4 for the twisted curve point +/// - the inputs are presumed to be placed on the kernel contiguously +/// - the output (as defined above) is an Fp12 element +/// - out and inp are the BnPairing segment offsets for the 
output element and input +/// - the assembly code currently uses offsets 0-78 for scratch space + +global bn254_pairing: + // stack: k, inp, out, retdest + DUP1 + +bn254_input_check: + // stack: j , k, inp + DUP1 + ISZERO + // stack: end?, j , k, inp + %jumpi(bn254_pairing_start) + // stack: j , k, inp + %sub_const(1) + // stack: j=j-1, k, inp + + %stack (j, k, inp) -> (j, inp, j, k, inp) + // stack: j, inp, j, k, inp + %mul_const(6) + ADD + // stack: inp_j=inp+6j, j, k, inp + DUP1 + // stack: inp_j, inp_j, j, k, inp + %load_fp254_2 + // stack: P_j, inp_j, j, k, inp + %bn_check + // stack: valid?, inp_j, j, k, inp + ISZERO + %jumpi(bn_pairing_invalid_input) + // stack: inp_j, j, k, inp + DUP1 + // stack: inp_j , inp_j, j, k, inp + %add_const(2) + // stack: inp_j', inp_j, j, k, inp + %load_fp254_4 + // stack: Q_j, inp_j, j, k, inp + %bn_check_twisted + // stack: valid?, inp_j, j, k, inp + ISZERO + %jumpi(bn_pairing_invalid_input) + // stack: inp_j, j, k, inp + POP + %jump(bn254_input_check) + +bn_pairing_invalid_input: + // stack: inp_j, j, k, inp, out, retdest + %stack (inp_j, j, k, inp, out, retdest) -> (retdest, @U256_MAX) // invalid input returns @U256_MAX, as in the pseudo-code above + JUMP + +bn254_pairing_start: + // stack: 0, k, inp, out, retdest + %stack (j, k, inp, out) -> (out, 1, k, inp, out, bn254_pairing_output_validation, out) + // stack: out, 1, k, inp, out, bn254_pairing_output_validation, out, retdest + %mstore_kernel_bn254_pairing + // stack: k, inp, out, bn254_pairing_output_validation, out, retdest + +bn254_pairing_loop: + // stack: k, inp, out, bn254_pairing_output_validation, out, retdest + DUP1 + ISZERO + // stack: end?, k, inp, out, bn254_pairing_output_validation, out, retdest + %jumpi(bn254_final_exponent) + // stack: k, inp, out, bn254_pairing_output_validation, out, retdest + %sub_const(1) + // stack: k=k-1, inp, out, bn254_pairing_output_validation, out, retdest + %stack (k, inp) -> (k, inp, k, inp) + // stack: k, inp, k, inp, out, bn254_pairing_output_validation, out, retdest + %mul_const(6) + ADD + // 
stack: inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest + DUP1 + %load_fp254_6 + // stack: P, Q, inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest + %neutral_input + // stack: skip?, inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest + %jumpi(bn_skip_input) + // stack: inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest + %stack (inp_k, k, inp, out) -> (bn254_miller, inp_k, 0, mul_fp254_12, 0, out, out, bn254_pairing_loop, k, inp, out) + // stack: bn254_miller, inp_k, 0, + // mul_fp254_12, 0, out, out, + // bn254_pairing_loop, k, inp, out, + // bn254_pairing_output_validation, out, retdest + JUMP + +bn_skip_input: + // stack: inp_k, k, inp, out, bn254_pairing_output_validation, out, retdest + POP + // stack: k, inp, out, bn254_pairing_output_validation, out, retdest + %jump(bn254_pairing_loop) + + +bn254_pairing_output_validation: + // stack: out, retdest + PUSH 1 + // stack: check, out, retdest + %check_output_term + %check_output_term(1) + %check_output_term(2) + %check_output_term(3) + %check_output_term(4) + %check_output_term(5) + %check_output_term(6) + %check_output_term(7) + %check_output_term(8) + %check_output_term(9) + %check_output_term(10) + %check_output_term(11) + // stack: check, out, retdest + %stack (check, out, retdest) -> (retdest, check) + JUMP + +%macro check_output_term + // stack: check, out + DUP2 + // stack: out0, check, out + %mload_kernel_bn254_pairing + // stack: f0, check, out + %eq_const(1) + // stack: check0, check, out + MUL + // stack: check, out +%endmacro + +%macro check_output_term(j) + // stack: check, out + DUP2 + %add_const($j) + // stack: outj, check, out + %mload_kernel_bn254_pairing + // stack: fj, check, out + ISZERO + // stack: checkj, check, out + MUL + // stack: check, out +%endmacro + +%macro neutral_input + // stack: P , Q + ISZERO + SWAP1 + ISZERO + MUL + // stack: P==0, Q + SWAP4 + // stack: Q , P==0 + ISZERO + SWAP1 + ISZERO + MUL + SWAP1 + ISZERO + MUL 
+ SWAP1 + ISZERO + MUL + // stack: Q==0, P==0 + OR + // stack: Q==0||P==0 +%endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/twisted_curve.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/twisted_curve.asm new file mode 100644 index 00000000..859c45fe --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/twisted_curve.asm @@ -0,0 +1,94 @@ +// Check if (X,Y) is a valid curve point. +// Returns (range & curve) || ident +// where +// range = (x < N) & (x_ < N) & (y < N) & (y_ < N) +// curve = Y^2 == X^3 + 3/(9+i) +// ident = (X,Y) == (0,0) + +%macro bn_check_twisted + // stack: x, x_, y, y_ + %bn_check_twisted_range + // stack: range, x, x_, y, y_ + %bn_check_twisted_curve + // stack: curve , range, x, x_, y, y_ + MUL // Cheaper than AND + // stack: curve & range, x, x_, y, y_ + SWAP4 + // stack: y_, x, x_, y, curve & range + %bn_check_twisted_ident + // stack: ident , curve & range + OR + // stack: ident || (curve & range) +%endmacro + +%macro bn_check_twisted_range + // stack: x, x_, y, y_ + PUSH @BN_BASE + // stack: N, x, x_, y, y_ + %stack (N) -> (N, N, N, N) + // stack: N, N, N, N, x, x_, y, y_ + DUP8 + // stack: y_ , N, N, N, N, x, x_, y, y_ + LT + // stack: y_ < N, N, N, N, x, x_, y, y_ + SWAP3 + // stack: N, N, N, y_ < N, x, x_, y, y_ + DUP7 + // stack: y , N, N, N, y_ < N, x, x_, y, y_ + LT + // stack: y < N, N, N, y_ < N, x, x_, y, y_ + SWAP2 + // stack: N, N, y < N, y_ < N, x, x_, y, y_ + DUP6 + // stack: x_ , N, N, y < N, y_ < N, x, x_, y, y_ + LT + // stack: x_ < N, N, y < N, y_ < N, x, x_, y, y_ + SWAP1 + // stack: N, x_ < N, y < N, y_ < N, x, x_, y, y_ + DUP5 + // stack: x , N, x_ < N, y < N, y_ < N, x, x_, y, y_ + LT + // stack: x < N, x_ < N, y < N, y_ < N, x, x_, y, y_ + MUL // Cheaper than AND + MUL // Cheaper than AND + MUL // Cheaper than AND + // stack: range, x, x_, y, y_ +%endmacro + +%macro bn_check_twisted_curve + // stack: range, X, Y + %stack (range, X: 2, Y: 2) 
-> (Y, Y, range, X, Y) + // stack: Y, Y, range, X, Y + %mul_fp254_2 + // stack: Y^2, range, X, Y + %stack () -> (@BN_TWISTED_RE, @BN_TWISTED_IM) + // stack: A, Y^2, range, X, Y + %stack (A: 2, Y2: 2, range, X: 2) -> (X, X, X, A, Y2, range, X) + // stack: X, X, X, A, Y^2, range, X, Y + %mul_fp254_2 + %mul_fp254_2 + // stack: X^3 , A, Y^2, range, X, Y + %add_fp254_2 + // stack: X^3 + A, Y^2, range, X, Y + %eq_fp254_2 + // stack: curve, range, X, Y +%endmacro + +%macro bn_check_twisted_ident + SWAP2 + // stack: a , b , c , d + ISZERO + SWAP3 + // stack: d , b , c , a==0 + ISZERO + SWAP2 + // stack: c , b , d==0, a==0 + ISZERO + SWAP1 + // stack: b , c==0, d==0, a==0 + ISZERO + // stack: b==0, c==0, d==0, a==0 + MUL // Cheaper than AND + MUL // Cheaper than AND + MUL // Cheaper than AND +%endmacro diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm index 5fd47e80..ca32a30d 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/degree_12_mul.asm @@ -2,32 +2,6 @@ ///// GENERAL FP12 MULTIPLICATION ///// /////////////////////////////////////// -/// cost: 1063 - -/// fp254_6 functions: -/// fn | num | ops | cost -/// ------------------------- -/// load | 8 | 40 | 320 -/// store | 5 | 40 | 200 -/// dup | 5 | 6 | 30 -/// swap | 4 | 16 | 64 -/// add | 4 | 16 | 64 -/// subr | 1 | 17 | 17 -/// mul | 3 | 157 | 471 -/// i9 | 1 | 9 | 9 -/// -/// lone stack operations: -/// op | num -/// ------------ -/// ADD | 3 -/// SWAP | 2 -/// DUP | 6 -/// PUSH | 6 -/// POP | 2 -/// JUMP | 6 -/// -/// TOTAL: 1201 - /// inputs: /// F = f + f'z /// G = g + g'z @@ -66,73 +40,73 @@ mul_fp254_12_1: // stack: f'g', g' , f', inA, inB, out %dup_fp254_6_0 // stack: f'g', f'g', g' , f', inA, inB, out - %store_fp254_6_sh(84) - // stack: f'g', g' , f', inA, inB, out {84: sh(f'g')} - %store_fp254_6(90) - // stack: g' , 
f', inA, inB, out {84: sh(f'g'), 90: f'g'} + %store_fp254_6_sh(60) + // stack: f'g', g' , f', inA, inB, out {60: sh(f'g')} + %store_fp254_6(66) + // stack: g' , f', inA, inB, out {60: sh(f'g'), 66: f'g'} DUP13 - // stack: inA, g' , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: inA, g' , f', inA, inB, out {60: sh(f'g'), 66: f'g'} DUP15 - // stack: inB, inA, g' , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: inB, inA, g' , f', inA, inB, out {60: sh(f'g'), 66: f'g'} %load_fp254_6 - // stack: g , inA, g' , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: g , inA, g' , f', inA, inB, out {60: sh(f'g'), 66: f'g'} %stack (f: 6, x, g: 6) -> (g, x, f) - // stack: g', inA, g , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: g', inA, g , f', inA, inB, out {60: sh(f'g'), 66: f'g'} %dup_fp254_6_7 - // stack: g,g', inA, g , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: g,g', inA, g , f', inA, inB, out {60: sh(f'g'), 66: f'g'} %add_fp254_6 - // stack: g+g', inA, g , f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: g+g', inA, g , f', inA, inB, out {60: sh(f'g'), 66: f'g'} %stack (f: 6, x, g: 6) -> (g, x, f) - // stack: g, inA, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: g, inA, g+g', f', inA, inB, out {60: sh(f'g'), 66: f'g'} PUSH mul_fp254_12_2 - // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: mul_fp254_12_2, g, inA, g+g', f', inA, inB, out {60: sh(f'g'), 66: f'g'} SWAP7 - // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: inA, g, mul_fp254_12_2, g+g', f', inA, inB, out {60: sh(f'g'), 66: f'g'} %load_fp254_6 - // stack: f, g, mul_fp254_12_2, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} + // stack: f, g, mul_fp254_12_2, g+g', f', inA, inB, out {60: sh(f'g'), 66: f'g'} %jump(mul_fp254_6) mul_fp254_12_2: - // stack: fg, g+g', f', inA, inB, out {84: sh(f'g'), 90: f'g'} - %store_fp254_6(96) - // stack: g+g', f', inA, inB, out {84: 
sh(f'g'), 90: f'g', 96: fg} + // stack: fg, g+g', f', inA, inB, out {60: sh(f'g'), 66: f'g'} + %store_fp254_6(72) + // stack: g+g', f', inA, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %stack (x: 6, y: 6) -> (y, x) - // stack: f', g+g', inA, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: f', g+g', inA, inB, out {60: sh(f'g'), 66: f'g', 72: fg} PUSH mul_fp254_12_3 - // stack: mul_fp254_12_3, f', g+g', inA, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: mul_fp254_12_3, f', g+g', inA, inB, out {60: sh(f'g'), 66: f'g', 72: fg} SWAP13 - // stack: inA, f', g+g', mul_fp254_12_3, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: inA, f', g+g', mul_fp254_12_3, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %load_fp254_6 - // stack: f,f', g+g', mul_fp254_12_3, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: f,f', g+g', mul_fp254_12_3, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %add_fp254_6 - // stack: f+f', g+g', mul_fp254_12_3, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: f+f', g+g', mul_fp254_12_3, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %jump(mul_fp254_6) mul_fp254_12_3: - // stack: (f+f')(g+g'), inB, out {84: sh(f'g'), 90: f'g', 96: fg} - %load_fp254_6(96) - // stack: fg, (f+f')(g+g'), inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: (f+f')(g+g'), inB, out {60: sh(f'g'), 66: f'g', 72: fg} + %load_fp254_6(72) + // stack: fg, (f+f')(g+g'), inB, out {60: sh(f'g'), 66: f'g', 72: fg} %stack (x: 6, y: 6) -> (y, x) - // stack: (f+f')(g+g'), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: (f+f')(g+g'), fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %dup_fp254_6_6 - // stack: fg, (f+f')(g+g'), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} - %load_fp254_6(90) - // stack: f'g',fg, (f+f')(g+g'), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: fg, (f+f')(g+g'), fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} + %load_fp254_6(66) + // stack: f'g',fg, (f+f')(g+g'), fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %add_fp254_6 - // stack: 
f'g'+fg, (f+f')(g+g'), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: f'g'+fg, (f+f')(g+g'), fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %subr_fp254_6 - // stack: (f+f')(g+g') - (f'g'+fg), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: (f+f')(g+g') - (f'g'+fg), fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} DUP14 %add_const(6) - // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: out', (f+f')(g+g') - (f'g'+fg), fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %store_fp254_6 - // stack: fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} - %load_fp254_6(84) - // stack: sh(f'g') , fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} + %load_fp254_6(60) + // stack: sh(f'g') , fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %add_fp254_6 - // stack: sh(f'g') + fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: sh(f'g') + fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} DUP8 - // stack: out, sh(f'g') + fg, inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: out, sh(f'g') + fg, inB, out {60: sh(f'g'), 66: f'g', 72: fg} %store_fp254_6 - // stack: inB, out {84: sh(f'g'), 90: f'g', 96: fg} + // stack: inB, out {60: sh(f'g'), 66: f'g', 72: fg} %pop2 JUMP @@ -141,29 +115,6 @@ mul_fp254_12_3: ///// SPARSE FP12 MULTIPLICATION ///// ////////////////////////////////////// -/// cost: 645 - -/// fp254_6 functions: -/// fn | num | ops | cost -/// --------------------------- -/// load | 2 | 40 | 80 -/// store | 2 | 40 | 80 -/// dup | 4 | 6 | 24 -/// swap | 4 | 16 | 64 -/// add | 4 | 16 | 64 -/// mul_fp254_ | 2 | 21 | 42 -/// mul_fp254_2 | 4 | 59 | 236 -/// -/// lone stack operations: -/// op | num -/// ------------ -/// ADD | 6 -/// DUP | 9 -/// PUSH | 6 -/// POP | 5 -/// -/// TOTAL: 618 - /// input: /// F = f + f'z /// G = g0 + (G1)t + (G2)tz @@ -277,32 +228,6 @@ global mul_fp254_12_sparse: ///// FP12 SQUARING ///// ///////////////////////// -/// cost: 646 - -/// 
fp254_6 functions: -/// fn | num | ops | cost -/// ------------------------- -/// load | 2 | 40 | 80 -/// store | 2 | 40 | 80 -/// dup | 2 | 6 | 12 -/// swap | 2 | 16 | 32 -/// add | 1 | 16 | 16 -/// mul | 1 | 157 | 157 -/// sq | 2 | 101 | 202 -/// dbl | 1 | 13 | 13 -/// -/// lone stack operations: -/// op | num -/// ------------ -/// ADD | 3 -/// SWAP | 4 -/// DUP | 5 -/// PUSH | 6 -/// POP | 3 -/// JUMP | 4 -/// -/// TOTAL: - /// input: /// F = f + f'z /// diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm index 6214f385..ce4602a9 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/inverse.asm @@ -32,16 +32,19 @@ global inv_fp254_12: // stack: out, f^-1, inp, out, retdest %store_fp254_12 // stack: inp, out, retdest - %stack (inp, out) -> (inp, out, 72, check_inv_fp254_12) - // stack: inp, out, 72, check_inv_fp254_12, retdest + %stack (inp, out) -> (inp, out, 60, check_inv_fp254_12) + // stack: inp, out, 60, check_inv_fp254_12, retdest %jump(mul_fp254_12) check_inv_fp254_12: // stack: retdest - PUSH 72 + PUSH 60 %load_fp254_12 // stack: unit?, retdest %assert_eq_unit_fp254_12 // stack: retdest + PUSH 0 + // stack: 0, retdest + %mstore_kernel_bn254_pairing(60) JUMP %macro prover_inv_fp254_12 diff --git a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm index af074714..82617e8f 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/field_arithmetic/util.asm @@ -67,6 +67,31 @@ // stack: cx, cy %endmacro +%macro eq_fp254_2 + // stack: x, x_, y, y_ + SWAP3 + // stack: y_, x_, y, x + EQ + // stack: y_==x_, y, x + SWAP2 + // stack: x, y, y_==x_ + EQ + // stack: x==y, y_==x_ + AND +%endmacro + +%macro add_fp254_2 + // stack: x, x_, y, y_ + SWAP3 + // stack: y_, x_, y, 
x + ADDFP254 + // stack: z_, y, x + SWAP2 + // stack: x, y, z_ + ADDFP254 + // stack: z, z_ +%endmacro + /// Given z = x + iy: Fp254_2, return complex conjugate z': Fp254_2 /// where input is represented z.re, z.im and output as z'.im, z'.re /// cost: 9; note this returns y, x for the output x + yi @@ -116,6 +141,31 @@ // stack: ac - bd, bc + ad %endmacro +// load twisted curve + +%macro load_fp254_4 + // stack: ptr + DUP1 + %add_const(2) + // stack: ind2, ptr + %mload_kernel_bn254_pairing + // stack: x2, ptr + DUP2 + %add_const(1) + // stack: ind1, x2, ptr + %mload_kernel_bn254_pairing + // stack: x1, x2, ptr + DUP3 + %add_const(3) + // stack: ind3, x1, x2, ptr + %mload_kernel_bn254_pairing + // stack: x3, x1, x2, ptr + SWAP3 + // stack: ind0, x1, x2, x3 + %mload_kernel_bn254_pairing + // stack: x0, x1, x2, x3 +%endmacro + // fp254_6 macros %macro load_fp254_6 diff --git a/evm/src/cpu/kernel/constants/mod.rs b/evm/src/cpu/kernel/constants/mod.rs index cf2a1e31..b1486589 100644 --- a/evm/src/cpu/kernel/constants/mod.rs +++ b/evm/src/cpu/kernel/constants/mod.rs @@ -44,6 +44,10 @@ pub fn evm_constants() -> HashMap { c.insert(name.into(), U256::from(value)); } + for (name, value) in SNARKV_POINTERS { + c.insert(name.into(), U256::from(value)); + } + for segment in Segment::all() { c.insert(segment.var_name().into(), (segment as u32).into()); } @@ -87,7 +91,7 @@ const HASH_CONSTANTS: [(&str, [u8; 32]); 2] = [ ), ]; -const EC_CONSTANTS: [(&str, [u8; 32]); 18] = [ +const EC_CONSTANTS: [(&str, [u8; 32]); 20] = [ ( "U256_MAX", hex!("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"), @@ -96,6 +100,14 @@ const EC_CONSTANTS: [(&str, [u8; 32]); 18] = [ "BN_BASE", hex!("30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47"), ), + ( + "BN_TWISTED_RE", + hex!("2b149d40ceb8aaae81be18991be06ac3b5b4c5e559dbefa33267e6dc24a138e5"), + ), + ( + "BN_TWISTED_IM", + hex!("009713b03af0fed4cd2cafadeed8fdf4a74fa084e52d1852e4a2bd0685c315d2"), + ), ( "BN_SCALAR", 
hex!("30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001"), @@ -232,6 +244,8 @@ const PRECOMPILES_GAS: [(&str, u16); 13] = [ ("BLAKE2_F__GAS", 1), ]; +const SNARKV_POINTERS: [(&str, u64); 2] = [("SNARKV_INP", 112), ("SNARKV_OUT", 100)]; + const CODE_SIZE_LIMIT: [(&str, u64); 3] = [ ("MAX_CODE_SIZE", 0x6000), ("MAX_INITCODE_SIZE", 0xc000), diff --git a/evm/src/cpu/kernel/tests/bn254.rs b/evm/src/cpu/kernel/tests/bn254.rs index 8e71ffd6..548f9789 100644 --- a/evm/src/cpu/kernel/tests/bn254.rs +++ b/evm/src/cpu/kernel/tests/bn254.rs @@ -2,12 +2,12 @@ use anyhow::Result; use ethereum_types::U256; use rand::Rng; -use crate::bn254_pairing::{ - gen_fp12_sparse, invariant_exponent, miller_loop, tate, Curve, TwistedCurve, -}; use crate::cpu::kernel::interpreter::{ run_interpreter_with_memory, Interpreter, InterpreterMemoryInitialization, }; +use crate::curve_pairings::{ + bn_final_exponent, bn_miller_loop, gen_bn_fp12_sparse, Curve, CyclicGroup, +}; use crate::extension_tower::{FieldExt, Fp12, Fp2, Fp6, Stack, BN254}; use crate::memory::segments::Segment::BnPairing; @@ -20,38 +20,31 @@ fn extract_stack(interpreter: Interpreter<'static>) -> Vec { .collect::>() } -fn setup_mul_fp6_test( - f: Fp6, - g: Fp6, - label: &str, -) -> InterpreterMemoryInitialization { +fn run_bn_mul_fp6(f: Fp6, g: Fp6, label: &str) -> Vec { let mut stack = f.on_stack(); if label == "mul_fp254_6" { stack.extend(g.on_stack()); } stack.push(U256::from(0xdeadbeefu32)); - InterpreterMemoryInitialization { + + let setup = InterpreterMemoryInitialization { label: label.to_string(), stack, segment: BnPairing, memory: vec![], - } + }; + let interpreter = run_interpreter_with_memory(setup).unwrap(); + extract_stack(interpreter) } #[test] -fn test_mul_fp6() -> Result<()> { +fn test_bn_mul_fp6() -> Result<()> { let mut rng = rand::thread_rng(); let f: Fp6 = rng.gen::>(); let g: Fp6 = rng.gen::>(); - let setup_normal: InterpreterMemoryInitialization = setup_mul_fp6_test(f, g, "mul_fp254_6"); - let 
setup_square: InterpreterMemoryInitialization = setup_mul_fp6_test(f, f, "square_fp254_6"); - - let intrptr_normal: Interpreter = run_interpreter_with_memory(setup_normal).unwrap(); - let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap(); - - let out_normal: Vec = extract_stack(intrptr_normal); - let out_square: Vec = extract_stack(intrptr_square); + let out_normal: Vec = run_bn_mul_fp6(f, g, "mul_fp254_6"); + let out_square: Vec = run_bn_mul_fp6(f, f, "square_fp254_6"); let exp_normal: Vec = (f * g).on_stack(); let exp_square: Vec = (f * f).on_stack(); @@ -62,14 +55,10 @@ fn test_mul_fp6() -> Result<()> { Ok(()) } -fn setup_mul_fp12_test( - out: usize, - f: Fp12, - g: Fp12, - label: &str, -) -> InterpreterMemoryInitialization { - let in0: usize = 200; - let in1: usize = 212; +fn run_bn_mul_fp12(f: Fp12, g: Fp12, label: &str) -> Vec { + let in0: usize = 100; + let in1: usize = 112; + let out: usize = 124; let mut stack = vec![ U256::from(in0), @@ -80,37 +69,27 @@ fn setup_mul_fp12_test( if label == "square_fp254_12" { stack.remove(0); } - InterpreterMemoryInitialization { + + let setup = InterpreterMemoryInitialization { label: label.to_string(), stack, segment: BnPairing, memory: vec![(in0, f.on_stack()), (in1, g.on_stack())], - } + }; + let interpreter = run_interpreter_with_memory(setup).unwrap(); + interpreter.extract_kernel_memory(BnPairing, out..out + 12) } #[test] -fn test_mul_fp12() -> Result<()> { - let out: usize = 224; - +fn test_bn_mul_fp12() -> Result<()> { let mut rng = rand::thread_rng(); let f: Fp12 = rng.gen::>(); let g: Fp12 = rng.gen::>(); - let h: Fp12 = gen_fp12_sparse(&mut rng); + let h: Fp12 = gen_bn_fp12_sparse(&mut rng); - let setup_normal: InterpreterMemoryInitialization = - setup_mul_fp12_test(out, f, g, "mul_fp254_12"); - let setup_sparse: InterpreterMemoryInitialization = - setup_mul_fp12_test(out, f, h, "mul_fp254_12_sparse"); - let setup_square: InterpreterMemoryInitialization = - 
setup_mul_fp12_test(out, f, f, "square_fp254_12"); - - let intrptr_normal: Interpreter = run_interpreter_with_memory(setup_normal).unwrap(); - let intrptr_sparse: Interpreter = run_interpreter_with_memory(setup_sparse).unwrap(); - let intrptr_square: Interpreter = run_interpreter_with_memory(setup_square).unwrap(); - - let out_normal: Vec = intrptr_normal.extract_kernel_memory(BnPairing, out..out + 12); - let out_sparse: Vec = intrptr_sparse.extract_kernel_memory(BnPairing, out..out + 12); - let out_square: Vec = intrptr_square.extract_kernel_memory(BnPairing, out..out + 12); + let out_normal: Vec = run_bn_mul_fp12(f, g, "mul_fp254_12"); + let out_sparse: Vec = run_bn_mul_fp12(f, h, "mul_fp254_12_sparse"); + let out_square: Vec = run_bn_mul_fp12(f, f, "square_fp254_12"); let exp_normal: Vec = (f * g).on_stack(); let exp_sparse: Vec = (f * h).on_stack(); @@ -123,57 +102,58 @@ fn test_mul_fp12() -> Result<()> { Ok(()) } -fn setup_frob_fp6_test(f: Fp6, n: usize) -> InterpreterMemoryInitialization { - InterpreterMemoryInitialization { - label: String::from("test_frob_fp254_6_") + &(n.to_string()), +fn run_bn_frob_fp6(f: Fp6, n: usize) -> Vec { + let setup = InterpreterMemoryInitialization { + label: format!("test_frob_fp254_6_{}", n), stack: f.on_stack(), segment: BnPairing, memory: vec![], - } + }; + let interpreter = run_interpreter_with_memory(setup).unwrap(); + extract_stack(interpreter) } #[test] -fn test_frob_fp6() -> Result<()> { +fn test_bn_frob_fp6() -> Result<()> { let mut rng = rand::thread_rng(); let f: Fp6 = rng.gen::>(); for n in 1..4 { - let setup_frob = setup_frob_fp6_test(f, n); - let intrptr_frob: Interpreter = run_interpreter_with_memory(setup_frob).unwrap(); - let out_frob: Vec = extract_stack(intrptr_frob); - let exp_frob: Vec = f.frob(n).on_stack(); - assert_eq!(out_frob, exp_frob); + let output: Vec = run_bn_frob_fp6(f, n); + let expected: Vec = f.frob(n).on_stack(); + assert_eq!(output, expected); } Ok(()) } -fn setup_frob_fp12_test(ptr: usize, 
f: Fp12, n: usize) -> InterpreterMemoryInitialization { - InterpreterMemoryInitialization { - label: String::from("test_frob_fp254_12_") + &(n.to_string()), +fn run_bn_frob_fp12(f: Fp12, n: usize) -> Vec { + let ptr: usize = 100; + let setup = InterpreterMemoryInitialization { + label: format!("test_frob_fp254_12_{}", n), stack: vec![U256::from(ptr)], segment: BnPairing, memory: vec![(ptr, f.on_stack())], - } + }; + let interpreter = run_interpreter_with_memory(setup).unwrap(); + interpreter.extract_kernel_memory(BnPairing, ptr..ptr + 12) } #[test] -fn test_frob_fp12() -> Result<()> { - let ptr: usize = 200; +fn test_bn_frob_fp12() -> Result<()> { let mut rng = rand::thread_rng(); let f: Fp12 = rng.gen::>(); + for n in [1, 2, 3, 6] { - let setup_frob = setup_frob_fp12_test(ptr, f, n); - let intrptr_frob: Interpreter = run_interpreter_with_memory(setup_frob).unwrap(); - let out_frob: Vec = intrptr_frob.extract_kernel_memory(BnPairing, ptr..ptr + 12); - let exp_frob: Vec = f.frob(n).on_stack(); - assert_eq!(out_frob, exp_frob); + let output = run_bn_frob_fp12(f, n); + let expected: Vec = f.frob(n).on_stack(); + assert_eq!(output, expected); } Ok(()) } #[test] -fn test_inv_fp12() -> Result<()> { - let ptr: usize = 200; - let inv: usize = 212; +fn test_bn_inv_fp12() -> Result<()> { + let ptr: usize = 100; + let inv: usize = 112; let mut rng = rand::thread_rng(); let f: Fp12 = rng.gen::>(); @@ -193,101 +173,54 @@ fn test_inv_fp12() -> Result<()> { } #[test] -fn test_invariant_exponent() -> Result<()> { - let ptr: usize = 200; +fn test_bn_final_exponent() -> Result<()> { + let ptr: usize = 100; + let mut rng = rand::thread_rng(); let f: Fp12 = rng.gen::>(); let setup = InterpreterMemoryInitialization { - label: "bn254_invariant_exponent".to_string(), - stack: vec![U256::from(ptr), U256::from(0xdeadbeefu32)], + label: "bn254_final_exponent".to_string(), + stack: vec![ + U256::zero(), + U256::zero(), + U256::from(ptr), + U256::from(0xdeadbeefu32), + ], segment: BnPairing, 
memory: vec![(ptr, f.on_stack())], }; let interpreter: Interpreter = run_interpreter_with_memory(setup).unwrap(); let output: Vec = interpreter.extract_kernel_memory(BnPairing, ptr..ptr + 12); - let expected: Vec = invariant_exponent(f).on_stack(); + let expected: Vec = bn_final_exponent(f).on_stack(); assert_eq!(output, expected); Ok(()) } -// The curve is cyclic with generator (1, 2) -pub const CURVE_GENERATOR: Curve = { - Curve { - x: BN254 { val: U256::one() }, - y: BN254 { - val: U256([2, 0, 0, 0]), - }, - } -}; - -// The twisted curve is cyclic with generator (x, y) as follows -pub const TWISTED_GENERATOR: TwistedCurve = { - TwistedCurve { - x: Fp2 { - re: BN254 { - val: U256([ - 0x46debd5cd992f6ed, - 0x674322d4f75edadd, - 0x426a00665e5c4479, - 0x1800deef121f1e76, - ]), - }, - im: BN254 { - val: U256([ - 0x97e485b7aef312c2, - 0xf1aa493335a9e712, - 0x7260bfb731fb5d25, - 0x198e9393920d483a, - ]), - }, - }, - y: Fp2 { - re: BN254 { - val: U256([ - 0x4ce6cc0166fa7daa, - 0xe3d1e7690c43d37b, - 0x4aab71808dcb408f, - 0x12c85ea5db8c6deb, - ]), - }, - im: BN254 { - val: U256([ - 0x55acdadcd122975b, - 0xbc4b313370b38ef3, - 0xec9e99ad690c3395, - 0x090689d0585ff075, - ]), - }, - }, - } -}; - #[test] -fn test_miller() -> Result<()> { - let ptr: usize = 200; - let out: usize = 206; - let inputs: Vec = vec![ - CURVE_GENERATOR.x.val, - CURVE_GENERATOR.y.val, - TWISTED_GENERATOR.x.re.val, - TWISTED_GENERATOR.x.im.val, - TWISTED_GENERATOR.y.re.val, - TWISTED_GENERATOR.y.im.val, - ]; +fn test_bn_miller() -> Result<()> { + let ptr: usize = 100; + let out: usize = 106; + + let mut rng = rand::thread_rng(); + let p: Curve = rng.gen::>(); + let q: Curve> = rng.gen::>>(); + + let mut input = p.on_stack(); + input.extend(q.on_stack()); let setup = InterpreterMemoryInitialization { label: "bn254_miller".to_string(), stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)], segment: BnPairing, - memory: vec![(ptr, inputs)], + memory: vec![(ptr, input)], }; let 
interpreter = run_interpreter_with_memory(setup).unwrap(); let output: Vec = interpreter.extract_kernel_memory(BnPairing, out..out + 12); - let expected = miller_loop(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack(); + let expected = bn_miller_loop(p, q).on_stack(); assert_eq!(output, expected); @@ -295,29 +228,41 @@ fn test_miller() -> Result<()> { } #[test] -fn test_tate() -> Result<()> { - let ptr: usize = 200; - let out: usize = 206; - let inputs: Vec = vec![ - CURVE_GENERATOR.x.val, - CURVE_GENERATOR.y.val, - TWISTED_GENERATOR.x.re.val, - TWISTED_GENERATOR.x.im.val, - TWISTED_GENERATOR.y.re.val, - TWISTED_GENERATOR.y.im.val, - ]; +fn test_bn_pairing() -> Result<()> { + let out: usize = 100; + let ptr: usize = 112; + + let mut rng = rand::thread_rng(); + let k: usize = rng.gen_range(1..10); + let mut acc: i32 = 0; + let mut input: Vec = vec![]; + for _ in 1..k { + let m: i32 = rng.gen_range(-8..8); + let n: i32 = rng.gen_range(-8..8); + acc -= m * n; + + let p: Curve = Curve::::int(m); + let q: Curve> = Curve::>::int(n); + input.extend(p.on_stack()); + input.extend(q.on_stack()); + } + let p: Curve = Curve::::int(acc); + let q: Curve> = Curve::>::GENERATOR; + input.extend(p.on_stack()); + input.extend(q.on_stack()); let setup = InterpreterMemoryInitialization { - label: "bn254_tate".to_string(), - stack: vec![U256::from(ptr), U256::from(out), U256::from(0xdeadbeefu32)], + label: "bn254_pairing".to_string(), + stack: vec![ + U256::from(k), + U256::from(ptr), + U256::from(out), + U256::from(0xdeadbeefu32), + ], segment: BnPairing, - memory: vec![(ptr, inputs)], + memory: vec![(ptr, input)], }; let interpreter = run_interpreter_with_memory(setup).unwrap(); - let output: Vec = interpreter.extract_kernel_memory(BnPairing, out..out + 12); - let expected = tate(CURVE_GENERATOR, TWISTED_GENERATOR).on_stack(); - - assert_eq!(output, expected); - + assert_eq!(interpreter.stack()[0], U256::one()); Ok(()) } diff --git a/evm/src/bn254_pairing.rs b/evm/src/curve_pairings.rs 
similarity index 63% rename from evm/src/bn254_pairing.rs rename to evm/src/curve_pairings.rs index 7277c2a8..708e7fb2 100644 --- a/evm/src/bn254_pairing.rs +++ b/evm/src/curve_pairings.rs @@ -1,25 +1,75 @@ -use std::ops::Add; +use std::ops::{Add, Mul, Neg}; +use ethereum_types::U256; +use rand::distributions::Standard; +use rand::prelude::Distribution; use rand::Rng; -use crate::extension_tower::{FieldExt, Fp12, Fp2, Fp6, BN254}; +use crate::extension_tower::{FieldExt, Fp12, Fp2, Fp6, Stack, BN254}; -// The curve consists of pairs (x, y): (BN254, BN254) | y^2 = x^3 + 2 #[derive(Debug, Copy, Clone, PartialEq)] -pub struct Curve { - pub x: BN254, - pub y: BN254, +pub struct Curve +where + T: FieldExt, +{ + pub x: T, + pub y: T, +} + +impl Curve { + pub fn unit() -> Self { + Curve { + x: T::ZERO, + y: T::ZERO, + } + } +} + +impl Curve { + pub fn on_stack(self) -> Vec { + let mut stack = self.x.on_stack(); + stack.extend(self.y.on_stack()); + stack + } +} + +impl Curve +where + T: FieldExt, + Curve: CyclicGroup, +{ + pub fn int(z: i32) -> Self { + Curve::::GENERATOR * z + } +} + +impl Distribution> for Standard +where + T: FieldExt, + Curve: CyclicGroup, +{ + fn sample(&self, rng: &mut R) -> Curve { + Curve::::GENERATOR * rng.gen::() + } } /// Standard addition formula for elliptic curves, restricted to the cases -/// where neither inputs nor output would ever be the identity O. 
source: /// https://en.wikipedia.org/wiki/Elliptic_curve#Algebraic_interpretation -impl Add for Curve { +impl Add for Curve { type Output = Self; fn add(self, other: Self) -> Self { + if self == Curve::::unit() { + return other; + } + if other == Curve::::unit() { + return self; + } + if self == -other { + return Curve::::unit(); + } let m = if self == other { - BN254::new(3) * self.x * self.x / (BN254::new(2) * self.y) + T::new(3) * self.x * self.x / (T::new(2) * self.y) } else { (other.y - self.y) / (other.x - self.x) }; @@ -31,33 +81,130 @@ impl Add for Curve { } } -// The twisted curve consists of pairs (x, y): (Fp2, Fp2) | y^2 = x^3 + 3/(9 + i) -#[derive(Debug, Copy, Clone, PartialEq)] -pub struct TwistedCurve { - pub x: Fp2, - pub y: Fp2, +impl Neg for Curve { + type Output = Curve; + + fn neg(self) -> Self { + Curve { + x: self.x, + y: -self.y, + } + } +} + +pub trait CyclicGroup { + const GENERATOR: Self; +} + +/// The BN curve consists of pairs +/// (x, y): (BN254, BN254) | y^2 = x^3 + 3 +// with generator given by (1, 2) +impl CyclicGroup for Curve { + const GENERATOR: Curve = Curve { + x: BN254 { val: U256::one() }, + y: BN254 { + val: U256([2, 0, 0, 0]), + }, + }; +} + +impl Mul for Curve +where + T: FieldExt, + Curve: CyclicGroup, +{ + type Output = Curve; + + fn mul(self, other: i32) -> Self { + if other == 0 { + return Curve::::unit(); + } + if self == Curve::::unit() { + return Curve::::unit(); + } + + let mut x: Curve = self; + if other.is_negative() { + x = -x; + } + let mut result = Curve::::unit(); + + let mut exp = other.unsigned_abs() as usize; + while exp > 0 { + if exp % 2 == 1 { + result = result + x; + } + exp >>= 1; + x = x + x; + } + result + } +} + +/// The twisted curve consists of pairs +/// (x, y): (Fp2, Fp2) | y^2 = x^3 + 3/(9 + i) +/// with generator given as follows +impl CyclicGroup for Curve> { + const GENERATOR: Curve> = Curve { + x: Fp2 { + re: BN254 { + val: U256([ + 0x46debd5cd992f6ed, + 0x674322d4f75edadd, 
0x426a00665e5c4479, + 0x1800deef121f1e76, + ]), + }, + im: BN254 { + val: U256([ + 0x97e485b7aef312c2, + 0xf1aa493335a9e712, + 0x7260bfb731fb5d25, + 0x198e9393920d483a, + ]), + }, + }, + y: Fp2 { + re: BN254 { + val: U256([ + 0x4ce6cc0166fa7daa, + 0xe3d1e7690c43d37b, + 0x4aab71808dcb408f, + 0x12c85ea5db8c6deb, + ]), + }, + im: BN254 { + val: U256([ + 0x55acdadcd122975b, + 0xbc4b313370b38ef3, + 0xec9e99ad690c3395, + 0x090689d0585ff075, + ]), + }, + }, + }; } // The tate pairing takes a point each from the curve and its twist and outputs an Fp12 element -pub fn tate(p: Curve, q: TwistedCurve) -> Fp12 { - let miller_output = miller_loop(p, q); - invariant_exponent(miller_output) +pub fn bn_tate(p: Curve, q: Curve>) -> Fp12 { + let miller_output = bn_miller_loop(p, q); + bn_final_exponent(miller_output) } /// Standard code for miller loop, can be found on page 99 at this url: /// https://static1.squarespace.com/static/5fdbb09f31d71c1227082339/t/5ff394720493bd28278889c6/1609798774687/PairingsForBeginners.pdf#page=107 -/// where EXP is a hardcoding of the array of Booleans that the loop traverses -pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 { +/// where BN_EXP is a hardcoding of the array of Booleans that the loop traverses +pub fn bn_miller_loop(p: Curve, q: Curve>) -> Fp12 { let mut r = p; let mut acc: Fp12 = Fp12::::UNIT; let mut line: Fp12; - for i in EXP { - line = tangent(r, q); + for i in BN_EXP { + line = bn_tangent(r, q); r = r + r; acc = line * acc * acc; if i { - line = cord(p, r, q); + line = bn_cord(p, r, q); r = r + p; acc = line * acc; } @@ -66,22 +213,22 @@ pub fn miller_loop(p: Curve, q: TwistedCurve) -> Fp12 { } /// The sloped line function for doubling a point -pub fn tangent(p: Curve, q: TwistedCurve) -> Fp12 { +pub fn bn_tangent(p: Curve, q: Curve>) -> Fp12 { let cx = -BN254::new(3) * p.x * p.x; let cy = BN254::new(2) * p.y; - sparse_embed(p.y * p.y - BN254::new(9), q.x * cx, q.y * cy) + bn_sparse_embed(p.y * p.y - BN254::new(9), q.x * cx, 
q.y * cy) } /// The sloped line function for adding two points -pub fn cord(p1: Curve, p2: Curve, q: TwistedCurve) -> Fp12 { +pub fn bn_cord(p1: Curve, p2: Curve, q: Curve>) -> Fp12 { let cx = p2.y - p1.y; let cy = p1.x - p2.x; - sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x * cx, q.y * cy) + bn_sparse_embed(p1.y * p2.x - p2.y * p1.x, q.x * cx, q.y * cy) } /// The tangent and cord functions output sparse Fp12 elements. /// This map embeds the nonzero coefficients into an Fp12. -pub fn sparse_embed(g000: BN254, g01: Fp2, g11: Fp2) -> Fp12 { +pub fn bn_sparse_embed(g000: BN254, g01: Fp2, g11: Fp2) -> Fp12 { let g0 = Fp6 { t0: Fp2 { re: g000, @@ -100,8 +247,8 @@ pub fn sparse_embed(g000: BN254, g01: Fp2, g11: Fp2) -> Fp12(rng: &mut R) -> Fp12 { - sparse_embed( +pub fn gen_bn_fp12_sparse(rng: &mut R) -> Fp12 { + bn_sparse_embed( rng.gen::(), rng.gen::>(), rng.gen::>(), @@ -120,10 +267,10 @@ pub fn gen_fp12_sparse(rng: &mut R) -> Fp12 { /// (p^4 - p^2 + 1)/N = p^3 + (a2)p^2 - (a1)p - a0 /// where 0 < a0, a1, a2 < p. 
Then the final power is given by /// y = y_3 * (y^a2)_2 * (y^-a1)_1 * (y^-a0) -pub fn invariant_exponent(f: Fp12) -> Fp12 { +pub fn bn_final_exponent(f: Fp12) -> Fp12 { let mut y = f.frob(6) / f; y = y.frob(2) * y; - let (y_a2, y_a1, y_a0) = get_custom_powers(y); + let (y_a2, y_a1, y_a0) = get_bn_custom_powers(y); y.frob(3) * y_a2.frob(2) * y_a1.frob(1) * y_a0 } @@ -135,10 +282,10 @@ pub fn invariant_exponent(f: Fp12) -> Fp12 { /// y^a2, y^a1 = y^a4 * y^a2 * y^a2 * y^(-a0), y^(-a0) /// /// Representing a4, a2, a0 in *little endian* binary, define -/// EXPS4 = [(a4[i], a2[i], a0[i]) for i in 0..len(a4)] -/// EXPS2 = [ (a2[i], a0[i]) for i in len(a4)..len(a2)] -/// EXPS0 = [ a0[i] for i in len(a2)..len(a0)] -fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) { +/// BN_EXPS4 = [(a4[i], a2[i], a0[i]) for i in 0..len(a4)] +/// BN_EXPS2 = [ (a2[i], a0[i]) for i in len(a4)..len(a2)] +/// BN_EXPS0 = [ a0[i] for i in len(a2)..len(a0)] +fn get_bn_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) { let mut sq: Fp12 = f; let mut y0: Fp12 = Fp12::::UNIT; let mut y2: Fp12 = Fp12::::UNIT; @@ -147,7 +294,7 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) // proceed via standard squaring algorithm for exponentiation // must keep multiplying all three values: a4, a2, a0 - for (a, b, c) in EXPS4 { + for (a, b, c) in BN_EXPS4 { if a { y4 = y4 * sq; } @@ -163,7 +310,7 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) y4 = y4 * sq; // must keep multiplying remaining two values: a2, a0 - for (a, b) in EXPS2 { + for (a, b) in BN_EXPS2 { if a { y2 = y2 * sq; } @@ -176,7 +323,7 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) y2 = y2 * sq; // must keep multiplying final remaining value: a0 - for a in EXPS0 { + for a in BN_EXPS0 { if a { y0 = y0 * sq; } @@ -192,7 +339,7 @@ fn get_custom_powers(f: Fp12) -> (Fp12, Fp12, Fp12) (y2, y4 * y2 * y2 * y0_inv, y0_inv) } -const EXP: [bool; 253] = [ +const BN_EXP: [bool; 253] = [ true, false, false, false, false, false, true, true, false, 
false, true, false, false, false, true, false, false, true, true, true, false, false, true, true, true, false, false, true, false, true, true, true, false, false, false, false, true, false, false, true, true, false, @@ -216,7 +363,7 @@ const EXP: [bool; 253] = [ // The folowing constants are defined above get_custom_powers -const EXPS4: [(bool, bool, bool); 64] = [ +const BN_EXPS4: [(bool, bool, bool); 64] = [ (true, true, false), (true, true, true), (true, true, true), @@ -283,7 +430,7 @@ const EXPS4: [(bool, bool, bool); 64] = [ (true, true, true), ]; -const EXPS2: [(bool, bool); 62] = [ +const BN_EXPS2: [(bool, bool); 62] = [ (true, false), (true, true), (false, false), @@ -348,7 +495,7 @@ const EXPS2: [(bool, bool); 62] = [ (true, true), ]; -const EXPS0: [bool; 65] = [ +const BN_EXPS0: [bool; 65] = [ false, false, true, false, false, true, true, false, true, false, true, true, true, false, true, false, false, false, true, false, false, true, false, true, false, true, true, false, false, false, false, false, true, false, true, false, true, true, true, false, false, true, diff --git a/evm/src/extension_tower.rs b/evm/src/extension_tower.rs index ddcfe254..0e654c88 100644 --- a/evm/src/extension_tower.rs +++ b/evm/src/extension_tower.rs @@ -1,4 +1,4 @@ -use std::mem::transmute; +use std::fmt::Debug; use std::ops::{Add, Div, Mul, Neg, Sub}; use ethereum_types::{U256, U512}; @@ -7,6 +7,8 @@ use rand::Rng; pub trait FieldExt: Copy + + std::fmt::Debug + + std::cmp::PartialEq + std::ops::Add + std::ops::Neg + std::ops::Sub @@ -15,6 +17,7 @@ pub trait FieldExt: { const ZERO: Self; const UNIT: Self; + fn new(val: usize) -> Self; fn inv(self) -> Self; } @@ -30,14 +33,6 @@ pub struct BN254 { pub val: U256, } -impl BN254 { - pub fn new(val: usize) -> BN254 { - BN254 { - val: U256::from(val), - } - } -} - impl Distribution for Standard { fn sample(&self, rng: &mut R) -> BN254 { let xs = rng.gen::<[u64; 4]>(); @@ -91,6 +86,11 @@ impl Mul for BN254 { impl FieldExt for BN254 { 
const ZERO: Self = BN254 { val: U256::zero() }; const UNIT: Self = BN254 { val: U256::one() }; + fn new(val: usize) -> BN254 { + BN254 { + val: U256::from(val), + } + } fn inv(self) -> BN254 { let exp = BN_BASE - 2; let mut current = self; @@ -131,12 +131,6 @@ pub struct BLS381 { } impl BLS381 { - pub fn new(val: usize) -> BLS381 { - BLS381 { - val: U512::from(val), - } - } - pub fn lo(self) -> U256 { U256(self.val.0[..4].try_into().unwrap()) } @@ -234,6 +228,11 @@ impl Mul for BLS381 { impl FieldExt for BLS381 { const ZERO: Self = BLS381 { val: U512::zero() }; const UNIT: Self = BLS381 { val: U512::one() }; + fn new(val: usize) -> BLS381 { + BLS381 { + val: U512::from(val), + } + } fn inv(self) -> BLS381 { let exp = BLS_BASE - 2; let mut current = self; @@ -365,6 +364,14 @@ impl FieldExt for Fp2 { re: T::UNIT, im: T::ZERO, }; + + fn new(val: usize) -> Fp2 { + Fp2 { + re: T::new(val), + im: T::ZERO, + } + } + /// The inverse of z is given by z'/||z||^2 since ||z||^2 = zz' fn inv(self) -> Fp2 { let norm_sq = self.norm_sq(); @@ -975,6 +982,14 @@ where t2: Fp2::::ZERO, }; + fn new(val: usize) -> Fp6 { + Fp6 { + t0: Fp2::::new(val), + t1: Fp2::::ZERO, + t2: Fp2::::ZERO, + } + } + /// Let x_n = x^(p^n) and note that /// x_0 = x^(p^0) = x^1 = x /// (x_n)_m = (x^(p^n))^(p^m) = x^(p^n * p^m) = x^(p^(n+m)) = x_{n+m} @@ -1040,6 +1055,13 @@ where z1: Fp6::::ZERO, }; + fn new(val: usize) -> Fp12 { + Fp12 { + z0: Fp6::::new(val), + z1: Fp6::::ZERO, + } + } + /// By Galois Theory, given x: Fp12, the product /// phi = Prod_{i=0}^11 x_i /// lands in BN254, and hence the inverse of x is given by @@ -1204,22 +1226,51 @@ pub trait Stack { fn on_stack(self) -> Vec; } +impl Stack for BN254 { + fn on_stack(self) -> Vec { + vec![self.val] + } +} + impl Stack for BLS381 { fn on_stack(self) -> Vec { vec![self.lo(), self.hi()] } } -impl Stack for Fp6 { +impl Stack for Fp2 +where + T: FieldExt + Stack, +{ fn on_stack(self) -> Vec { - let f: [U256; 6] = unsafe { transmute(self) }; - 
f.into_iter().collect() + let mut stack = self.re.on_stack(); + stack.extend(self.im.on_stack()); + stack } } -impl Stack for Fp12 { +impl Stack for Fp6 +where + T: FieldExt, + Fp2: Adj + Stack, +{ fn on_stack(self) -> Vec { - let f: [U256; 12] = unsafe { transmute(self) }; - f.into_iter().collect() + let mut stack = self.t0.on_stack(); + stack.extend(self.t1.on_stack()); + stack.extend(self.t2.on_stack()); + stack + } +} + +impl Stack for Fp12 +where + T: FieldExt, + Fp2: Adj, + Fp6: Stack, +{ + fn on_stack(self) -> Vec { + let mut stack = self.z0.on_stack(); + stack.extend(self.z1.on_stack()); + stack } } diff --git a/evm/src/lib.rs b/evm/src/lib.rs index dc07d233..02730321 100644 --- a/evm/src/lib.rs +++ b/evm/src/lib.rs @@ -9,11 +9,11 @@ pub mod all_stark; pub mod arithmetic; -pub mod bn254_pairing; pub mod config; pub mod constraint_consumer; pub mod cpu; pub mod cross_table_lookup; +pub mod curve_pairings; pub mod extension_tower; pub mod fixed_recursive_verifier; pub mod generation; diff --git a/evm/src/witness/util.rs b/evm/src/witness/util.rs index a5ebf2ac..94e13e50 100644 --- a/evm/src/witness/util.rs +++ b/evm/src/witness/util.rs @@ -45,9 +45,7 @@ pub(crate) fn kernel_peek( segment: Segment, virt: usize, ) -> U256 { - state - .memory - .get(MemoryAddress::new(state.registers.context, segment, virt)) + state.memory.get(MemoryAddress::new(0, segment, virt)) } pub(crate) fn mem_read_with_log(