diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index bdef51f7..ac0d6f7c 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -10,6 +10,9 @@ pub(crate) fn combined_kernel() -> Kernel { let files = vec![ include_str!("asm/basic_macros.asm"), include_str!("asm/exp.asm"), + include_str!("asm/curve_mul.asm"), + include_str!("asm/curve_add.asm"), + include_str!("asm/moddiv.asm"), include_str!("asm/storage_read.asm"), include_str!("asm/storage_write.asm"), ]; diff --git a/evm/src/cpu/kernel/asm/curve_add.asm b/evm/src/cpu/kernel/asm/curve_add.asm new file mode 100644 index 00000000..fdbbf997 --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve_add.asm @@ -0,0 +1,397 @@ +// #define N 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 // BN254 base field order + +// BN254 elliptic curve addition. +// Uses the standard affine addition formula. +global ec_add: + // Uncomment for test inputs. + // PUSH 0xdeadbeef + // PUSH 2 + // PUSH 1 + // PUSH 0x1bf9384aa3f0b3ad763aee81940cacdde1af71617c06f46e11510f14f3d5d121 + // PUSH 0xe7313274bb29566ff0c8220eb9841de1d96c2923c6a4028f7dd3c6a14cee770 + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Check if points are valid BN254 points. 
+ DUP2 + // stack: y0, x0, y0, x1, y1, retdest + DUP2 + // stack: x0, y0, x0, y0, x1, y1, retdest + %ec_check + // stack: isValid(x0, y0), x0, y0, x1, y1, retdest + DUP5 + // stack: y1, isValid(x0, y0), x0, y0, x1, y1, retdest + DUP5 + // stack: x1, y1, isValid(x0, y0), x0, y0, x1, y1, retdest + %ec_check + // stack: isValid(x1, y1), isValid(x0, y0), x0, y0, x1, y1, retdest + AND + // stack: isValid(x1, y1) & isValid(x0, y0), x0, y0, x1, y1, retdest + PUSH ec_add_valid_points + // stack: ec_add_valid_points, isValid(x1, y1) & isValid(x0, y0), x0, y0, x1, y1, retdest + JUMPI + // stack: x0, y0, x1, y1, retdest + + // Otherwise, drop both points and return (u256::MAX, u256::MAX). + POP + // stack: y0, x1, y1, retdest + POP + // stack: x1, y1, retdest + POP + // stack: y1, retdest + POP + // stack: retdest + %ec_invalid_input + +// BN254 elliptic curve addition. +// Assumption: (x0,y0) and (x1,y1) are valid points. +global ec_add_valid_points: + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Check if the first point is the identity. + DUP2 + // stack: y0, x0, y0, x1, y1, retdest + DUP2 + // stack: x0, y0, x0, y0, x1, y1, retdest + %ec_isidentity + // stack: (x0,y0)==(0,0), x0, y0, x1, y1, retdest + PUSH ec_add_first_zero + // stack: ec_add_first_zero, (x0,y0)==(0,0), x0, y0, x1, y1, retdest + JUMPI + // stack: x0, y0, x1, y1, retdest + + // Check if the second point is the identity. + DUP4 + // stack: y1, x0, y0, x1, y1, retdest + DUP4 + // stack: x1, y1, x0, y0, x1, y1, retdest + %ec_isidentity + // stack: (x1,y1)==(0,0), x0, y0, x1, y1, retdest + PUSH ec_add_snd_zero + // stack: ec_add_snd_zero, (x1,y1)==(0,0), x0, y0, x1, y1, retdest + JUMPI + // stack: x0, y0, x1, y1, retdest + + // Check if both points have the same x-coordinate. 
+ DUP3 + // stack: x1, x0, y0, x1, y1, retdest + DUP2 + // stack: x0, x1, x0, y0, x1, y1, retdest + EQ + // stack: x0 == x1, x0, y0, x1, y1, retdest + PUSH ec_add_equal_first_coord + // stack: ec_add_equal_first_coord, x0 == x1, x0, y0, x1, y1, retdest + JUMPI + // stack: x0, y0, x1, y1, retdest + + // Otherwise, we can use the standard formula. + // Compute lambda = (y0 - y1)/(x0 - x1) + DUP4 + // stack: y1, x0, y0, x1, y1, retdest + DUP3 + // stack: y0, y1, x0, y0, x1, y1, retdest + %submod + // stack: y0 - y1, x0, y0, x1, y1, retdest + DUP4 + // stack: x1, y0 - y1, x0, y0, x1, y1, retdest + DUP3 + // stack: x0, x1, y0 - y1, x0, y0, x1, y1, retdest + %submod + // stack: x0 - x1, y0 - y1, x0, y0, x1, y1, retdest + %moddiv + // stack: lambda, x0, y0, x1, y1, retdest + PUSH ec_add_valid_points_with_lambda + // stack: ec_add_valid_points_with_lambda, lambda, x0, y0, x1, y1, retdest + JUMP + +// BN254 elliptic curve addition. +// Assumption: (x0,y0) == (0,0) +ec_add_first_zero: + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Just return (x1,y1) + POP + // stack: y0, x1, y1, retdest + POP + // stack: x1, y1, retdest + SWAP1 + // stack: y1, x1, retdest + SWAP2 + // stack: retdest, x1, y1 + JUMP + +// BN254 elliptic curve addition. +// Assumption: (x1,y1) == (0,0) +ec_add_snd_zero: + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Just return (x0,y0) + SWAP2 + // stack: x1, y0, x0, y1, retdest + POP + // stack: y0, x0, y1, retdest + SWAP2 + // stack: y1, x0, y0, retdest + POP + // stack: x0, y0, retdest + SWAP1 + // stack: y0, x0, retdest + SWAP2 + // stack: retdest, x0, y0 + JUMP + +// BN254 elliptic curve addition. 
+// Assumption: lambda is the slope for these points: (y0 - y1)/(x0 - x1), or the tangent slope 3/2 * x0^2 / y0 when entered from ec_add_equal_points. +ec_add_valid_points_with_lambda: + JUMPDEST + // stack: lambda, x0, y0, x1, y1, retdest + + // Compute x2 = lambda^2 - x1 - x0 + DUP2 + // stack: x0, lambda, x0, y0, x1, y1, retdest + DUP5 + // stack: x1, x0, lambda, x0, y0, x1, y1, retdest + %bn_base + // stack: N, x1, x0, lambda, x0, y0, x1, y1, retdest + DUP4 + // stack: lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest + DUP1 + // stack: lambda, lambda, N, x1, x0, lambda, x0, y0, x1, y1, retdest + MULMOD + // stack: lambda^2, x1, x0, lambda, x0, y0, x1, y1, retdest + %submod + // stack: lambda^2 - x1, x0, lambda, x0, y0, x1, y1, retdest + %submod + // stack: x2, lambda, x0, y0, x1, y1, retdest + + // Compute y2 = lambda*(x1 - x2) - y1 + %bn_base + // stack: N, x2, lambda, x0, y0, x1, y1, retdest + DUP2 + // stack: x2, N, x2, lambda, x0, y0, x1, y1, retdest + DUP7 + // stack: x1, x2, N, x2, lambda, x0, y0, x1, y1, retdest + %submod + // stack: x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest + DUP4 + // stack: lambda, x1 - x2, N, x2, lambda, x0, y0, x1, y1, retdest + MULMOD + // stack: lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest + DUP7 + // stack: y1, lambda * (x1 - x2), x2, lambda, x0, y0, x1, y1, retdest + SWAP1 + // stack: lambda * (x1 - x2), y1, x2, lambda, x0, y0, x1, y1, retdest + %submod + // stack: y2, x2, lambda, x0, y0, x1, y1, retdest + + // Return x2,y2 + SWAP5 + // stack: x1, x2, lambda, x0, y0, y2, y1, retdest + POP + // stack: x2, lambda, x0, y0, y2, y1, retdest + SWAP5 + // stack: y1, lambda, x0, y0, y2, x2, retdest + POP + // stack: lambda, x0, y0, y2, x2, retdest + POP + // stack: x0, y0, y2, x2, retdest + POP + // stack: y0, y2, x2, retdest + POP + // stack: y2, x2, retdest + SWAP2 + // stack: retdest, x2, y2 + JUMP + +// BN254 elliptic curve addition. 
+// Assumption: (x0,y0) and (x1,y1) are valid points and x0 == x1 +ec_add_equal_first_coord: + JUMPDEST + // stack: x0, y0, x1, y1, retdest with x0 == x1 + + // Check if the points are equal + DUP2 + // stack: y0, x0, y0, x1, y1, retdest + DUP5 + // stack: y1, y0, x0, y0, x1, y1, retdest + EQ + // stack: y1 == y0, x0, y0, x1, y1, retdest + PUSH ec_add_equal_points + // stack: ec_add_equal_points, y1 == y0, x0, y0, x1, y1, retdest + JUMPI + // stack: x0, y0, x1, y1, retdest + + // Otherwise, one is the negation of the other so we can return (0,0). + POP + // stack: y0, x1, y1, retdest + POP + // stack: x1, y1, retdest + POP + // stack: y1, retdest + POP + // stack: retdest + PUSH 0 + // stack: 0, retdest + PUSH 0 + // stack: 0, 0, retdest + SWAP2 + // stack: retdest, 0, 0 + JUMP + + +// BN254 elliptic curve addition. +// Assumption: x0 == x1 and y0 == y1 +// Standard doubling formula. +ec_add_equal_points: + JUMPDEST + // stack: x0, y0, x1, y1, retdest + + // Compute lambda = 3/2 * x0^2 / y0 + %bn_base + // stack: N, x0, y0, x1, y1, retdest + %bn_base + // stack: N, N, x0, y0, x1, y1, retdest + DUP3 + // stack: x0, N, N, x0, y0, x1, y1, retdest + DUP1 + // stack: x0, x0, N, N, x0, y0, x1, y1, retdest + MULMOD + // stack: x0^2, N, x0, y0, x1, y1, retdest + PUSH 0x183227397098d014dc2822db40c0ac2ecbc0b548b438e5469e10460b6c3e7ea5 // 3/2 in the base field + // stack: 3/2, x0^2, N, x0, y0, x1, y1, retdest + MULMOD + // stack: 3/2 * x0^2, x0, y0, x1, y1, retdest + DUP3 + // stack: y0, 3/2 * x0^2, x0, y0, x1, y1, retdest + %moddiv + // stack: lambda, x0, y0, x1, y1, retdest + PUSH ec_add_valid_points_with_lambda + // stack: ec_add_valid_points_with_lambda, lambda, x0, y0, x1, y1, retdest + JUMP + +// BN254 elliptic curve doubling. +// Assumption: (x0,y0) is a valid point. +// Standard doubling formula. 
+global ec_double: + JUMPDEST + // stack: x0, y0, retdest + DUP2 + // stack: y0, x0, y0, retdest + DUP2 + // stack: x0, y0, x0, y0, retdest + PUSH ec_add_equal_points + // stack: ec_add_equal_points, x0, y0, x0, y0, retdest + JUMP + +// Push the order of the BN254 base field. +%macro bn_base + PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 +%endmacro + +// Computes (x - y) % N. Assumption: x, y < N and 2N < 2^256. +// Note: Doesn't hold for Secp256k1 base field. +%macro submod + // stack: x, y + %bn_base + // stack: N, x, y + ADD + // stack: N + x, y // Doesn't overflow since 2N < 2^256 + SUB + // stack: N + x - y // Doesn't underflow since y < N + %bn_base + // stack: N, N + x - y + SWAP1 + // stack: N + x - y, N + MOD + // stack: (N + x - y) % N = (x-y) % N +%endmacro + +// Check if (x,y) is a valid curve point. +// Puts y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) on top of the stack. +%macro ec_check + // stack: x, y + %bn_base + // stack: N, x, y + DUP2 + // stack: x, N, x, y + LT + // stack: x < N, x, y + %bn_base + // stack: N, x < N, x, y + DUP4 + // stack: y, N, x < N, x, y + LT + // stack: y < N, x < N, x, y + AND + // stack: (y < N) & (x < N), x, y + SWAP2 + // stack: y, x, (y < N) & (x < N) + SWAP1 + // stack: x, y, b where b = (y < N) & (x < N) + %bn_base + // stack: N, x, y, b + %bn_base + // stack: N, N, x, y, b + DUP3 + // stack: x, N, N, x, y, b + %bn_base + // stack: N, x, N, N, x, y, b + DUP2 + // stack: x, N, x, N, N, x, y, b + DUP1 + // stack: x, x, N, x, N, N, x, y, b + MULMOD + // stack: x^2 % N, x, N, N, x, y, b + MULMOD + // stack: x^3 % N, N, x, y, b + PUSH 3 + // stack: 3, x^3 % N, N, x, y, b + ADDMOD + // stack: (x^3 + 3) % N, x, y, b + DUP3 + // stack: y, (x^3 + 3) % N, x, y, b + %bn_base + // stack: N, y, (x^3 + 3) % N, x, y, b + SWAP1 + // stack: y, N, (x^3 + 3) % N, x, y, b + DUP1 + // stack: y, y, N, (x^3 + 3) % N, x, y, b + MULMOD + // stack: y^2 % N, (x^3 + 3) % N, x, y, b + EQ + // stack: y^2 % N == (x^3 + 3) % N, x, y, 
b + SWAP2 + // stack: y, x, y^2 % N == (x^3 + 3) % N, b + %ec_isidentity + // stack: (x,y)==(0,0), y^2 % N == (x^3 + 3) % N, b + SWAP2 + // stack: b, y^2 % N == (x^3 + 3) % N, (x,y)==(0,0) + AND + // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N), (x,y)==(0,0) + OR + // stack: y^2 % N == (x^3 + 3) % N & (x < N) & (y < N) || (x,y)==(0,0) +%endmacro + +// Check if (x,y)==(0,0) +%macro ec_isidentity + // stack: x, y + OR + // stack: x | y + ISZERO + // stack: (x,y) == (0,0) +%endmacro + +// Return (u256::MAX, u256::MAX) which is used to indicate the input was invalid. +%macro ec_invalid_input + // stack: retdest + PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + // stack: u256::MAX, retdest + PUSH 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + // stack: u256::MAX, u256::MAX, retdest + SWAP2 + // stack: retdest, u256::MAX, u256::MAX + JUMP +%endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/curve_mul.asm b/evm/src/cpu/kernel/asm/curve_mul.asm new file mode 100644 index 00000000..0826b0e3 --- /dev/null +++ b/evm/src/cpu/kernel/asm/curve_mul.asm @@ -0,0 +1,139 @@ +// BN254 elliptic curve scalar multiplication. +// Recursive implementation, same algorithm as in `exp.asm`. +global ec_mul: + // Uncomment for test inputs. 
+ // PUSH 0xdeadbeef + // PUSH 0xd + // PUSH 2 + // PUSH 1 + JUMPDEST + // stack: x, y, s, retdest + DUP2 + // stack: y, x, y, s, retdest + DUP2 + // stack: x, y, x, y, s, retdest + %ec_isidentity + // stack: (x,y)==(0,0), x, y, s, retdest + PUSH ret_zero + // stack: ret_zero, (x,y)==(0,0), x, y, s, retdest + JUMPI + // stack: x, y, s, retdest + DUP2 + // stack: y, x, y, s, retdest + DUP2 + // stack: x, y, x, y, s, retdest + %ec_check + // stack: isValid(x, y), x, y, s, retdest + PUSH ec_mul_valid_point + // stack: ec_mul_valid_point, isValid(x, y), x, y, s, retdest + JUMPI + // stack: x, y, s, retdest + POP + // stack: y, s, retdest + POP + // stack: s, retdest + POP + // stack: retdest + %ec_invalid_input + +// Same algorithm as in `exp.asm` +ec_mul_valid_point: + JUMPDEST + // stack: x, y, s, retdest + DUP3 + // stack: s, x, y, s, retdest + PUSH step_case + // stack: step_case, s, x, y, s, retdest + JUMPI + // stack: x, y, s, retdest + PUSH ret_zero + // stack: ret_zero, x, y, s, retdest + JUMP + +step_case: + JUMPDEST + // stack: x, y, s, retdest + PUSH recursion_return + // stack: recursion_return, x, y, s, retdest + PUSH 2 + // stack: 2, recursion_return, x, y, s, retdest + DUP5 + // stack: s, 2, recursion_return, x, y, s, retdest + DIV + // stack: s / 2, recursion_return, x, y, s, retdest + PUSH step_case_contd + // stack: step_case_contd, s / 2, recursion_return, x, y, s, retdest + DUP5 + // stack: y, step_case_contd, s / 2, recursion_return, x, y, s, retdest + DUP5 + // stack: x, y, step_case_contd, s / 2, recursion_return, x, y, s, retdest + PUSH ec_double + // stack: ec_double, x, y, step_case_contd, s / 2, recursion_return, x, y, s, retdest + JUMP + +// Assumption: 2(x,y) = (x',y') +step_case_contd: + JUMPDEST + // stack: x', y', s / 2, recursion_return, x, y, s, retdest + PUSH ec_mul_valid_point + // stack: ec_mul_valid_point, x', y', s / 2, recursion_return, x, y, s, retdest + JUMP + +recursion_return: + JUMPDEST + // stack: x', y', x, y, s, retdest + 
SWAP4 + // stack: s, y', x, y, x', retdest + PUSH 1 + // stack: 1, s, y', x, y, x', retdest + AND + // stack: s & 1, y', x, y, x', retdest + SWAP1 + // stack: y', s & 1, x, y, x', retdest + SWAP2 + // stack: x, s & 1, y', y, x', retdest + SWAP3 + // stack: y, s & 1, y', x, x', retdest + SWAP4 + // stack: x', s & 1, y', x, y, retdest + SWAP1 + // stack: s & 1, x', y', x, y, retdest + PUSH odd_scalar + // stack: odd_scalar, s & 1, x', y', x, y, retdest + JUMPI + // stack: x', y', x, y, retdest + SWAP3 + // stack: y, y', x, x', retdest + POP + // stack: y', x, x', retdest + SWAP1 + // stack: x, y', x', retdest + POP + // stack: y', x', retdest + SWAP2 + // stack: retdest, x', y' + JUMP + +odd_scalar: + JUMPDEST + // stack: x', y', x, y, retdest + PUSH ec_add_valid_points + // stack: ec_add_valid_points, x', y', x, y, retdest + JUMP + +ret_zero: + JUMPDEST + // stack: x, y, s, retdest + POP + // stack: y, s, retdest + POP + // stack: s, retdest + POP + // stack: retdest + PUSH 0 + // stack: 0, retdest + PUSH 0 + // stack: 0, 0, retdest + SWAP2 + // stack: retdest, 0, 0 + JUMP diff --git a/evm/src/cpu/kernel/asm/moddiv.asm b/evm/src/cpu/kernel/asm/moddiv.asm new file mode 100644 index 00000000..891897e5 --- /dev/null +++ b/evm/src/cpu/kernel/asm/moddiv.asm @@ -0,0 +1,506 @@ +/// Division modulo 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47, the BN254 base field order +/// To replace with more efficient method using non-determinism later. + +// Returns y * (x^-1) where the inverse is taken modulo N +%macro moddiv + // stack: x, y + %inverse + // stack: x^-1, y + %mulmodn +%endmacro + +%macro mulmodn + // stack: x, y + PUSH 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 + // stack: N, x, y + SWAP2 + // stack: y, x, N + MULMOD +%endmacro + +%macro squaremodn + // stack: x + DUP1 + // stack: x, x + %mulmodn +%endmacro + +// Computes the inverse modulo N using x^-1 = x^(N-2) mod N and square-and-multiply modular exponentiation. 
+%macro inverse + DUP1 + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + 
%squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + 
DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + %squaremodn + %squaremodn + DUP2 + %mulmodn + SWAP1 + // stack: x, x^-1 + POP + // stack: x^-1 +%endmacro