diff --git a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm index c29d8f14..bd903b21 100644 --- a/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm +++ b/evm/src/cpu/kernel/asm/curve/bn254/curve_arithmetic/glv.asm @@ -42,6 +42,7 @@ // ``` global bn_glv_decompose: // stack: k, retdest + %mod_const(@BN_SCALAR) PUSH @BN_SCALAR DUP1 DUP1 // Compute c2 which is the top 256 bits of k*g1. Use asm from https://medium.com/wicketh/mathemagic-full-multiply-27650fec525d. PUSH @U256_MAX @@ -73,7 +74,15 @@ global bn_glv_decompose: // We compute k2 = q1 + q2 - N, but we check for underflow and return N-q1-q2 instead if there is one, // along with a flag `underflow` set to 1 if there is an underflow, 0 otherwise. - ADD %sub_check_underflow + ADD %bn_sub_check_underflow + // stack: k2, underflow, N, k, retdest + DUP1 %ge_const(0x80000000000000000000000000000000) %jumpi(negate) + %jump(contd) +negate: + // stack: k2, underflow, N, k, retdest + SWAP1 PUSH 1 SUB SWAP1 + PUSH @BN_SCALAR SUB +contd: // stack: k2, underflow, N, k, retdest SWAP3 PUSH @BN_SCALAR DUP5 PUSH @BN_GLV_S // stack: s, k2, N, k, underflow, N, k2, retdest @@ -94,4 +103,14 @@ underflowed: %stack (k1, k2, underflow, retdest) -> (retdest, underflow, k1, k2) JUMP - +%macro bn_sub_check_underflow + // stack: x, y + DUP2 DUP2 LT + // stack: x=y, x (x, y, b, a, c) + SUB MUL ADD + %stack (res, bool) -> (res, @BN_SCALAR, bool) + MOD +%endmacro