diff --git a/evm/src/cpu/kernel/asm/bignum/modmul.asm b/evm/src/cpu/kernel/asm/bignum/modmul.asm
index 92ee63f8..32fa13ad 100644
--- a/evm/src/cpu/kernel/asm/bignum/modmul.asm
+++ b/evm/src/cpu/kernel/asm/bignum/modmul.asm
@@ -4,163 +4,153 @@
 // Stores a * b % m in output_loc, leaving a, b, and m unchanged.
 // a, b, and m must have the same length.
 // Both output_loc and scratch_1 must have size length.
-// All of scratch_2, scratch_3, and scratch_4 must have size 2 * length and be initialized with zeroes.
+// Both scratch_2 and scratch_3 must have size 2 * length and be initialized with zeroes.
 global modmul_bignum:
-    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
 
     // The prover provides x := (a * b) % m, which we store in output_loc.
 
     PUSH 0
-    // stack: i=0, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i=0, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
 modmul_remainder_loop:
-    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     PROVER_INPUT(bignum_modmul::remainder)
-    // stack: PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     DUP7
-    // stack: output_loc, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: output_loc, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     DUP3
-    // stack: i, output_loc, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i, output_loc, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     ADD
-    // stack: output_loc[i], PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: output_loc[i], PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     %mstore_kernel_general
-    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     %increment
-    // stack: i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     DUP2
     DUP2
-    // stack: i+1, length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i+1, length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     EQ
-    // stack: i+1==length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i+1==length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     ISZERO
-    // stack: i+1!=length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i+1!=length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     %jumpi(modmul_remainder_loop)
 modmul_remainder_end:
-    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     POP
 
     // The prover provides k := (a * b) / m, which we store in scratch_1.
 
-    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     PUSH 0
-    // stack: i=0, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i=0, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
 modmul_quotient_loop:
-    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     PROVER_INPUT(bignum_modmul::quotient)
-    // stack: PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     DUP8
-    // stack: scratch_1, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: scratch_1, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     DUP3
-    // stack: i, scratch_1, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i, scratch_1, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     ADD
-    // stack: scratch_1[i], PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: scratch_1[i], PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     %mstore_kernel_general
-    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     %increment
-    // stack: i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     DUP2
     DUP2
-    // stack: i+1, length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i+1, length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     %neq
-    // stack: i+1!=length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i+1!=length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     %jumpi(modmul_quotient_loop)
 modmul_quotient_end:
-    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
     POP
-    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
+    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
 
     // Verification step 1: calculate x + k * m.
 
-    // Store k * m in scratch_2, using scratch_3 as scratch space.
+    // Store k * m in scratch_2.
     PUSH modmul_return_1
-    // stack: modmul_return_1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, scratch_4, retdest
-    %stack (return, len, a, b, m, out, s1, s2, s3) -> (len, s1, m, s2, s3, return, len, a, b, out, s2, s3)
-    // stack: length, scratch_1, m_start_loc, scratch_2, scratch_3, modmul_return_1, length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: modmul_return_1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
+    %stack (return, len, a, b, m, out, s1, s2) -> (len, s1, m, s2, return, len, a, b, out, s2)
+    // stack: length, scratch_1, m_start_loc, scratch_2, modmul_return_1, length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, retdest
     %jump(mul_bignum)
 modmul_return_1:
-    // stack: length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, retdest
 
     // Add x into k * m (in scratch_2).
     PUSH modmul_return_2
-    // stack: modmul_return_2, length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: modmul_return_2, length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, retdest
     %stack (return, len, a, b, out, s2) -> (len, s2, out, return, len, a, b, s2)
-    // stack: length, scratch_2, output_loc, modmul_return_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: length, scratch_2, output_loc, modmul_return_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     %jump(add_bignum)
 modmul_return_2:
-    // stack: carry, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: carry, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     ISZERO
     %jumpi(no_carry)
 
-    // input is correct, x + k * m will equal a * b, which has length at most 2 * length).
-
-    // stack: length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     DUP4
-    // stack: scratch_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: scratch_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     DUP2
-    // stack: length, scratch_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: length, scratch_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     ADD
-    // stack: cur_loc=scratch_2 + length, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc=scratch_2 + length, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
 increment_loop:
-    // stack: cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     DUP1
     %mload_kernel_general
-    // stack: val, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: val, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     %increment
-    // stack: val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     DUP1
-    // stack: val+1, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: val+1, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     %eq_const(@BIGNUM_LIMB_BASE)
-    // stack: val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     DUP1
-    // stack: val+1==limb_base, val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: val+1==limb_base, val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     ISZERO
-    // stack: val+1!=limb_base, val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: val+1!=limb_base, val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     SWAP1
     SWAP2
-    // stack: val+1, val+1!=limb_base, val+1==limb_base, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: val+1, val+1!=limb_base, val+1==limb_base, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     MUL
-    // stack: to_write=(val+1)*(val+1!=limb_base), continue=val+1==limb_base, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: to_write=(val+1)*(val+1!=limb_base), continue=val+1==limb_base, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     DUP3
-    // stack: cur_loc, to_write, continue, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc, to_write, continue, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     %mstore_kernel_general
-    // stack: continue, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: continue, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     SWAP1
-    // stack: cur_loc, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     %increment
-    // stack: cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     DUP1
-    // stack: cur_loc + 1, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc + 1, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     DUP8
-    // stack: scratch_3, cur_loc + 1, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: scratch_3, cur_loc + 1, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     EQ
-    // stack: cur_loc + 1 == scratch_3, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc + 1 == scratch_3, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     ISZERO
-    // stack: cur_loc + 1 != scratch_3, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc + 1 != scratch_3, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     SWAP1
     SWAP2
-    // stack: continue, cur_loc + 1 != scratch_3, cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: continue, cur_loc + 1 != scratch_3, cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     MUL
-    // stack: new_continue=continue*(cur_loc + 1 != scratch_3), cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: new_continue=continue*(cur_loc + 1 != scratch_3), cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     %jumpi(increment_loop)
-    // stack: cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
     POP
 
 no_carry:
-    // stack: length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
+    // stack: length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
 
     // Calculate a * b.
-    // Store zeroes in scratch_3.
-    DUP1
-    // stack: length, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
-    DUP6
-    // stack: scratch_3, length, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
-    %clear_kernel_general
-    // stack: length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
-
-    // Store a * b in scratch_3, using scratch_4 as scratch space.
+    // Store a * b in scratch_3.
     PUSH modmul_return_3
-    // stack: modmul_return_3, length, a_start_loc, b_start_loc, scratch_2, scratch_3, scratch_4, retdest
-    %stack (return, len, a, b, s2, s3, s4) -> (len, a, b, s3, s4, return, len, s2, s3)
-    // stack: length, a_start_loc, b_start_loc, scratch_3, scratch_4, modmul_return_3, length, scratch_2, scratch_3, retdest
+    // stack: modmul_return_3, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
+    %stack (return, len, a, b, s2, s3) -> (len, a, b, s3, return, len, s2, s3)
+    // stack: length, a_start_loc, b_start_loc, scratch_3, modmul_return_3, length, scratch_2, scratch_3, retdest
     %jump(mul_bignum)
 modmul_return_3:
     // stack: length, scratch_2, scratch_3, retdest