diff --git a/evm/src/cpu/kernel/asm/bignum/addmul.asm b/evm/src/cpu/kernel/asm/bignum/addmul.asm index 55769bf8..e1bd3bf2 100644 --- a/evm/src/cpu/kernel/asm/bignum/addmul.asm +++ b/evm/src/cpu/kernel/asm/bignum/addmul.asm @@ -1,7 +1,7 @@ // Arithmetic on little-endian integers represented with 128-bit limbs. // All integers must be under a given length bound, and are padded with leading zeroes. -// Sets a[0:len+1] += b[0:len] * val. +// Sets a[0:len] += b[0:len] * val, and returns the carry. global addmul_bignum: // stack: len, a_start_loc, b_start_loc, val, retdest PUSH 0 diff --git a/evm/src/cpu/kernel/asm/bignum/mul.asm b/evm/src/cpu/kernel/asm/bignum/mul.asm index 1b947c23..d62ce907 100644 --- a/evm/src/cpu/kernel/asm/bignum/mul.asm +++ b/evm/src/cpu/kernel/asm/bignum/mul.asm @@ -3,83 +3,51 @@ // Stores a * b in output_loc, leaving a and b unchanged. // Both a and b have length len; a * b will have length 2 * len. -// Both output_loc and scratch_space must be initialized as zeroes (2 * len of them in the case -// of output_loc, and len + 1 of them in the case of scratch_space). +// output_loc must be initialized as 2 * len zeroes. global mul_bignum: - // stack: len, a_start_loc, b_start_loc, output_loc, scratch_space, retdest + // stack: len, a_start_loc, b_start_loc, output_loc, retdest DUP1 - // stack: len, n=len, a_start_loc, bi=b_start_loc, output_cur=output_loc, scratch_space, retdest + // stack: n=len, len, a_start_loc, bi=b_start_loc, output_cur=output_loc, retdest mul_loop: - // stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest - - // Copy a from a_start_loc into scratch_space. 
- DUP1 - // stack: len, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP4 - // stack: a_start_loc, len, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP8 - // stack: scratch_space, a_start_loc, len, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - %memcpy_kernel_general - // stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest - - // Insert a zero into scratch_space[len]. - DUP6 - // stack: scratch_space, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP2 - // stack: len, scratch_space, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - ADD - // stack: scratch_space + len, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - PUSH 0 - SWAP1 - // stack: scratch_space + len, 0, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - %mstore_kernel_general - // stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest - - // Multiply a by b[i] and add into output_cur. + // stack: n, len, a_start_loc, bi, output_cur, retdest PUSH mul_return - // stack: mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: mul_return, n, len, a_start_loc, bi, output_cur, retdest DUP5 - // stack: bi, mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: bi, mul_return, n, len, a_start_loc, bi, output_cur, retdest %mload_kernel_general - // stack: b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: b[i], mul_return, n, len, a_start_loc, bi, output_cur, retdest DUP5 - // stack: a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: a_start_loc, b[i], mul_return, n, len, a_start_loc, bi, output_cur, retdest DUP8 - // stack: output_cur, a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP5 - // stack: len, output_cur, a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, 
scratch_space, retdest + // stack: output_cur, a_start_loc, b[i], mul_return, n, len, a_start_loc, bi, output_cur, retdest + DUP6 + // stack: len, output_cur, a_start_loc, b[i], mul_return, n, len, a_start_loc, bi, output_cur, retdest %jump(addmul_bignum) mul_return: - // stack: carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: carry, n, len, a_start_loc, bi, output_cur, retdest DUP6 - // stack: output_cur, carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP3 - // stack: len, output_cur, carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: output_cur, carry, n, len, a_start_loc, bi, output_cur, retdest + DUP4 + // stack: len, output_cur, carry, n, len, a_start_loc, bi, output_cur, retdest ADD - // stack: output_cur + len, carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - %increment - // stack: output_cur + len + 1, carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: output_cur + len, carry, n, len, a_start_loc, bi, output_cur, retdest %mstore_kernel_general - // stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest - - // Increment output_cur and b[i], decrement n, and check if we're done. 
- DUP5 - // stack: output_cur, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - %increment - // stack: output_cur+1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP5 - %increment - // stack: bi+1, output_cur+1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP5 - // stack: a_start_loc, bi+1, output_cur+1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP5 + // stack: n, len, a_start_loc, bi, output_cur, retdest %decrement - // stack: n-1, a_start_loc, bi+1, output_cur+1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - %stack (new: 4, len, old: 4) -> (len, new) - // stack: len, n-1, a_start_loc, bi+1, output_cur+1, scratch_space, retdest - DUP2 - // stack: n-1, len, n-1, a_start_loc, bi+1, output_cur+1, scratch_space, retdest + // stack: n-1, len, a_start_loc, bi, output_cur, retdest + SWAP3 + %increment + SWAP3 + // stack: n-1, len, a_start_loc, bi+1, output_cur, retdest + SWAP4 + %increment + SWAP4 + // stack: n-1, len, a_start_loc, bi+1, output_cur+1, retdest + DUP1 + // stack: n-1, n-1, len, a_start_loc, bi+1, output_cur+1, retdest %jumpi(mul_loop) - // stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest - %pop6 +mul_end: + // stack: n-1, len, a_start_loc, bi+1, output_cur+1, retdest + %pop5 + // stack: retdest JUMP diff --git a/evm/src/cpu/kernel/tests/bignum.rs b/evm/src/cpu/kernel/tests/bignum.rs index 72e38ab1..0b0cfe2f 100644 --- a/evm/src/cpu/kernel/tests/bignum.rs +++ b/evm/src/cpu/kernel/tests/bignum.rs @@ -231,7 +231,6 @@ fn test_mul_bignum() -> Result<()> { let a_start_loc = 0.into(); let b_start_loc = length; let output_loc = length * 2; - let scratch_space = length * 4; // Prepare stack. let retdest = 0xDEADBEEFu32.into(); @@ -240,7 +239,6 @@ fn test_mul_bignum() -> Result<()> { a_start_loc, b_start_loc, output_loc, - scratch_space, retdest, ]; initial_stack.reverse();