diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 53787e48..53584a1f 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -12,6 +12,7 @@ pub static KERNEL: Lazy = Lazy::new(combined_kernel); pub(crate) fn combined_kernel() -> Kernel { let files = vec![ include_str!("asm/bignum/add.asm"), + include_str!("asm/bignum/addmul.asm"), include_str!("asm/bignum/ge.asm"), include_str!("asm/bignum/iszero.asm"), include_str!("asm/bignum/mul.asm"), diff --git a/evm/src/cpu/kernel/asm/bignum/add.asm b/evm/src/cpu/kernel/asm/bignum/add.asm index 508078ce..d38c9701 100644 --- a/evm/src/cpu/kernel/asm/bignum/add.asm +++ b/evm/src/cpu/kernel/asm/bignum/add.asm @@ -5,7 +5,7 @@ global add_bignum: // stack: len, a_start_loc, b_start_loc, retdest PUSH 0 - // stack: carry=0, i=len, a_start_loc, b_start_loc, retdest + // stack: carry=0, i=len, a_cur_loc=a_start_loc, b_cur_loc=b_start_loc, retdest add_loop: // stack: carry, i, a_cur_loc, b_cur_loc, retdest DUP4 diff --git a/evm/src/cpu/kernel/asm/bignum/addmul.asm b/evm/src/cpu/kernel/asm/bignum/addmul.asm new file mode 100644 index 00000000..742bb829 --- /dev/null +++ b/evm/src/cpu/kernel/asm/bignum/addmul.asm @@ -0,0 +1,99 @@ +// Arithmetic on little-endian integers represented with 128-bit limbs. +// All integers must be under a given length bound, and are padded with leading zeroes. + +// Sets a[0:len] += b[0:len] * val. 
+global addmul_bignum:
+    // stack: len, a_start_loc, b_start_loc, val, retdest
+    PUSH 0 // initial carry; len must be nonzero because i is only tested after being decremented
+    // stack: carry=0, i=len, a_cur_loc=a_start_loc, b_cur_loc=b_start_loc, val, retdest
+addmul_loop:
+    // stack: carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP4
+    // stack: b_cur_loc, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    %mload_kernel_general
+    // stack: b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP6
+    // stack: val, b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
+    MUL // NOTE(review): assumes val and b[cur] are both below 2^128 so the product fits in one word - confirm against callers
+    // stack: val * b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP1
+    // stack: val * b[cur], val * b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
+    %shr_const(128)
+    // stack: (val * b[cur]) // 2^128, val * b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
+    SWAP1
+    // stack: val * b[cur], (val * b[cur]) // 2^128, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    %shl_const(128)
+    %shr_const(128)
+    // stack: prod_lo = val * b[cur] % 2^128, prod_hi = (val * b[cur]) // 2^128, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP5
+    // stack: a_cur_loc, prod_lo, prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    %mload_kernel_general
+    // stack: a[cur], prod_lo, prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP1
+    // stack: a[cur], a[cur], prod_lo, prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    SWAP2
+    // stack: prod_lo, a[cur], a[cur], prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    ADD
+    %shl_const(128)
+    %shr_const(128)
+    // stack: prod_lo' = (prod_lo + a[cur]) % 2^128, a[cur], prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP1
+    // stack: prod_lo', prod_lo', a[cur], prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    SWAP2
+    // stack: a[cur], prod_lo', prod_lo', prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    GT // the 128-bit sum wrapped exactly when it ended up smaller than the addend a[cur]
+    // stack: prod_lo_carry = a[cur] > prod_lo', prod_lo', prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    SWAP1
+    // stack: prod_lo', prod_lo_carry, prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    SWAP2
+    // stack: prod_hi, prod_lo_carry, prod_lo', carry, i, a_cur_loc, b_cur_loc, val, retdest
+    ADD
+    // stack: prod_hi' = prod_hi + prod_lo_carry, prod_lo', carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP3
+    // stack: carry, prod_hi', prod_lo', carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP3
+    // stack: prod_lo', carry, prod_hi', prod_lo', carry, i, a_cur_loc, b_cur_loc, val, retdest
+    ADD
+    %shl_const(128)
+    %shr_const(128)
+    // stack: to_write = (prod_lo' + carry) % 2^128, prod_hi', prod_lo', carry, i, a_cur_loc, b_cur_loc, val, retdest
+    SWAP2
+    // stack: prod_lo', prod_hi', to_write, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP3
+    // stack: to_write, prod_lo', prod_hi', to_write, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    LT // adding the incoming carry wrapped exactly when the result is smaller than prod_lo'
+    // stack: carry_new = to_write < prod_lo', prod_hi', to_write, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    %stack (cn, ph, tw, c) -> (cn, ph, tw)
+    // stack: carry_new, prod_hi', to_write, i, a_cur_loc, b_cur_loc, val, retdest
+    ADD
+    // stack: carry = carry_new + prod_hi', to_write, i, a_cur_loc, b_cur_loc, val, retdest
+    SWAP1
+    // stack: to_write, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    DUP4
+    // stack: a_cur_loc, to_write, carry, i, a_cur_loc, b_cur_loc, val, retdest
+    %mstore_kernel_general
+    // stack: carry, i, a_cur_loc, b_cur_loc, val, retdest
+    SWAP1
+    // stack: i, carry, a_cur_loc, b_cur_loc, val, retdest
+    %decrement
+    // stack: i-1, carry, a_cur_loc, b_cur_loc, val, retdest
+    SWAP2
+    // stack: a_cur_loc, carry, i-1, b_cur_loc, val, retdest
+    %increment
+    // stack: a_cur_loc+1, carry, i-1, b_cur_loc, val, retdest
+    SWAP3
+    // stack: b_cur_loc, carry, i-1, a_cur_loc+1, val, retdest
+    %increment
+    // stack: b_cur_loc+1, carry, i-1, a_cur_loc+1, val, retdest
+    %stack (b, c, i, a) -> (c, i, a, b)
+    // stack: carry, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
+    DUP2
+    // stack: i-1, carry, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
+    %jumpi(addmul_loop)
+addmul_end:
+    // stack: carry_new, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
+    %stack (c, i, a, b, v) -> (c)
+    // stack: carry_new, retdest
+    SWAP1
+    // stack: retdest, carry_new
+    JUMP
diff --git a/evm/src/cpu/kernel/asm/bignum/mul.asm b/evm/src/cpu/kernel/asm/bignum/mul.asm
index d96518fe..1b947c23 100644
--- a/evm/src/cpu/kernel/asm/bignum/mul.asm
+++ b/evm/src/cpu/kernel/asm/bignum/mul.asm
@@ -1,101 +1,6 @@
 // Arithmetic on little-endian integers represented with 128-bit limbs.
 // All integers must be under a given length bound, and are padded with leading zeroes.
 
-// Multiplies a bignum by a single-limb value. Resulting limbs may be larger than 128 bits.
-// This is a naive multiplication algorithm (BasecaseMultiply from Modern Computer Arithmetic).
-mul_bignum_helper:
-    // stack: len, start_loc, val, retdest
-    DUP2
-    // stack: start_loc, len, start_loc, val, retdest
-    ADD
-    // stack: end_loc, start_loc, val, retdest
-    SWAP2
-    SWAP1
-    // stack: i=start_loc, val, end_loc, retdest
-mul_helper_loop:
-    // stack: i, val, end_loc, retdest
-    DUP1
-    // stack: i, i, val, end_loc, retdest
-    %mload_kernel_general
-    // stack: bignum[i], i, val, end_loc, retdest
-    DUP3
-    // stack: val, bignum[i], i, val, end_loc, retdest
-    MUL
-    // stack: val * bignum[i], i, val, end_loc, retdest
-    DUP2
-    // stack: i, val * bignum[i], i, val, end_loc, retdest
-    %mstore_kernel_general
-    // stack: i, val, end_loc, retdest
-    %increment
-    // stack: i + 1, val, end_loc, retdest
-    DUP1
-    // stack: i + 1, i + 1, val, end_loc, retdest
-    DUP4
-    // stack: end_loc, i + 1, i + 1, val, end_loc, retdest
-    GT
-    %jumpi(mul_helper_loop)
-    // stack: n = 0, i, val, retdest
-    %pop3
-    // stack: retdest
-    JUMP
-
-// Reduces a bignum with limbs possibly greater than 128 bits to a normalized bignum with length len + 1.
-// Used after `mul_bignum_helper` to complete the process of multiplying a bignum by a constant value.
-mul_bignum_reduce_helper: - // stack: len, start_loc, retdest - DUP2 - // stack: start_loc, len, start_loc, retdest - ADD - // stack: end_loc, start_loc, retdest - SWAP1 - // stack: i=start_loc, end_loc, retdest -reduce_loop: - // stack: i, end_loc, retdest - DUP1 - // stack: i, i, end_loc, retdest - %mload_kernel_general - // stack: bignum[i], i, end_loc, retdest - DUP1 - // stack: bignum[i], bignum[i], i, end_loc, retdest - %shl_const(128) - %shr_const(128) - // stack: bignum[i] % 2^128, bignum[i], i, end_loc, retdest - SWAP1 - // stack: bignum[i], bignum[i] % 2^128, i, end_loc, retdest - %shr_const(128) - // stack: bignum[i] // 2^128, bignum[i] % 2^128, i, end_loc, retdest - DUP3 - // stack: i, bignum[i] // 2^128, bignum[i] % 2^128, i, end_loc, retdest - %increment - // stack: i+1, bignum[i] // 2^128, bignum[i] % 2^128, i, end_loc, retdest - SWAP1 - // stack: bignum[i] // 2^128, i+1, bignum[i] % 2^128, i, end_loc, retdest - DUP2 - // stack: i+1, bignum[i] // 2^128, i+1, bignum[i] % 2^128, i, end_loc, retdest - %mload_kernel_general - // stack: bignum[i+1], bignum[i] // 2^128, i+1, bignum[i] % 2^128, i, end_loc, retdest - ADD - // stack: bignum[i+1] + bignum[i] // 2^128, i+1, bignum[i] % 2^128, i, end_loc, retdest - SWAP1 - // stack: i+1, bignum[i+1] + bignum[i] // 2^128, bignum[i] % 2^128, i, end_loc, retdest - %mstore_kernel_general - // stack: bignum[i] % 2^128, i, end_loc, retdest - DUP2 - // stack: i, bignum[i] % 2^128, i, end_loc, retdest - %mstore_kernel_general - // stack: i, end_loc, retdest - %increment - // stack: i + 1, end_loc, retdest - %stack (vals: 2) -> (vals, vals) - // stack: i + 1, end_loc, i + 1, end_loc, retdest - EQ - %jumpi(reduce_loop) -reduce_end: - // stack: n = 0, i, retdest - %pop2 - // stack: retdest - JUMP - // Stores a * b in output_loc, leaving a and b unchanged. // Both a and b have length len; a * b will have length 2 * len. 
// Both output_loc and scratch_space must be initialized as zeroes (2 * len of them in the case @@ -130,43 +35,21 @@ mul_loop: %mstore_kernel_general // stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest - // Use scratch_space to multiply a by b[i]. - PUSH mul_return_1 - // stack: mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // Multiply a by b[i] and add into output_cur. + PUSH mul_return + // stack: mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest DUP5 - // stack: bi, mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: bi, mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest %mload_kernel_general - // stack: b[i], mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + // stack: b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest + DUP5 + // stack: a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest DUP8 - // stack: scratch_space, b[i], mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP4 - // stack: len, scratch_space, b[i], mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - %jump(mul_bignum_helper) -mul_return_1: - // stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest - PUSH mul_return_2 - // stack: mul_return_2, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP7 - // stack: scratch_space, mul_return_2, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - DUP3 - // stack: len, scratch_space, mul_return_2, len, n, a_start_loc, bi, output_cur, scratch_space, retdest - %jump(mul_bignum_reduce_helper) -mul_return_2: - // stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest - - // Add the multiplication result into output_cur = output_len[i]. 
-    PUSH mul_return_3
-    // stack: mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
-    DUP7
-    // stack: scratch_space, mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
-    DUP7
-    // stack: output_cur, scratch_space, mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
-    DUP4
-    // stack: len, output_cur, scratch_space, mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
-    %increment
-    // stack: len + 1, output_cur, scratch_space, mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
-    %jump(add_bignum)
-mul_return_3:
+    // stack: output_cur, a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
+    DUP5
+    // stack: len, output_cur, a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
+    %jump(addmul_bignum)
+mul_return:
     // stack: carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
     DUP6
     // stack: output_cur, carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest