addmul initial

This commit is contained in:
Nicholas Ward 2023-02-13 12:53:49 -08:00
parent aa605b673b
commit 9976a4b040
5 changed files with 115 additions and 130 deletions

View File

@ -12,6 +12,7 @@ pub static KERNEL: Lazy<Kernel> = Lazy::new(combined_kernel);
pub(crate) fn combined_kernel() -> Kernel {
let files = vec![
include_str!("asm/bignum/add.asm"),
include_str!("asm/bignum/addmul.asm"),
include_str!("asm/bignum/ge.asm"),
include_str!("asm/bignum/iszero.asm"),
include_str!("asm/bignum/mul.asm"),

View File

@ -5,7 +5,7 @@
global add_bignum:
// stack: len, a_start_loc, b_start_loc, retdest
PUSH 0
// stack: carry=0, i=len, a_start_loc, b_start_loc, retdest
// stack: carry=0, i=len, a_cur_loc=a_start_loc, b_cur_loc=b_start_loc, retdest
add_loop:
// stack: carry, i, a_cur_loc, b_cur_loc, retdest
DUP4

View File

@ -0,0 +1,99 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.

// Sets a[0:len] += b[0:len] * val, and returns the carry out of the last limb.
//
// Pre stack:  len, a_start_loc, b_start_loc, val, retdest
// Post stack: carry (control is transferred to retdest)
//
// Notes:
// - The loop body runs before the counter is tested, so len must be at least 1.
// - Assumes val and every limb of a and b are below 2^128, so that val * b[cur]
//   fits in a single 256-bit word and the running carry stays below 2^128.
global addmul_bignum:
// stack: len, a_start_loc, b_start_loc, val, retdest
PUSH 0
// stack: carry=0, i=len, a_cur_loc=a_start_loc, b_cur_loc=b_start_loc, val, retdest
addmul_loop:
// stack: carry, i, a_cur_loc, b_cur_loc, val, retdest
DUP4
// stack: b_cur_loc, carry, i, a_cur_loc, b_cur_loc, val, retdest
%mload_kernel_general
// stack: b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
DUP6
// stack: val, b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
MUL
// stack: val * b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest

// Split the double-width product into its low and high 128-bit halves.
DUP1
// stack: val * b[cur], val * b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
%shr_const(128)
// stack: (val * b[cur]) // 2^128, val * b[cur], carry, i, a_cur_loc, b_cur_loc, val, retdest
SWAP1
// stack: val * b[cur], (val * b[cur]) // 2^128, carry, i, a_cur_loc, b_cur_loc, val, retdest
%shl_const(128)
%shr_const(128)
// stack: prod_lo = val * b[cur] % 2^128, prod_hi = (val * b[cur]) // 2^128, carry, i, a_cur_loc, b_cur_loc, val, retdest

// Add the current limb of a into the low half; a wrap-around is detected by
// the reduced sum being smaller than the addend a[cur].
DUP5
// stack: a_cur_loc, prod_lo, prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
%mload_kernel_general
// stack: a[cur], prod_lo, prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
DUP1
// stack: a[cur], a[cur], prod_lo, prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: prod_lo, a[cur], a[cur], prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
ADD
%shl_const(128)
%shr_const(128)
// stack: prod_lo' = (prod_lo + a[cur]) % 2^128, a[cur], prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
DUP1
// stack: prod_lo', prod_lo', a[cur], prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: a[cur], prod_lo', prod_lo', prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
GT
// stack: prod_lo_carry = a[cur] > prod_lo', prod_lo', prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
SWAP1
// stack: prod_lo', prod_lo_carry, prod_hi, carry, i, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: prod_hi, prod_lo_carry, prod_lo', carry, i, a_cur_loc, b_cur_loc, val, retdest
ADD
// stack: prod_hi' = prod_hi + prod_lo_carry, prod_lo', carry, i, a_cur_loc, b_cur_loc, val, retdest

// Add the carry from the previous limb; again detect wrap-around by comparison.
DUP3
// stack: carry, prod_lo', prod_hi', carry, i, a_cur_loc, b_cur_loc, val, retdest
DUP2
// stack: prod_lo', carry, prod_lo', prod_hi', carry, i, a_cur_loc, b_cur_loc, val, retdest
ADD
%shl_const(128)
%shr_const(128)
// stack: to_write = (prod_lo' + carry) % 2^128, prod_lo', prod_hi', carry, i, a_cur_loc, b_cur_loc, val, retdest
SWAP1
// stack: prod_lo', to_write, prod_hi', carry, i, a_cur_loc, b_cur_loc, val, retdest
DUP2
// stack: to_write, prod_lo', to_write, prod_hi', carry, i, a_cur_loc, b_cur_loc, val, retdest
LT
// stack: carry_new = to_write < prod_lo', to_write, prod_hi', carry, i, a_cur_loc, b_cur_loc, val, retdest
// The old carry has been consumed; drop it while reordering.
%stack (cn, tw, ph, c) -> (cn, ph, tw)
// stack: carry_new, prod_hi', to_write, i, a_cur_loc, b_cur_loc, val, retdest
ADD
// stack: carry = carry_new + prod_hi', to_write, i, a_cur_loc, b_cur_loc, val, retdest
SWAP1
// stack: to_write, carry, i, a_cur_loc, b_cur_loc, val, retdest
DUP4
// stack: a_cur_loc, to_write, carry, i, a_cur_loc, b_cur_loc, val, retdest
%mstore_kernel_general
// stack: carry, i, a_cur_loc, b_cur_loc, val, retdest

// Advance: one fewer limb remaining, both cursors move to the next limb.
SWAP1
// stack: i, carry, a_cur_loc, b_cur_loc, val, retdest
%decrement
// stack: i-1, carry, a_cur_loc, b_cur_loc, val, retdest
SWAP2
// stack: a_cur_loc, carry, i-1, b_cur_loc, val, retdest
%increment
// stack: a_cur_loc+1, carry, i-1, b_cur_loc, val, retdest
SWAP3
// stack: b_cur_loc, carry, i-1, a_cur_loc+1, val, retdest
%increment
// stack: b_cur_loc+1, carry, i-1, a_cur_loc+1, val, retdest
%stack (b, c, i, a) -> (c, i, a, b)
// stack: carry, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
DUP2
// stack: i-1, carry, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
%jumpi(addmul_loop)
addmul_end:
// stack: carry_new, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
// Drop the loop state *including val*, so that retdest sits directly below the carry.
%stack (c, i, a, b, v) -> (c)
// stack: carry_new, retdest
SWAP1
// stack: retdest, carry_new
JUMP

View File

@ -1,101 +1,6 @@
// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Multiplies a bignum by a single-limb value. Resulting limbs may be larger than 128 bits.
// This is a naive multiplication algorithm (BasecaseMultiply from Modern Computer Arithmetic).
//
// Pre stack:  len, start_loc, val, retdest
// Post stack: (empty; control is transferred to retdest)
//
// Each limb in [start_loc, start_loc + len) is overwritten in place with val * limb.
// No carry propagation happens here; the limbs are left unreduced.
// The loop body runs before the bound is tested, so one limb is processed even if len is 0.
mul_bignum_helper:
// stack: len, start_loc, val, retdest
DUP2
// stack: start_loc, len, start_loc, val, retdest
ADD
// stack: end_loc, start_loc, val, retdest
SWAP2
SWAP1
// stack: i=start_loc, val, end_loc, retdest
mul_helper_loop:
// stack: i, val, end_loc, retdest
DUP1
// stack: i, i, val, end_loc, retdest
%mload_kernel_general
// stack: bignum[i], i, val, end_loc, retdest
DUP3
// stack: val, bignum[i], i, val, end_loc, retdest
MUL
// stack: val * bignum[i], i, val, end_loc, retdest
DUP2
// stack: i, val * bignum[i], i, val, end_loc, retdest
%mstore_kernel_general
// stack: i, val, end_loc, retdest
%increment
// stack: i + 1, val, end_loc, retdest
DUP1
// stack: i + 1, i + 1, val, end_loc, retdest
DUP4
// stack: end_loc, i + 1, i + 1, val, end_loc, retdest
GT
// stack: end_loc > i + 1, i + 1, val, end_loc, retdest
// Keep looping while the cursor is still strictly below end_loc.
%jumpi(mul_helper_loop)
// stack: i + 1, val, end_loc, retdest
%pop3
// stack: retdest
JUMP
// Reduces a bignum with limbs possibly greater than 128 bits to a normalized bignum with length len + 1.
// Used after `mul_bignum_helper` to complete the process of multiplying a bignum by a constant value.
//
// Pre stack:  len, start_loc, retdest
// Post stack: (empty; control is transferred to retdest)
//
// For each i in [start_loc, start_loc + len), the part of bignum[i] above 128 bits is
// added into bignum[i+1] and bignum[i] is truncated to its low 128 bits. The last
// iteration therefore writes to start_loc + len, which is why the result has len + 1 limbs.
// The loop body runs before the bound is tested, so len must be at least 1.
mul_bignum_reduce_helper:
// stack: len, start_loc, retdest
DUP2
// stack: start_loc, len, start_loc, retdest
ADD
// stack: end_loc, start_loc, retdest
SWAP1
// stack: i=start_loc, end_loc, retdest
reduce_loop:
// stack: i, end_loc, retdest
DUP1
// stack: i, i, end_loc, retdest
%mload_kernel_general
// stack: bignum[i], i, end_loc, retdest
DUP1
// stack: bignum[i], bignum[i], i, end_loc, retdest
%shl_const(128)
%shr_const(128)
// stack: bignum[i] % 2^128, bignum[i], i, end_loc, retdest
SWAP1
// stack: bignum[i], bignum[i] % 2^128, i, end_loc, retdest
%shr_const(128)
// stack: bignum[i] // 2^128, bignum[i] % 2^128, i, end_loc, retdest
DUP3
// stack: i, bignum[i] // 2^128, bignum[i] % 2^128, i, end_loc, retdest
%increment
// stack: i+1, bignum[i] // 2^128, bignum[i] % 2^128, i, end_loc, retdest
SWAP1
// stack: bignum[i] // 2^128, i+1, bignum[i] % 2^128, i, end_loc, retdest
DUP2
// stack: i+1, bignum[i] // 2^128, i+1, bignum[i] % 2^128, i, end_loc, retdest
%mload_kernel_general
// stack: bignum[i+1], bignum[i] // 2^128, i+1, bignum[i] % 2^128, i, end_loc, retdest
ADD
// stack: bignum[i+1] + bignum[i] // 2^128, i+1, bignum[i] % 2^128, i, end_loc, retdest
SWAP1
// stack: i+1, bignum[i+1] + bignum[i] // 2^128, bignum[i] % 2^128, i, end_loc, retdest
%mstore_kernel_general
// stack: bignum[i] % 2^128, i, end_loc, retdest
DUP2
// stack: i, bignum[i] % 2^128, i, end_loc, retdest
%mstore_kernel_general
// stack: i, end_loc, retdest
%increment
// stack: i + 1, end_loc, retdest
%stack (vals: 2) -> (vals, vals)
// stack: i + 1, end_loc, i + 1, end_loc, retdest
EQ
// stack: i + 1 == end_loc, i + 1, end_loc, retdest
// Continue while the cursor has NOT yet reached end_loc; jumping on equality would
// stop after the first limb whenever len > 1.
ISZERO
// stack: i + 1 != end_loc, i + 1, end_loc, retdest
%jumpi(reduce_loop)
reduce_end:
// stack: i + 1 (= end_loc), end_loc, retdest
%pop2
// stack: retdest
JUMP
// Stores a * b in output_loc, leaving a and b unchanged.
// Both a and b have length len; a * b will have length 2 * len.
// Both output_loc and scratch_space must be initialized as zeroes (2 * len of them in the case
@ -130,43 +35,21 @@ mul_loop:
%mstore_kernel_general
// stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest
// Use scratch_space to multiply a by b[i].
PUSH mul_return_1
// stack: mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
// Multiply a by b[i] and add into output_cur.
PUSH mul_return
// stack: mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP5
// stack: bi, mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
// stack: bi, mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
%mload_kernel_general
// stack: b[i], mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
// stack: b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP5
// stack: a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP8
// stack: scratch_space, b[i], mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP4
// stack: len, scratch_space, b[i], mul_return_1, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
%jump(mul_bignum_helper)
mul_return_1:
// stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest
PUSH mul_return_2
// stack: mul_return_2, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP7
// stack: scratch_space, mul_return_2, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP3
// stack: len, scratch_space, mul_return_2, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
%jump(mul_bignum_reduce_helper)
mul_return_2:
// stack: len, n, a_start_loc, bi, output_cur, scratch_space, retdest
// Add the multiplication result into output_cur = output_len[i].
PUSH mul_return_3
// stack: mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP7
// stack: scratch_space, mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP7
// stack: output_cur, scratch_space, mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP4
// stack: len, output_cur, scratch_space, mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
%increment
// stack: len + 1, output_cur, scratch_space, mul_return_3, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
%jump(add_bignum)
mul_return_3:
// stack: output_cur, a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP5
// stack: len, output_cur, a_start_loc, b[i], mul_return, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
%jump(addmul_bignum)
mul_return:
// stack: carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest
DUP6
// stack: output_cur, carry, len, n, a_start_loc, bi, output_cur, scratch_space, retdest

View File

@ -210,6 +210,8 @@ fn test_mul_bignum() -> Result<()> {
// Run mul function.
interpreter.run()?;
dbg!(interpreter.stack());
// Determine actual product.
let new_memory = interpreter.get_kernel_general_memory();
let output_location: usize = output_loc.try_into().unwrap();