// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.

// Sets a[0:len] += b[0:len] * val, and returns the carry (a limb of up to 128 bits).
// addmul_bignum: a[0:len] += b[0:len] * val, limb by limb, least-significant limb first.
//
// Pre stack:  len, a_start_loc, b_start_loc, val, retdest
// Post stack: carry_limb   (control is transferred to retdest)
//
// Each iteration reads one limb of b and one limb of a with %mload_kernel_general,
// computes a[cur] + b[cur] * val + carry_limb, writes the low 128 bits back to a[cur]
// with %mstore_kernel_general, and keeps everything above bit 128 as the next carry_limb.
// len == 0 is handled by jumping to len_zero, which returns a carry of 0.
//
// NOTE(review): the carry arithmetic assumes val and every limb of a and b are below
// 2^128 (so that val * b[cur] fits in one 256-bit word); nothing here checks that.
global addmul_bignum:
    // stack: len, a_start_loc, b_start_loc, val, retdest
    DUP1
    // stack: len, len, a_start_loc, b_start_loc, val, retdest
    ISZERO
    // Nothing to do for an empty operand; len_zero cleans up the stack and returns 0.
    %jumpi(len_zero)
    // Initial carry. From here on, len doubles as the countdown i and the two start
    // locations double as the moving cursors.
    PUSH 0
    // stack: carry_limb=0, i=len, a_cur_loc=a_start_loc, b_cur_loc=b_start_loc, val, retdest
addmul_loop:
    // stack: carry_limb, i, a_cur_loc, b_cur_loc, val, retdest

    // --- Step 1: prod = val * b[cur], split into low and high 128-bit halves. ---
    DUP4
    // stack: b_cur_loc, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    %mload_kernel_general
    // stack: b[cur], carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    DUP6
    // stack: val, b[cur], carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    MUL
    // stack: val * b[cur], carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    DUP1
    // stack: val * b[cur], val * b[cur], carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    %shr_const(128)
    // stack: (val * b[cur]) // 2^128, val * b[cur], carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    SWAP1
    // stack: val * b[cur], (val * b[cur]) // 2^128, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    // Shifting left then right by 128 clears the upper 128 bits, i.e. reduces mod 2^128.
    %shl_const(128)
    %shr_const(128)
    // stack: prod_lo = val * b[cur] % 2^128, prod_hi = (val * b[cur]) // 2^128, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest

    // --- Step 2: prod_lo' = (prod_lo + a[cur]) mod 2^128, pushing any overflow into prod_hi. ---
    DUP5
    // stack: a_cur_loc, prod_lo, prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    %mload_kernel_general
    // stack: a[cur], prod_lo, prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    DUP1
    // stack: a[cur], a[cur], prod_lo, prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    SWAP2
    // stack: prod_lo, a[cur], a[cur], prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    ADD
    %shl_const(128)
    %shr_const(128)
    // stack: prod_lo' = (prod_lo + a[cur]) % 2^128, a[cur], prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    DUP1
    // stack: prod_lo', prod_lo', a[cur], prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    SWAP2
    // stack: a[cur], prod_lo', prod_lo', prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    // For 128-bit operands the truncated sum is smaller than an addend exactly when the
    // addition wrapped past 2^128, so this comparison yields the 0/1 carry-out.
    GT
    // stack: prod_lo_carry_limb = a[cur] > prod_lo', prod_lo', prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    SWAP1
    // stack: prod_lo', prod_lo_carry_limb, prod_hi, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    SWAP2
    // stack: prod_hi, prod_lo_carry_limb, prod_lo', carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    ADD
    // stack: prod_hi' = prod_hi + prod_lo_carry_limb, prod_lo', carry_limb, i, a_cur_loc, b_cur_loc, val, retdest

    // --- Step 3: to_write = (prod_lo' + carry_limb) mod 2^128, again tracking the overflow. ---
    DUP3
    // stack: carry_limb, prod_hi', prod_lo', carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    DUP3
    // stack: prod_lo', carry_limb, prod_hi', prod_lo', carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    ADD
    %shl_const(128)
    %shr_const(128)
    // stack: to_write = (prod_lo' + carry_limb) % 2^128, prod_hi', prod_lo', carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    SWAP2
    // stack: prod_lo', prod_hi', to_write, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    DUP3
    // stack: to_write, prod_lo', prod_hi', to_write, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    // Same wrap-around test as in step 2, this time for the incoming carry.
    LT
    // stack: carry_limb_new = to_write < prod_lo', prod_hi', to_write, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    // The old carry_limb has been fully consumed; drop it from beneath the top three items.
    %stack (vals: 3, c) -> (vals)
    // stack: carry_limb_new, prod_hi', to_write, i, a_cur_loc, b_cur_loc, val, retdest
    ADD
    // stack: carry_limb = carry_limb_new + prod_hi', to_write, i, a_cur_loc, b_cur_loc, val, retdest

    // --- Step 4: store the result limb back into a[cur]. ---
    SWAP1
    // stack: to_write, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    DUP4
    // stack: a_cur_loc, to_write, carry_limb, i, a_cur_loc, b_cur_loc, val, retdest
    %mstore_kernel_general
    // stack: carry_limb, i, a_cur_loc, b_cur_loc, val, retdest

    // --- Step 5: count down i, advance both cursors by one limb, and loop while i != 0. ---
    SWAP1
    // stack: i, carry_limb, a_cur_loc, b_cur_loc, val, retdest
    %decrement
    // stack: i-1, carry_limb, a_cur_loc, b_cur_loc, val, retdest
    SWAP2
    // stack: a_cur_loc, carry_limb, i-1, b_cur_loc, val, retdest
    %increment
    // stack: a_cur_loc+1, carry_limb, i-1, b_cur_loc, val, retdest
    SWAP3
    // stack: b_cur_loc, carry_limb, i-1, a_cur_loc+1, val, retdest
    %increment
    // stack: b_cur_loc+1, carry_limb, i-1, a_cur_loc+1, val, retdest
    // Restore the canonical loop-entry ordering.
    %stack (b, c, i, a) -> (c, i, a, b)
    // stack: carry_limb, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
    DUP2
    // stack: i-1, carry_limb, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
    // The duplicated counter is consumed by the jump; a non-zero value means limbs remain.
    %jumpi(addmul_loop)
addmul_end:
    // Reached by fall-through only, so the remaining counter i-1 is 0 here.
    // stack: carry_limb, i-1, a_cur_loc+1, b_cur_loc+1, val, retdest
    // Discard the counter, both cursors and val; only the final carry is returned.
    %stack (c, i, a, b, v) -> (c)
    // stack: carry_limb, retdest
    SWAP1
    // stack: retdest, carry_limb
    JUMP
// Exit path of addmul_bignum for len == 0: no memory is read or written.
// The four arguments are dropped and a carry of 0 is returned to retdest.
len_zero:
    // stack: len, a_start_loc, b_start_loc, val, retdest
    %pop4
    // stack: retdest
    PUSH 0
    // stack: carry_limb=0, retdest
    SWAP1
    // stack: retdest, carry_limb=0
    JUMP