Nicholas Ward e06f84dd7c modmul fix
2023-03-20 13:09:42 -07:00

193 lines
9.7 KiB
NASM

// Arithmetic on little-endian integers represented with 128-bit limbs.
// All integers must be under a given length bound, and are padded with leading zeroes.
// Stores a * b % m in output_loc, leaving a, b, and m unchanged.
// a, b, and m must have the same length.
// Both output_loc and scratch_1 must have size length.
// Both scratch_2 and scratch_3 must have size 2 * length and be initialized with zeroes.
global modmul_bignum:
    // Entry point of the modular multiplication routine.
    // Phase 1 (this block): copy the prover-supplied remainder x := (a * b) % m,
    // one limb per PROVER_INPUT, into output_loc[0..length).
    // NOTE(review): the limbs are stored exactly as supplied; no limb range check
    // and no x < m comparison is visible in this routine -- confirm that this is
    // enforced elsewhere.
    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest

    // Zero-length guard. The loops below store a limb and increment i *before*
    // testing i against length, so with length == 0 the exit test would be
    // missed on the first pass. An empty operand has no limbs to produce, so we
    // simply return.
    DUP1
    // stack: length, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %jumpi(modmul_remainder_start)
    // length == 0: drop the eight arguments and return to the caller.
    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %pop3
    %pop3
    POP
    POP
    // stack: retdest
    JUMP
modmul_remainder_start:
    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    // The prover provides x := (a * b) % m, which we store in output_loc.
    PUSH 0
    // stack: i=0, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
modmul_remainder_loop:
    // Invariant: limbs output_loc[0..i) have been written; i < length.
    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    PROVER_INPUT(bignum_modmul::remainder)
    // stack: PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    DUP7
    // stack: output_loc, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    DUP3
    // stack: i, output_loc, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    ADD
    // stack: output_loc + i, PI, i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %mstore_kernel_general
    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %increment
    // stack: i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    DUP2
    DUP2
    // stack: i+1, length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    EQ
    // stack: i+1==length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    ISZERO
    // stack: i+1!=length, i+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %jumpi(modmul_remainder_loop)
modmul_remainder_end:
    // All length limbs of x are stored; discard the loop counter.
    // stack: i, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    POP
    // stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
// Phase 2: the prover provides k := (a * b) / m, one limb per PROVER_INPUT.
// The limbs are written to scratch_1[0..length) exactly as supplied.
// stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    PUSH 0
    // stack: idx=0, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
modmul_quotient_loop:
    // Invariant: limbs scratch_1[0..idx) have been written.
    // stack: idx, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    PROVER_INPUT(bignum_modmul::quotient)
    // stack: limb, idx, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    DUP2
    // stack: idx, limb, idx, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    DUP9
    // stack: scratch_1, idx, limb, idx, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    ADD
    // stack: scratch_1 + idx, limb, idx, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %mstore_kernel_general
    // stack: idx, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %increment
    // stack: idx+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    DUP1
    // stack: idx+1, idx+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    DUP3
    // stack: length, idx+1, idx+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %neq
    // stack: length!=idx+1, idx+1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    %jumpi(modmul_quotient_loop)
modmul_quotient_end:
    // Every limb of k is in place; the counter (now equal to length) is no longer needed.
    // stack: idx, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
    POP
// stack: length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
// Verification step 1: calculate x + k * m.
// Store k * m in scratch_2.
// The return label is pushed first so that %stack can place it directly beneath
// the four values handed to mul_bignum.
PUSH modmul_return_1
// stack: modmul_return_1, length, a_start_loc, b_start_loc, m_start_loc, output_loc, scratch_1, scratch_2, scratch_3, retdest
// Above the return label: length, scratch_1 (k), m_start_loc (m), scratch_2 (destination).
// Below it: the values the following phases still use. m_start_loc and scratch_1
// are not kept there; no later stack comment in this routine lists them.
%stack (return, len, a, b, m, out, s1, s2) -> (len, s1, m, s2, return, len, a, b, out, s2)
// stack: length, scratch_1, m_start_loc, scratch_2, modmul_return_1, length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, retdest
// mul_bignum is defined outside this file section; it is expected to consume the
// five items on top and jump to modmul_return_1 leaving nothing extra on the stack.
%jump(mul_bignum)
modmul_return_1:
// stack: length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, retdest
// Add x into k * m (in scratch_2).
PUSH modmul_return_2
// stack: modmul_return_2, length, a_start_loc, b_start_loc, output_loc, scratch_2, scratch_3, retdest
// Above the return label: length, scratch_2 (k * m, updated in place per the comment above), output_loc (x).
// output_loc is not kept below the label; the remaining phases do not list it.
%stack (return, len, a, b, out, s2) -> (len, s2, out, return, len, a, b, s2)
// stack: length, scratch_2, output_loc, modmul_return_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
// add_bignum is defined outside this file section; the code at modmul_return_2
// expects it to leave a carry value on top of the stack.
%jump(add_bignum)
modmul_return_2:
    // Carry propagation for x + k * m. The addition covered the low `length`
    // limbs of scratch_2; if it reported a carry, add 1 to the limb at
    // scratch_2 + length and keep rippling upwards while limbs wrap around.
    // stack: carry, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    ISZERO
    %jumpi(no_carry)
    // stack: length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP4
    // stack: scratch_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP2
    // stack: length, scratch_2, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    ADD
    // stack: cur_loc=scratch_2 + length, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
increment_loop:
    // stack: cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP1
    %mload_kernel_general
    // stack: val, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    %increment
    // stack: val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP1
    // stack: val+1, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    %eq_const(@BIGNUM_LIMB_BASE)
    // stack: val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP1
    // stack: val+1==limb_base, val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    ISZERO
    // stack: val+1!=limb_base, val+1==limb_base, val+1, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    SWAP1
    SWAP2
    // stack: val+1, val+1!=limb_base, val+1==limb_base, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    // Branch-free select: write val+1 normally, or 0 when the limb wrapped.
    MUL
    // stack: to_write=(val+1)*(val+1!=limb_base), continue=val+1==limb_base, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP3
    // stack: cur_loc, to_write, continue, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    %mstore_kernel_general
    // stack: continue, cur_loc, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    SWAP1
    // stack: cur_loc, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    %increment
    // stack: cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    // Compute the end of the 2 * length limb buffer from scratch_2 and length.
    // The bound is derived from this buffer's own extent rather than from the
    // address of scratch_3, so it does not depend on how the scratch buffers
    // are laid out relative to each other.
    DUP6
    // stack: scratch_2, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP4
    // stack: length, scratch_2, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP1
    // stack: length, length, scratch_2, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    ADD
    // stack: 2*length, scratch_2, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    ADD
    // stack: end=scratch_2 + 2*length, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    DUP2
    // stack: cur_loc + 1, end, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    LT
    // stack: in_bounds=cur_loc + 1 < end, cur_loc + 1, continue, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    SWAP1
    SWAP2
    // stack: continue, in_bounds, cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    // Keep going only if the last limb wrapped AND there is another limb inside the buffer.
    MUL
    // stack: new_continue=continue*in_bounds, cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    %jumpi(increment_loop)
    // stack: cur_loc + 1, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
    POP
no_carry:
// Reached either directly (the addition reported no carry) or by falling through
// after the carry has been propagated; both paths arrive with the same stack.
// stack: length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
// Calculate a * b.
// Store a * b in scratch_3.
PUSH modmul_return_3
// stack: modmul_return_3, length, a_start_loc, b_start_loc, scratch_2, scratch_3, retdest
// Above the return label: length, a, b, scratch_3 (destination).
// Below it: only what the final comparison needs (length, scratch_2, scratch_3);
// a_start_loc and b_start_loc are not kept.
%stack (return, len, a, b, s2, s3) -> (len, a, b, s3, return, len, s2, s3)
// stack: length, a_start_loc, b_start_loc, scratch_3, modmul_return_3, length, scratch_2, scratch_3, retdest
// mul_bignum is defined outside this file section; it is expected to consume the
// five items on top and jump to modmul_return_3.
%jump(mul_bignum)
modmul_return_3:
    // stack: length, scratch_2, scratch_3, retdest
    // Final verification: x + k * m (in scratch_2) must equal a * b (in scratch_3).
    // Two pointers walk the buffers in lockstep; each pair of limbs is asserted
    // equal, so any mismatch aborts via %assert_eq.
    // NOTE(review): the counter starts at `length`, so only the first `length`
    // limbs of each buffer are compared, while the routine header gives both
    // buffers a size of 2 * length -- confirm whether the upper limbs need
    // checking as well.
    // stack: remaining=length, p=scratch_2, q=scratch_3, retdest
modmul_check_loop:
    // stack: remaining, p, q, retdest
    DUP3
    // stack: q, remaining, p, q, retdest
    %mload_kernel_general
    // stack: mem[q], remaining, p, q, retdest
    DUP3
    // stack: p, mem[q], remaining, p, q, retdest
    %mload_kernel_general
    // stack: mem[p], mem[q], remaining, p, q, retdest
    %assert_eq
    // stack: remaining, p, q, retdest
    SWAP1
    // stack: p, remaining, q, retdest
    %increment
    // stack: p+1, remaining, q, retdest
    SWAP2
    // stack: q, remaining, p+1, retdest
    %increment
    // stack: q+1, remaining, p+1, retdest
    SWAP2
    // stack: p+1, remaining, q+1, retdest
    SWAP1
    // stack: remaining, p+1, q+1, retdest
    %decrement
    // stack: remaining-1, p+1, q+1, retdest
    DUP1
    // stack: remaining-1, remaining-1, p+1, q+1, retdest
    %jumpi(modmul_check_loop)
modmul_check_end:
    // The counter reached zero: every compared limb matched. Clean up and return.
    // stack: remaining-1, p+1, q+1, retdest
    POP
    POP
    POP
    // stack: retdest
    JUMP