Merge pull request #889 from mir-protocol/hash-asm-optimization

Hash asm optimization
This commit is contained in:
Nicholas Ward 2023-03-16 15:05:35 -07:00 committed by GitHub
commit 9f75132ffa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 186 additions and 414 deletions

View File

@ -85,8 +85,7 @@ compression_loop:
// stack: cur_message_addr + 1, cur_block_byte + 8, ...
%endrep
// stack: end_message_addr, end_block_start_byte, t, cur_block, is_last_block, retdest
POP
POP
%pop2
// stack: t, cur_block, is_last_block, retdest
SWAP1
// stack: cur_block, t, is_last_block, retdest
@ -128,15 +127,14 @@ compression_loop:
// stack: 0, start + 8, invert_if_last_block, t, cur_block, retdest
%rep 4
// stack: i, loc, ...
DUP2
DUP2
// stack: i, loc, i, loc,...
DUP1
// stack: i, i, loc, ...
%blake2b_iv
// stack: IV_i, loc, i, loc,...
SWAP1
// stack: loc, IV_i, i, loc,...
// stack: IV_i, i, loc, ...
DUP3
// stack: loc, IV_i, i, loc, ...
%mstore_kernel_general
// stack: i, loc,...
// stack: i, loc, ...
%increment
SWAP1
%increment
@ -147,15 +145,11 @@ compression_loop:
%stack (i, loc, inv, last, t) -> (t, t, i, loc, inv, last)
// stack: t, t, 4, start + 12, invert_if_last_block, cur_block, retdest
%shr_const(64)
// stack: t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest
// stack: t_hi = t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest
SWAP1
// stack: t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest
PUSH 1
%shl_const(64)
// stack: 1 << 64, t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest
SWAP1
MOD
// stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest
// stack: t, t_hi, 4, start + 12, invert_if_last_block, cur_block, retdest
%mod_const(0x10000000000000000)
// stack: t_lo = t % (1 << 64), t_hi, 4, start + 12, invert_if_last_block, cur_block, retdest
%stack (t_lo, t_hi, i, loc, inv) -> (i, loc, t_lo, t_hi, inv, 0)
// stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, cur_block, retdest
@ -163,25 +157,31 @@ compression_loop:
// the values (t % 2**64, t >> 64, invert_if, 0).
%rep 4
// stack: i, loc, val, next_val,...
%stack (i, loc, val) -> (i, val, loc, i, loc)
// stack: i, val, loc, i, loc, next_val,...
DUP1
// stack: i, i, loc, val, next_val,...
%blake2b_iv
// stack: IV_i, val, loc, i, loc, next_val,...
// stack: IV_i, i, loc, val, next_val,...
DUP4
// stack: val, IV_i, i, loc, val, next_val,...
XOR
// stack: val ^ IV_i, loc, i, loc, next_val,...
SWAP1
// stack: loc, val ^ IV_i, i, loc, next_val,...
// stack: val ^ IV_i, i, loc, val, next_val,...
DUP3
// stack: loc, val ^ IV_i, i, loc, val, next_val,...
%mstore_kernel_general
// stack: i, loc, next_val,...
// stack: i, loc, val, next_val,...
%increment
SWAP1
// stack: i + 1, loc, val, next_val,...
SWAP2
// stack: val, loc, i + 1, next_val,...
POP
// stack: loc, i + 1, next_val,...
%increment
// stack: loc + 1, i + 1, next_val,...
SWAP1
// stack: i + 1, loc + 1, next_val,...
%endrep
// stack: 8, start + 16, cur_block, retdest
POP
POP
%pop2
// stack: cur_block, retdest
// Run 12 rounds of G functions.
@ -209,10 +209,9 @@ hash_generate_return:
PUSH 0
%mload_kernel_general
// stack: num_blocks, cur_block + 1, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest
EQ
// stack: last_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest
%jumpi(compression_end)
%jump(compression_loop)
GT
// stack: not_last_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest
%jumpi(compression_loop)
compression_end:
// stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest

View File

@ -8,22 +8,7 @@
global sha2_compression:
// stack: message_schedule_addr, retdest
PUSH 0
// stack: i=0, message_schedule_addr, retdest
SWAP1
// stack: message_schedule_addr, i=0, retdest
PUSH 0
// stack: 0, message_schedule_addr, i=0, retdest
%mload_kernel_general
// stack: num_blocks, message_schedule_addr, i=0, retdest
DUP1
// stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest
%scratch_space_addr_from_num_blocks
// stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest
SWAP1
// stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
// Push the initial hash values; these constants are called H^(0) in the spec.
PUSH 0x5be0cd19 // H^(0)_7
PUSH 0x1f83d9ab // H^(0)_6
PUSH 0x9b05688c // H^(0)_5
PUSH 0x510e527f // H^(0)_4
@ -31,255 +16,145 @@ global sha2_compression:
PUSH 0x3c6ef372 // H^(0)_2
PUSH 0xbb67ae85 // H^(0)_1
PUSH 0x6a09e667 // H^(0)_0
// stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
PUSH 0x5be0cd19 // H^(0)_7
// stack: h[0], a[0], b[0], c[0], d[0], e[0], f[0], g[0], message_schedule_addr, retdest
SWAP8
// stack: message_schedule_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest
PUSH 0
// stack: i=0, message_schedule_addr, a[0]..h[0], retdest
SWAP1
// stack: message_schedule_addr, i=0, a[0]..h[0], retdest
PUSH 0
// stack: 0, message_schedule_addr, i=0, a[0]..h[0], retdest
%mload_kernel_general
// stack: num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest
DUP1
// stack: num_blocks, num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest
%scratch_space_addr_from_num_blocks
// stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest
SWAP1
// stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest
compression_start_block:
// Store the current values of the working variables, as the "initial values" to be added back in at the end of this block.
DUP10
// stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP2
DUP2
// stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP3
DUP2
// stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP4
DUP2
// stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP5
DUP2
// stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP6
DUP2
// stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP7
DUP2
// stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP8
DUP2
// stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%add_const(4)
// stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
DUP9
DUP2
// stack: scratch_space_addr+28, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
%mstore_kernel_general_u32
// stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
POP
// stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest
// We keep the current values of the working variables saved at the end of the stack.
// These are the "initial values" to be added back in at the end of this block.
// stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest
%rep 8
DUP12
%endrep
// stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest
compression_loop:
// Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i].
// stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP11
// stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP13
// stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%mul_const(4)
// stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
ADD
// stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%mload_kernel_general_u32
// stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
PUSH sha2_constants_k
// stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP14
// stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%mul_const(4)
// stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
ADD
// stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%mload_kernel_code_u32
// stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%stack (start: 6, e, f, g, h) -> (e, f, g, h, start, e, f, g, h)
// stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP10
DUP10
DUP10
DUP10
// stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%sha2_temp_word1
// stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%stack (t, a, b, c) -> (a, b, c, t, a, b, c)
// stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP4
DUP4
DUP4
// stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%sha2_temp_word2
// stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP6
// stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP3
// stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%add_u32
// stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
SWAP2
// stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%add_u32
// stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%stack (a, e, b, c, d, old_d, f, g, h, old_h) -> (a, b, c, d, e, f, g, h)
// stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP12
// stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%increment
// stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP1
// stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
%eq_const(64)
// stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP1
// stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
DUP12
// stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
SUB
// stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
// stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest
SWAP13
// stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
// stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest
SWAP1
// stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
PUSH 256
MUL
// stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
// stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest
%mul_const(256)
// stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest
ADD
// stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
// stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest
SWAP12
// stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest
// stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, a[0]..h[0], retdest
SWAP10
// stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest
// stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, a[0]..h[0], retdest
POP
// stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest
// stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, a[0]..h[0], retdest
%and_const(63)
// stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest
// stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, a[0]..h[0], retdest
SWAP12
// stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
// stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest
POP
// stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
// stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest
DUP12
// stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest
ISZERO
%jumpi(compression_end_block)
%jump(compression_loop)
// stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest
%jumpi(compression_loop)
compression_end_block:
// Add the initial values of the eight working variables (from the start of this block's compression) back into them.
// stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
// stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%mload_kernel_general_u32
// stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP1
// stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
%add_const(4)
%mload_kernel_general_u32
// stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP2
// stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
%add_const(8)
%mload_kernel_general_u32
// stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP3
// stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
%add_const(12)
%mload_kernel_general_u32
// stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP4
// stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
%add_const(16)
%mload_kernel_general_u32
// stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP5
// stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
%add_const(20)
%mload_kernel_general_u32
// stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP6
// stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
%add_const(24)
%mload_kernel_general_u32
// stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP7
// stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
DUP10
%add_const(28)
%mload_kernel_general_u32
// stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
%add_u32
// stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest
SWAP8
// stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
// stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest
PUSH 0
// stack: 0, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest
%rep 8
SWAP13
%add_u32
SWAP12
%endrep
// stack: 0, num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest
POP
// stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest
DUP1
// stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
// stack: num_blocks, num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest
ISZERO
// In this case, we've finished all the blocks.
%jumpi(compression_end)
// stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
%stack (num_blocks, working: 8) -> (working, num_blocks)
// stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest
%jump(compression_start_block)
compression_end:
// stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
POP
// stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
%shl_const(32)
ADD // OR
%shl_const(32)
ADD // OR
%shl_const(32)
ADD // OR
%shl_const(32)
ADD // OR
%shl_const(32)
ADD // OR
%shl_const(32)
ADD // OR
%shl_const(32)
ADD // OR
// stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest
SWAP3
// stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest
%pop3
// stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest
%pop4
// stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest
%rep 7
%shl_const(32)
ADD // OR
%endrep
// stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest
SWAP1
JUMP

View File

@ -19,9 +19,7 @@ global sha2_pad:
// STEP 1: append 1
// insert 128 (= 1 << 7) at x[num_bytes+1]
// stack: num_bytes, retdest
PUSH 1
PUSH 7
SHL
PUSH 0x80
// stack: 128, num_bytes, retdest
DUP2
// stack: num_bytes, 128, num_bytes, retdest
@ -40,14 +38,12 @@ global sha2_pad:
// STEP 3: calculate length := num_bytes*8
SWAP1
// stack: num_bytes, num_blocks, retdest
PUSH 8
MUL
%mul_const(8)
// stack: length = num_bytes*8, num_blocks, retdest
// STEP 4: write length to x[num_blocks*64-7..num_blocks*64]
DUP2
// stack: num_blocks, length, num_blocks, retdest
PUSH 64
MUL
%mul_const(64)
// stack: last_addr = num_blocks*64, length, num_blocks, retdest
%sha2_write_length
// stack: num_blocks, retdest

View File

@ -55,16 +55,13 @@ gen_message_schedule_from_block_0_loop:
// stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest
%decrement
DUP1
ISZERO
%jumpi(gen_message_schedule_from_block_0_end)
%jump(gen_message_schedule_from_block_0_loop)
%jumpi(gen_message_schedule_from_block_0_loop)
gen_message_schedule_from_block_0_end:
// stack: old counter=0, output_addr, block[0], block[1], retdest
POP
PUSH 8
// stack: counter=8, output_addr, block[0], block[1], retdest
%stack (counter, out, b0, b1) -> (out, counter, b1, b0)
// stack: output_addr, counter, block[1], block[0], retdest
// stack: output_addr, block[0], block[1], retdest
%stack (out, b0, b1) -> (out, 8, b1, b0)
// stack: output_addr, counter=8, block[1], block[0], retdest
%add_const(64)
// stack: output_addr + 64, counter, block[1], block[0], retdest
SWAP1
@ -96,9 +93,7 @@ gen_message_schedule_from_block_1_loop:
// stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest
%decrement
DUP1
ISZERO
%jumpi(gen_message_schedule_from_block_1_end)
%jump(gen_message_schedule_from_block_1_loop)
%jumpi(gen_message_schedule_from_block_1_loop)
gen_message_schedule_from_block_1_end:
// stack: old counter=0, output_addr, block[1], block[0], retdest
POP
@ -118,11 +113,7 @@ gen_message_schedule_remaining_loop:
// stack: output_addr, counter, block[0], block[1], retdest
DUP1
// stack: output_addr, output_addr, counter, block[0], block[1], retdest
PUSH 2
PUSH 4
MUL
SWAP1
SUB
%sub_const(8)
// stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest
%mload_kernel_general_u32
// stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest
@ -132,11 +123,7 @@ gen_message_schedule_remaining_loop:
// stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
DUP1
// stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
PUSH 7
PUSH 4
MUL
SWAP1
SUB
%sub_const(28)
// stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%mload_kernel_general_u32
// stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
@ -144,11 +131,7 @@ gen_message_schedule_remaining_loop:
// stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
DUP1
// stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
PUSH 15
PUSH 4
MUL
SWAP1
SUB
%sub_const(60)
// stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%mload_kernel_general_u32
// stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
@ -158,11 +141,7 @@ gen_message_schedule_remaining_loop:
// stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
DUP1
// stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
PUSH 16
PUSH 4
MUL
SWAP1
SUB
%sub_const(64)
// stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
%mload_kernel_general_u32
// stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest
@ -185,9 +164,7 @@ gen_message_schedule_remaining_loop:
%decrement
// stack: counter - 1, output_addr + 4, block[0], block[1], retdest
DUP1
ISZERO
%jumpi(gen_message_schedule_remaining_end)
%jump(gen_message_schedule_remaining_loop)
%jumpi(gen_message_schedule_remaining_loop)
gen_message_schedule_remaining_end:
// stack: counter=0, output_addr, block[0], block[1], retdest
%pop4
@ -230,9 +207,7 @@ gen_all_message_schedules_loop_end:
// stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
DUP2
// stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
ISZERO
%jumpi(gen_all_message_schedules_end)
%jump(gen_all_message_schedules_loop)
%jumpi(gen_all_message_schedules_loop)
gen_all_message_schedules_end:
// stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest
%pop3

View File

@ -26,14 +26,15 @@
// stack: x, x
%rotr(7)
// stack: rotr(x, 7), x
%stack (rotated, x) -> (x, x, rotated)
SWAP1
// stack: x, rotr(x, 7)
DUP1
// stack: x, x, rotr(x, 7)
%rotr(18)
// stack: rotr(x, 18), x, rotr(x, 7)
SWAP1
// stack: x, rotr(x, 18), rotr(x, 7)
PUSH 3
SHR
%div_const(8) // equivalent to %shr_const(3)
// stack: shr(x, 3), rotr(x, 18), rotr(x, 7)
XOR
XOR
@ -45,7 +46,9 @@
// stack: x, x
%rotr(17)
// stack: rotr(x, 17), x
%stack (rotated, x) -> (x, x, rotated)
SWAP1
// stack: x, rotr(x, 17)
DUP1
// stack: x, x, rotr(x, 17)
%rotr(19)
// stack: rotr(x, 19), x, rotr(x, 17)
@ -64,7 +67,9 @@
// stack: x, x
%rotr(2)
// stack: rotr(x, 2), x
%stack (rotated, x) -> (x, x, rotated)
SWAP1
// stack: x, rotr(x, 2)
DUP1
// stack: x, x, rotr(x, 2)
%rotr(13)
// stack: rotr(x, 13), x, rotr(x, 2)
@ -82,7 +87,9 @@
// stack: x, x
%rotr(6)
// stack: rotr(x, 6), x
%stack (rotated, x) -> (x, x, rotated)
SWAP1
// stack: x, rotr(x, 6)
DUP1
// stack: x, x, rotr(x, 6)
%rotr(11)
// stack: rotr(x, 11), x, rotr(x, 6)
@ -100,11 +107,13 @@
// stack: x, x, y, z
NOT
// stack: not x, x, y, z
%stack (notx, x, y, z) -> (notx, z, x, y)
// stack: not x, z, x, y
SWAP1
// stack: x, not x, y, z
SWAP3
// stack: z, not x, y, x
AND
// stack: (not x) and z, x, y
%stack (nxz, x, y) -> (x, y, nxz)
// stack: (not x) and z, y, x
SWAP2
// stack: x, y, (not x) and z
AND
// stack: x and y, (not x) and z
@ -113,18 +122,22 @@
%macro sha2_majority
// stack: x, y, z
%stack (xyz: 3) -> (xyz, xyz)
// stack: x, y, z, x, y, z
DUP1
// stack: x, x, y, z
DUP3
// stack: y, x, x, y, z
DUP5
// stack: z, y, x, x, y, z
AND
// stack: x and y, z, x, y, z
// stack: z and y, x, x, y, z
SWAP4
// stack: z, x, x, y, z and y
AND
// stack: z and x, x, y, z and y
SWAP2
// stack: x, z, x and y, y, z
// stack: y, x, z and x, z and y
AND
// stack: x and z, x and y, y, z
%stack (a: 2, b: 2) -> (b, a)
// stack: y, z, x and z, x and y
AND
// stack: y and z, x and z, x and y
// stack: y and x, z and x, z and y
OR
OR
%endmacro

View File

@ -10,110 +10,24 @@
// stack: last_addr, length % (1 << 8), length, last_addr
%mstore_kernel_general
// stack: length, last_addr
SWAP1
%decrement
SWAP1
// stack: length, last_addr - 1
%shr_const(8)
// stack: length >> 8, last_addr - 1
DUP1
// stack: length >> 8, length >> 8, last_addr - 1
%and_const(0xff)
// stack: (length >> 8) % (1 << 8), length >> 8, last_addr - 1
DUP3
// stack: last_addr - 1, (length >> 8) % (1 << 8), length >> 8, last_addr - 1
%mstore_kernel_general
// stack: length >> 8, last_addr - 1
SWAP1
%decrement
SWAP1
// stack: length >> 8, last_addr - 2
%shr_const(8)
// stack: length >> 16, last_addr - 2
DUP1
// stack: length >> 16, length >> 16, last_addr - 2
%and_const(0xff)
// stack: (length >> 16) % (1 << 8), length >> 16, last_addr - 2
DUP3
// stack: last_addr - 2, (length >> 16) % (1 << 8), length >> 16, last_addr - 2
%mstore_kernel_general
%rep 7
// For i = 0 to 6: step the address down by one and store the next-higher
// byte of length there (the lowest byte was already stored at last_addr above).
// stack: length >> (8 * i), last_addr - i
SWAP1
%decrement
SWAP1
// stack: length >> (8 * i), last_addr - i - 1
%div_const(256) // equivalent to %shr_const(8)
// stack: length >> (8 * (i + 1)), last_addr - i - 1
DUP1
// stack: length >> (8 * (i + 1)), length >> (8 * (i + 1)), last_addr - i - 1
%mod_const(256)
// stack: (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 1
DUP3
// stack: last_addr - i - 1, (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 1
%mstore_kernel_general
// stack: length >> (8 * (i + 1)), last_addr - i - 1
%endrep
// stack: length >> 16, last_addr - 2
SWAP1
%decrement
SWAP1
// stack: length >> 16, last_addr - 3
%shr_const(8)
// stack: length >> 24, last_addr - 3
DUP1
// stack: length >> 24, length >> 24, last_addr - 3
%and_const(0xff)
// stack: (length >> 24) % (1 << 8), length >> 24, last_addr - 3
DUP3
// stack: last_addr - 3, (length >> 24) % (1 << 8), length >> 24, last_addr - 3
%mstore_kernel_general
// stack: length >> 24, last_addr - 3
SWAP1
%decrement
SWAP1
// stack: length >> 24, last_addr - 4
%shr_const(8)
// stack: length >> 32, last_addr - 4
DUP1
// stack: length >> 32, length >> 32, last_addr - 4
%and_const(0xff)
// stack: (length >> 32) % (1 << 8), length >> 32, last_addr - 4
DUP3
// stack: last_addr - 4, (length >> 32) % (1 << 8), length >> 32, last_addr - 4
%mstore_kernel_general
// stack: length >> 32, last_addr - 4
SWAP1
%decrement
SWAP1
// stack: length >> 32, last_addr - 5
%shr_const(8)
// stack: length >> 40, last_addr - 5
DUP1
// stack: length >> 40, length >> 40, last_addr - 5
%and_const(0xff)
// stack: (length >> 40) % (1 << 8), length >> 40, last_addr - 5
DUP3
// stack: last_addr - 5, (length >> 40) % (1 << 8), length >> 40, last_addr - 5
%mstore_kernel_general
// stack: length >> 40, last_addr - 5
SWAP1
%decrement
SWAP1
// stack: length >> 40, last_addr - 6
%shr_const(8)
// stack: length >> 48, last_addr - 6
DUP1
// stack: length >> 48, length >> 48, last_addr - 6
%and_const(0xff)
// stack: (length >> 48) % (1 << 8), length >> 48, last_addr - 6
DUP3
// stack: last_addr - 6, (length >> 48) % (1 << 8), length >> 48, last_addr - 6
%mstore_kernel_general
// stack: length >> 48, last_addr - 6
SWAP1
%decrement
SWAP1
// stack: length >> 48, last_addr - 7
%shr_const(8)
// stack: length >> 56, last_addr - 7
DUP1
// stack: length >> 56, length >> 56, last_addr - 7
%and_const(0xff)
// stack: (length >> 56) % (1 << 8), length >> 56, last_addr - 7
DUP3
// stack: last_addr - 7, (length >> 56) % (1 << 8), length >> 56, last_addr - 7
%mstore_kernel_general
%pop2
// stack: (empty)
%endmacro