diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index cd1f6a80..6e8cdb0a 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -85,8 +85,7 @@ compression_loop: // stack: cur_message_addr + 1, cur_block_byte + 8, ... %endrep // stack: end_message_addr, end_block_start_byte, t, cur_block, is_last_block, retdest - POP - POP + %pop2 // stack: t, cur_block, is_last_block, retdest SWAP1 // stack: cur_block, t, is_last_block, retdest @@ -128,15 +127,14 @@ compression_loop: // stack: 0, start + 8, invert_if_last_block, t, cur_block, retdest %rep 4 // stack: i, loc, ... - DUP2 - DUP2 - // stack: i, loc, i, loc,... + DUP1 + // stack: i, i, loc, ... %blake2b_iv - // stack: IV_i, loc, i, loc,... - SWAP1 - // stack: loc, IV_i, i, loc,... + // stack: IV_i, i, loc, ... + DUP3 + // stack: loc, IV_i, i, loc, ... %mstore_kernel_general - // stack: i, loc,... + // stack: i, loc, ... %increment SWAP1 %increment @@ -147,15 +145,11 @@ compression_loop: %stack (i, loc, inv, last, t) -> (t, t, i, loc, inv, last) // stack: t, t, 4, start + 12, invert_if_last_block, cur_block, retdest %shr_const(64) - // stack: t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest + // stack: t_hi = t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest SWAP1 - // stack: t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest - PUSH 1 - %shl_const(64) - // stack: 1 << 64, t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest - SWAP1 - MOD - // stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest + // stack: t, t_hi, 4, start + 12, invert_if_last_block, cur_block, retdest + %mod_const(0x10000000000000000) + // stack: t_lo = t % (1 << 64), t_hi, 4, start + 12, invert_if_last_block, cur_block, retdest %stack (t_lo, t_hi, i, loc, inv) -> (i, loc, t_lo, t_hi, inv, 0) // stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, cur_block, retdest @@ -163,25 +157,31 @@ compression_loop: // the values (t % 2**64, t >> 64, invert_if, 0). %rep 4 // stack: i, loc, val, next_val,... - %stack (i, loc, val) -> (i, val, loc, i, loc) - // stack: i, val, loc, i, loc, next_val,... + DUP1 + // stack: i, i, loc, val, next_val,... %blake2b_iv - // stack: IV_i, val, loc, i, loc, next_val,... + // stack: IV_i, i, loc, val, next_val,... + DUP4 + // stack: val, IV_i, i, loc, val, next_val,... XOR - // stack: val ^ IV_i, loc, i, loc, next_val,... - SWAP1 - // stack: loc, val ^ IV_i, i, loc, next_val,... + // stack: val ^ IV_i, i, loc, val, next_val,... + DUP3 + // stack: loc, val ^ IV_i, i, loc, val, next_val,... %mstore_kernel_general - // stack: i, loc, next_val,... + // stack: i, loc, val, next_val,... %increment - SWAP1 + // stack: i + 1, loc, val, next_val,... + SWAP2 + // stack: val, loc, i + 1, next_val,... + POP + // stack: loc, i + 1, next_val,... %increment + // stack: loc + 1, i + 1, next_val,... SWAP1 // stack: i + 1, loc + 1, next_val,... %endrep // stack: 8, loc + 16, cur_block, retdest - POP - POP + %pop2 // stack: cur_block, retdest // Run 12 rounds of G functions. @@ -209,10 +209,9 @@ hash_generate_return: PUSH 0 %mload_kernel_general // stack: num_blocks, cur_block + 1, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - EQ - // stack: last_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - %jumpi(compression_end) - %jump(compression_loop) + GT + // stack: not_last_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %jumpi(compression_loop) compression_end: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest diff --git a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm index 8c219ebb..8f7d942c 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm @@ -8,22 +8,7 @@ global sha2_compression: // stack: message_schedule_addr, retdest - PUSH 0 - // stack: i=0, message_schedule_addr, retdest - SWAP1 - // stack: message_schedule_addr, i=0, retdest - PUSH 0 - // stack: 0, message_schedule_addr, i=0, retdest - %mload_kernel_general - // stack: num_blocks, message_schedule_addr, i=0, retdest - DUP1 - // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest - %scratch_space_addr_from_num_blocks - // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest - SWAP1 - // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest // Push the initial hash values; these constants are called H^(0) in the spec. - PUSH 0x5be0cd19 // H^(0)_7 PUSH 0x1f83d9ab // H^(0)_6 PUSH 0x9b05688c // H^(0)_5 PUSH 0x510e527f // H^(0)_4 @@ -31,255 +16,145 @@ global sha2_compression: PUSH 0x3c6ef372 // H^(0)_2 PUSH 0xbb67ae85 // H^(0)_1 PUSH 0x6a09e667 // H^(0)_0 - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH 0x5be0cd19 // H^(0)_7 + // stack: h[0], a[0], b[0], c[0], d[0], e[0], f[0], g[0], message_schedule_addr, retdest + SWAP8 + // stack: message_schedule_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest + PUSH 0 + // stack: i=0, message_schedule_addr, a[0]..h[0], retdest + SWAP1 + // stack: message_schedule_addr, i=0, a[0]..h[0], retdest + PUSH 0 + // stack: 0, message_schedule_addr, i=0, a[0]..h[0], retdest + %mload_kernel_general + // stack: num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest + DUP1 + // stack: num_blocks, num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest + %scratch_space_addr_from_num_blocks + // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest + SWAP1 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest compression_start_block: - // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. - DUP10 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP2 - DUP2 - // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP3 - DUP2 - // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP4 - DUP2 - // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP5 - DUP2 - // stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP6 - DUP2 - // stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP7 - DUP2 - // stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP8 - DUP2 - // stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP9 - DUP2 - // stack: scratch_space_addr+28, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - POP - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + // We keep the current values of the working variables saved at the end of the stack. + // These are the "initial values" to be added back in at the end of this block. + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest + %rep 8 + DUP12 + %endrep + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest compression_loop: // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. - // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP11 - // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP13 - // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %mul_const(4) - // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest ADD - // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %mload_kernel_general_u32 - // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest PUSH sha2_constants_k - // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP14 - // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %mul_const(4) - // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest ADD - // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %mload_kernel_code_u32 - // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack (start: 6, e, f, g, h) -> (e, f, g, h, start, e, f, g, h) - // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest + DUP10 + DUP10 + DUP10 + DUP10 + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %sha2_temp_word1 - // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack (t, a, b, c) -> (a, b, c, t, a, b, c) - // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest + DUP4 + DUP4 + DUP4 + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %sha2_temp_word2 - // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP6 - // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP3 - // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %add_u32 - // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest SWAP2 - // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %add_u32 - // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %stack (a, e, b, c, d, old_d, f, g, h, old_h) -> (a, b, c, d, e, f, g, h) - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %increment - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP1 - // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %eq_const(64) - // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP1 - // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP12 - // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest SUB - // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest SWAP13 - // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest SWAP1 - // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - PUSH 256 - MUL - // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest + %mul_const(256) + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest ADD - // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest SWAP12 - // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, a[0]..h[0], retdest SWAP10 - // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_a[0]..h[0], retdest POP - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_a[0]..h[0], retdest %and_const(63) - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, a[0]..h[0], retdest SWAP12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest POP - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest DUP12 - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - ISZERO - %jumpi(compression_end_block) - %jump(compression_loop) + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest + %jumpi(compression_loop) compression_end_block: // Add the initial values of the eight working variables (from the start of this block's compression) back into them. - // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP1 - // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(4) - %mload_kernel_general_u32 - // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP2 - // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(8) - %mload_kernel_general_u32 - // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP3 - // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(12) - %mload_kernel_general_u32 - // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP4 - // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(16) - %mload_kernel_general_u32 - // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP5 - // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(20) - %mload_kernel_general_u32 - // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP6 - // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(24) - %mload_kernel_general_u32 - // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP7 - // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(28) - %mload_kernel_general_u32 - // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP8 - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest + PUSH 0 + // stack: 0, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest + %rep 8 + SWAP13 + %add_u32 + SWAP12 + %endrep + // stack: 0, num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest + POP + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest DUP1 - // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + // stack: num_blocks, num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest ISZERO // In this case, we've finished all the blocks. %jumpi(compression_end) - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %stack (num_blocks, working: 8) -> (working, num_blocks) + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest %jump(compression_start_block) compression_end: - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - POP - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest - SWAP3 - // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - %pop3 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest + %pop4 + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest + %rep 7 + %shl_const(32) + ADD // OR + %endrep // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest SWAP1 JUMP diff --git a/evm/src/cpu/kernel/asm/hash/sha2/main.asm b/evm/src/cpu/kernel/asm/hash/sha2/main.asm index 058224f6..1deab294 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/main.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/main.asm @@ -19,9 +19,7 @@ global sha2_pad: // STEP 1: append 1 // insert 128 (= 1 << 7) at x[num_bytes+1] // stack: num_bytes, retdest - PUSH 1 - PUSH 7 - SHL + PUSH 0x80 // stack: 128, num_bytes, retdest DUP2 // stack: num_bytes, 128, num_bytes, retdest @@ -40,14 +38,12 @@ global sha2_pad: // STEP 3: calculate length := num_bytes*8 SWAP1 // stack: num_bytes, num_blocks, retdest - PUSH 8 - MUL + %mul_const(8) // stack: length = num_bytes*8, num_blocks, retdest // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] DUP2 // stack: num_blocks, length, num_blocks, retdest - PUSH 64 - MUL + %mul_const(64) // stack: last_addr = num_blocks*64, length, num_blocks, retdest %sha2_write_length // stack: num_blocks, retdest diff --git a/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm index 78d98634..d8f0500d 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm @@ -55,16 +55,13 @@ gen_message_schedule_from_block_0_loop: // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest %decrement DUP1 - ISZERO - %jumpi(gen_message_schedule_from_block_0_end) - %jump(gen_message_schedule_from_block_0_loop) + %jumpi(gen_message_schedule_from_block_0_loop) gen_message_schedule_from_block_0_end: // stack: old counter=0, output_addr, block[0], block[1], retdest POP - PUSH 8 - // stack: counter=8, output_addr, block[0], block[1], retdest - %stack (counter, out, b0, b1) -> (out, counter, b1, b0) - // stack: output_addr, counter, block[1], block[0], retdest + // stack: output_addr, block[0], block[1], retdest + %stack (out, b0, b1) -> (out, 8, b1, b0) + // stack: output_addr, counter=8, block[1], block[0], retdest %add_const(64) // stack: output_addr + 64, counter, block[1], block[0], retdest SWAP1 @@ -96,9 +93,7 @@ gen_message_schedule_from_block_1_loop: // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest %decrement DUP1 - ISZERO - %jumpi(gen_message_schedule_from_block_1_end) - %jump(gen_message_schedule_from_block_1_loop) + %jumpi(gen_message_schedule_from_block_1_loop) gen_message_schedule_from_block_1_end: // stack: old counter=0, output_addr, block[1], block[0], retdest POP @@ -118,11 +113,7 @@ gen_message_schedule_remaining_loop: // stack: output_addr, counter, block[0], block[1], retdest DUP1 // stack: output_addr, output_addr, counter, block[0], block[1], retdest - PUSH 2 - PUSH 4 - MUL - SWAP1 - SUB + %sub_const(8) // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest @@ -132,11 +123,7 @@ gen_message_schedule_remaining_loop: // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest DUP1 // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 7 - PUSH 4 - MUL - SWAP1 - SUB + %sub_const(28) // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest @@ -144,11 +131,7 @@ gen_message_schedule_remaining_loop: // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest DUP1 // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 15 - PUSH 4 - MUL - SWAP1 - SUB + %sub_const(60) // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest @@ -158,11 +141,7 @@ gen_message_schedule_remaining_loop: // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest DUP1 // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 16 - PUSH 4 - MUL - SWAP1 - SUB + %sub_const(64) // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest @@ -185,9 +164,7 @@ gen_message_schedule_remaining_loop: %decrement // stack: counter - 1, output_addr + 4, block[0], block[1], retdest DUP1 - ISZERO - %jumpi(gen_message_schedule_remaining_end) - %jump(gen_message_schedule_remaining_loop) + %jumpi(gen_message_schedule_remaining_loop) gen_message_schedule_remaining_end: // stack: counter=0, output_addr, block[0], block[1], retdest %pop4 @@ -230,9 +207,7 @@ gen_all_message_schedules_loop_end: // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest DUP2 // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - ISZERO - %jumpi(gen_all_message_schedules_end) - %jump(gen_all_message_schedules_loop) + %jumpi(gen_all_message_schedules_loop) gen_all_message_schedules_end: // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest %pop3 diff --git a/evm/src/cpu/kernel/asm/hash/sha2/ops.asm b/evm/src/cpu/kernel/asm/hash/sha2/ops.asm index 7d8054ca..6a4c5e3b 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/ops.asm @@ -26,14 +26,15 @@ // stack: x, x %rotr(7) // stack: rotr(x, 7), x - %stack (rotated, x) -> (x, x, rotated) + SWAP1 + // stack: x, rotr(x, 7) + DUP1 // stack: x, x, rotr(x, 7) %rotr(18) // stack: rotr(x, 18), x, rotr(x, 7) SWAP1 // stack: x, rotr(x, 18), rotr(x, 7) - PUSH 3 - SHR + %div_const(8) // equivalent to %shr_const(3) // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) XOR XOR @@ -45,7 +46,9 @@ // stack: x, x %rotr(17) // stack: rotr(x, 17), x - %stack (rotated, x) -> (x, x, rotated) + SWAP1 + // stack: x, rotr(x, 17) + DUP1 // stack: x, x, rotr(x, 17) %rotr(19) // stack: rotr(x, 19), x, rotr(x, 17) @@ -64,7 +67,9 @@ // stack: x, x %rotr(2) // stack: rotr(x, 2), x - %stack (rotated, x) -> (x, x, rotated) + SWAP1 + // stack: x, rotr(x, 2) + DUP1 // stack: x, x, rotr(x, 2) %rotr(13) // stack: rotr(x, 13), x, rotr(x, 2) @@ -82,7 +87,9 @@ // stack: x, x %rotr(6) // stack: rotr(x, 6), x - %stack (rotated, x) -> (x, x, rotated) + SWAP1 + // stack: x, rotr(x, 6) + DUP1 // stack: x, x, rotr(x, 6) %rotr(11) // stack: rotr(x, 11), x, rotr(x, 6) @@ -100,11 +107,13 @@ // stack: x, x, y, z NOT // stack: not x, x, y, z - %stack (notx, x, y, z) -> (notx, z, x, y) - // stack: not x, z, x, y + SWAP1 + // stack: x, not x, y, z + SWAP3 + // stack: z, not x, y, x AND - // stack: (not x) and z, x, y - %stack (nxz, x, y) -> (x, y, nxz) + // stack: (not x) and z, y, x + SWAP2 // stack: x, y, (not x) and z AND // stack: x and y, (not x) and z @@ -113,18 +122,22 @@ %macro sha2_majority // stack: x, y, z - %stack (xyz: 3) -> (xyz, xyz) - // stack: x, y, z, x, y, z + DUP1 + // stack: x, x, y, z + DUP3 + // stack: y, x, x, y, z + DUP5 + // stack: z, y, x, x, y, z AND - // stack: x and y, z, x, y, z + // stack: z and y, x, x, y, z + SWAP4 + // stack: z, x, x, y, z and y + AND + // stack: z and x, x, y, z and y SWAP2 - // stack: x, z, x and y, y, z + // stack: y, x, z and x, z and y AND - // stack: x and z, x and y, y, z - %stack (a: 2, b: 2) -> (b, a) - // stack: y, z, x and z, x and y - AND - // stack: y and z, x and z, x and y + // stack: y and x, z and x, z and y OR OR %endmacro diff --git a/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm b/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm index 5727498c..4f73fa79 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm @@ -10,110 +10,24 @@ // stack: last_addr, length % (1 << 8), length, last_addr %mstore_kernel_general - // stack: length, last_addr - SWAP1 - %decrement - SWAP1 - // stack: length, last_addr - 1 - %shr_const(8) - // stack: length >> 8, last_addr - 1 - DUP1 - // stack: length >> 8, length >> 8, last_addr - 1 - %and_const(0xff) - // stack: (length >> 8) % (1 << 8), length >> 8, last_addr - 1 - DUP3 - // stack: last_addr - 1, (length >> 8) % (1 << 8), length >> 8, last_addr - 1 - %mstore_kernel_general - - // stack: length >> 8, last_addr - 1 - SWAP1 - %decrement - SWAP1 - // stack: length >> 8, last_addr - 2 - %shr_const(8) - // stack: length >> 16, last_addr - 2 - DUP1 - // stack: length >> 16, length >> 16, last_addr - 2 - %and_const(0xff) - // stack: (length >> 16) % (1 << 8), length >> 16, last_addr - 2 - DUP3 - // stack: last_addr - 2, (length >> 16) % (1 << 8), length >> 16, last_addr - 2 - %mstore_kernel_general + %rep 7 + // For i = 0 to 6 + // stack: length >> (8 * i), last_addr - i - 1 + SWAP1 + %decrement + SWAP1 + // stack: length >> (8 * i), last_addr - i - 2 + %div_const(256) // equivalent to %shr_const(8) + // stack: length >> (8 * (i + 1)), last_addr - i - 2 + DUP1 + // stack: length >> (8 * (i + 1)), length >> (8 * (i + 1)), last_addr - i - 2 + %mod_const(256) + // stack: (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 2 + DUP3 + // stack: last_addr - i - 2, (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 2 + %mstore_kernel_general + %endrep - // stack: length >> 16, last_addr - 2 - SWAP1 - %decrement - SWAP1 - // stack: length >> 16, last_addr - 3 - %shr_const(8) - // stack: length >> 24, last_addr - 3 - DUP1 - // stack: length >> 24, length >> 24, last_addr - 3 - %and_const(0xff) - // stack: (length >> 24) % (1 << 8), length >> 24, last_addr - 3 - DUP3 - // stack: last_addr - 3, (length >> 24) % (1 << 8), length >> 24, last_addr - 3 - %mstore_kernel_general - - // stack: length >> 24, last_addr - 3 - SWAP1 - %decrement - SWAP1 - // stack: length >> 24, last_addr - 4 - %shr_const(8) - // stack: length >> 32, last_addr - 4 - DUP1 - // stack: length >> 32, length >> 32, last_addr - 4 - %and_const(0xff) - // stack: (length >> 32) % (1 << 8), length >> 32, last_addr - 4 - DUP3 - // stack: last_addr - 4, (length >> 32) % (1 << 8), length >> 32, last_addr - 4 - %mstore_kernel_general - - // stack: length >> 32, last_addr - 4 - SWAP1 - %decrement - SWAP1 - // stack: length >> 32, last_addr - 5 - %shr_const(8) - // stack: length >> 40, last_addr - 5 - DUP1 - // stack: length >> 40, length >> 40, last_addr - 5 - %and_const(0xff) - // stack: (length >> 40) % (1 << 8), length >> 40, last_addr - 5 - DUP3 - // stack: last_addr - 5, (length >> 40) % (1 << 8), length >> 40, last_addr - 5 - %mstore_kernel_general - - // stack: length >> 40, last_addr - 5 - SWAP1 - %decrement - SWAP1 - // stack: length >> 40, last_addr - 6 - %shr_const(8) - // stack: length >> 48, last_addr - 6 - DUP1 - // stack: length >> 48, length >> 48, last_addr - 6 - %and_const(0xff) - // stack: (length >> 48) % (1 << 8), length >> 48, last_addr - 6 - DUP3 - // stack: last_addr - 6, (length >> 48) % (1 << 8), length >> 48, last_addr - 6 - %mstore_kernel_general - - // stack: length >> 48, last_addr - 6 - SWAP1 - %decrement - SWAP1 - // stack: length >> 48, last_addr - 7 - %shr_const(8) - // stack: length >> 56, last_addr - 7 - DUP1 - // stack: length >> 56, length >> 56, last_addr - 7 - %and_const(0xff) - // stack: (length >> 56) % (1 << 8), length >> 56, last_addr - 7 - DUP3 - // stack: last_addr - 7, (length >> 56) % (1 << 8), length >> 56, last_addr - 7 - %mstore_kernel_general %pop2 // stack: (empty) %endmacro