diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm
index bf3dd1c4..9cf8aeef 100644
--- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm
+++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm
@@ -85,8 +85,7 @@ compression_loop:
         // stack: cur_message_addr + 1, cur_block_byte + 8, ...
     %endrep
     // stack: end_message_addr, end_block_start_byte, t, cur_block, is_last_block, retdest
-    POP
-    POP
+    %pop2
     // stack: t, cur_block, is_last_block, retdest
     SWAP1
     // stack: cur_block, t, is_last_block, retdest
@@ -128,15 +127,14 @@ compression_loop:
     // stack: 0, start + 8, invert_if_last_block, t, cur_block, retdest
     %rep 4
         // stack: i, loc, ...
-        DUP2
-        DUP2
-        // stack: i, loc, i, loc,...
+        DUP1
+        // stack: i, i, loc, ...
         %blake2b_iv
-        // stack: IV_i, loc, i, loc,...
-        SWAP1
-        // stack: loc, IV_i, i, loc,...
+        // stack: IV_i, i, loc, ...
+        DUP3
+        // stack: loc, IV_i, i, loc, ...
         %mstore_kernel_general
-        // stack: i, loc,...
+        // stack: i, loc, ...
         %increment
         SWAP1
         %increment
@@ -147,15 +145,11 @@ compression_loop:
     %stack (i, loc, inv, last, t) -> (t, t, i, loc, inv, last)
     // stack: t, t, 4, start + 12, invert_if_last_block, cur_block, retdest
     %shr_const(64)
-    // stack: t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest
+    // stack: t_hi = t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest
     SWAP1
-    // stack: t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest
-    PUSH 1
-    %shl_const(64)
-    // stack: 1 << 64, t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest
-    SWAP1
-    MOD
-    // stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest
+    // stack: t, t_hi, 4, start + 12, invert_if_last_block, cur_block, retdest
+    %mod_const(0x10000000000000000)
+    // stack: t_lo = t % (1 << 64), t_hi, 4, start + 12, invert_if_last_block, cur_block, retdest
     %stack (t_lo, t_hi, i, loc, inv) -> (i, loc, t_lo, t_hi, inv, 0)
     // stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, cur_block, retdest
 
@@ -163,25 +157,31 @@ compression_loop:
     // the values (t % 2**64, t >> 64, invert_if, 0).
     %rep 4
         // stack: i, loc, val, next_val,...
-        %stack (i, loc, val) -> (i, val, loc, i, loc)
-        // stack: i, val, loc, i, loc, next_val,...
+        DUP1
+        // stack: i, i, loc, val, next_val,...
         %blake2b_iv
-        // stack: IV_i, val, loc, i, loc, next_val,...
+        // stack: IV_i, i, loc, val, next_val,...
+        DUP4
+        // stack: val, IV_i, i, loc, val, next_val,...
         XOR
-        // stack: val ^ IV_i, loc, i, loc, next_val,...
-        SWAP1
-        // stack: loc, val ^ IV_i, i, loc, next_val,...
+        // stack: val ^ IV_i, i, loc, val, next_val,...
+        DUP3
+        // stack: loc, val ^ IV_i, i, loc, val, next_val,...
         %mstore_kernel_general
-        // stack: i, loc, next_val,...
+        // stack: i, loc, val, next_val,...
         %increment
-        SWAP1
+        // stack: i + 1, loc, val, next_val,...
+        SWAP2
+        // stack: val, loc, i + 1, next_val,...
+        POP
+        // stack: loc, i + 1, next_val,...
         %increment
+        // stack: loc + 1, i + 1, next_val,...
         SWAP1
         // stack: i + 1, loc + 1, next_val,...
     %endrep
     // stack: 8, loc + 16, cur_block, retdest
-    POP
-    POP
+    %pop2
     // stack: cur_block, retdest
 
     // Run 12 rounds of G functions.
diff --git a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm
index fa58dce8..c28a19a2 100644
--- a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm
+++ b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm
@@ -161,8 +161,7 @@ compression_loop:
     // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
     SWAP1
     // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
-    PUSH 256
-    MUL
+    %mul_const(256)
     // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
     ADD
     // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest
@@ -260,20 +259,10 @@ compression_end:
     // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
     POP
     // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest
-    %shl_const(32)
-    ADD // OR
-    %shl_const(32)
-    ADD // OR
-    %shl_const(32)
-    ADD // OR
-    %shl_const(32)
-    ADD // OR
-    %shl_const(32)
-    ADD // OR
-    %shl_const(32)
-    ADD // OR
-    %shl_const(32)
-    ADD // OR
+    %rep 7
+        %shl_const(32)
+        ADD // OR
+    %endrep
     // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest
     SWAP3
     // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest