diff --git a/evm/src/cpu/kernel/asm/sha2/compression.asm b/evm/src/cpu/kernel/asm/sha2/compression.asm index 31b30b21..efb940f9 100644 --- a/evm/src/cpu/kernel/asm/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/sha2/compression.asm @@ -1,371 +1,365 @@ global sha2_compression: // stack: message_schedule_addr, retdest - push 0 + PUSH 0 // stack: i=0, message_schedule_addr, retdest - swap1 + SWAP1 // stack: message_schedule_addr, i=0, retdest - push 0 + PUSH 0 // stack: 0, message_schedule_addr, i=0, retdest %mload_kernel_general // stack: num_blocks, message_schedule_addr, i=0, retdest - dup1 + DUP1 // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest %scratch_space_addr_from_num_blocks // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest - swap1 + SWAP1 // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(28) %mload_kernel_code_u32 // stack: h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(24) %mload_kernel_code_u32 // stack: g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(20) %mload_kernel_code_u32 // stack: f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(16) %mload_kernel_code_u32 // stack: e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(12) %mload_kernel_code_u32 // stack: d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %add_const(8) %mload_kernel_code_u32 // stack: c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - 
push sha2_constants_h + PUSH sha2_constants_h %add_const(4) %mload_kernel_code_u32 // stack: b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - push sha2_constants_h + PUSH sha2_constants_h %mload_kernel_code_u32 // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest sha2_compression_start_block: // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. - dup10 + DUP10 // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup2 - dup2 + DUP2 + DUP2 // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup3 - dup2 + DUP3 + DUP2 // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup4 - dup2 + DUP4 + DUP2 // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest 
%mstore_kernel_general_u32 // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup5 - dup2 + DUP5 + DUP2 // stack: scratch_space_addr+12, d[0], scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup6 - dup2 + DUP6 + DUP2 // stack: scratch_space_addr+16, e[0], scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup7 - dup2 + DUP7 + DUP2 // stack: scratch_space_addr+20, f[0], scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup8 - dup2 + DUP8 + DUP2 
// stack: scratch_space_addr+24, g[0], scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %add_const(4) // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - dup9 - dup2 + DUP9 + DUP2 // stack: scratch_space_addr+28, h[0], scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest %mstore_kernel_general_u32 // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - pop + POP // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest sha2_compression_loop: // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. 
// stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup11 + DUP11 // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup13 + DUP13 // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mul_const(4) // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - add + ADD // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_general_u32 // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - push sha2_constants_k + PUSH sha2_constants_k // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup14 + DUP14 // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mul_const(4) // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - add + ADD // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_code_u32 // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: g[i], h[i], K[i], 
W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %sha2_temp_word1 // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 + DUP4 // stack: c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 + DUP4 // stack: b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup4 + DUP4 // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %sha2_temp_word2 // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup6 + DUP6 // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup3 + DUP3 // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 + SWAP2 // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], 
g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 + SWAP1 // stack: e[i+1], a[i+1], b[i+1], c[i+1], d[i+1], d[i], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 + SWAP5 // stack: d[i], a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - pop + POP // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap8 + SWAP8 // stack: h[i], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - pop + POP // stack: b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], a[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap7 + SWAP7 // stack: a[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], b[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 - swap7 - swap1 + SWAP1 + SWAP7 + SWAP1 // stack: a[i+1], b[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], c[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 - swap7 - swap2 + SWAP2 + SWAP7 + SWAP2 // stack: a[i+1], b[i+1], c[i+1], e[i+1], f[i+1], g[i+1], h[i+1], d[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap3 - swap7 - swap3 + SWAP3 + SWAP7 + SWAP3 // stack: a[i+1], b[i+1], c[i+1], d[i+1], f[i+1], g[i+1], h[i+1], e[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap4 - swap7 - swap4 + SWAP4 + SWAP7 + SWAP4 // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], g[i+1], h[i+1], f[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 - swap7 - swap5 + SWAP5 + SWAP7 + SWAP5 // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], h[i+1], g[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, 
retdest - swap6 - swap7 - swap6 + SWAP6 + SWAP7 + SWAP6 // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup12 + DUP12 // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %increment // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 + DUP1 // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %eq_const(64) // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup1 + DUP1 // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup12 + DUP12 // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - sub + SUB // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap13 + SWAP13 // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - swap1 + SWAP1 // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - push 256 - mul + PUSH 256 + MUL // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - add + ADD // stack: 
message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - swap12 + SWAP12 // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest - swap10 + SWAP10 // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest - pop + POP // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest - push 64 - swap1 - mod + PUSH 64 + SWAP1 + MOD // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest - swap12 + SWAP12 // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - pop + POP // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - dup12 + DUP12 // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - //dup10 - //iszero - //dup2 - //iszero - //and - //%jumpi(sha2_stop_lol) - iszero + ISZERO %jumpi(sha2_compression_end_block) %jump(sha2_compression_loop) sha2_compression_end_block: // Add the initial values of the eight working variables (from the start of this block's compression) back into them. 
// stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %mload_kernel_general_u32 // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap1 + SWAP1 // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(4) %mload_kernel_general_u32 // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap2 + SWAP2 // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(8) %mload_kernel_general_u32 // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap3 + SWAP3 // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(12) %mload_kernel_general_u32 // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, 
retdest %add_u32 // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap4 + SWAP4 // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(16) %mload_kernel_general_u32 // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap5 + SWAP5 // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(20) %mload_kernel_general_u32 // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap6 + SWAP6 // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(24) %mload_kernel_general_u32 // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap7 + SWAP7 // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest - dup10 + DUP10 %add_const(28) %mload_kernel_general_u32 // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest %add_u32 // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - swap8 + SWAP8 // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - dup1 + DUP1 // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - iszero + ISZERO // In this case, we've finished all the blocks. %jumpi(sha2_compression_end) // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest // TODO: "insertion" macro for the below // Move num_blocks to the ninth spot on the stack, past the working variables. 
- swap1 - swap2 - swap1 - swap2 - swap3 - swap2 - swap3 - swap4 - swap3 - swap4 - swap5 - swap4 - swap5 - swap6 - swap5 - swap6 - swap7 - swap6 - swap7 - swap8 - swap7 - swap8 + SWAP1 + SWAP2 + SWAP1 + SWAP2 + SWAP3 + SWAP2 + SWAP3 + SWAP4 + SWAP3 + SWAP4 + SWAP5 + SWAP4 + SWAP5 + SWAP6 + SWAP5 + SWAP6 + SWAP7 + SWAP6 + SWAP7 + SWAP8 + SWAP7 + SWAP8 %jump(sha2_compression_start_block) sha2_compression_end: // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - pop + POP // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR %shl_const(32) - or + OR // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest - swap3 + SWAP3 // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest %pop3 // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest diff --git a/evm/src/cpu/kernel/asm/sha2/memory.asm b/evm/src/cpu/kernel/asm/sha2/memory.asm index 0b722287..9c68f208 100644 --- a/evm/src/cpu/kernel/asm/sha2/memory.asm +++ b/evm/src/cpu/kernel/asm/sha2/memory.asm @@ -94,8 +94,8 @@ %mload_kernel_general_u32 OR // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset - swap1 - pop + SWAP1 + POP // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0 %endmacro @@ -113,64 +113,64 @@ // to kernel 
general memory. %macro mstore_kernel_general_u32 // stack: offset, value - swap1 + SWAP1 // stack: value, offset - push 1 - push 8 - shl + PUSH 1 + PUSH 8 + SHL // stack: 1 << 8, value, offset - swap1 + SWAP1 // stack: value, 1 << 8, offset - dup2 - dup2 + DUP2 + DUP2 // stack: value, 1 << 8, value, 1 << 8, offset - mod + MOD // stack: c_0 = value % (1 << 8), value, 1 << 8, offset - swap2 - swap1 + SWAP2 + SWAP1 // stack: value, 1 << 8, c_0, offset - push 8 - shr + PUSH 8 + SHR // stack: value >> 8, 1 << 8, c_0, offset - dup2 - dup2 + DUP2 + DUP2 // stack: value >> 8, 1 << 8, value >> 8, 1 << 8, c_0, offset - mod + MOD // stack: c_1 = (value >> 8) % (1 << 8), value >> 8, 1 << 8, c_0, offset - swap2 - swap1 + SWAP2 + SWAP1 // stack: value >> 8, 1 << 8, c_1, c_0, offset - push 8 - shr + PUSH 8 + SHR // stack: value >> 16, 1 << 8, c_1, c_0, offset - dup2 - dup2 + DUP2 + DUP2 // stack: value >> 16, 1 << 8, value >> 16, 1 << 8, c_1, c_0, offset - mod + MOD // stack: c_2 = (value >> 16) % (1 << 8), value >> 16, 1 << 8, c_1, c_0, offset - swap2 - swap1 + SWAP2 + SWAP1 // stack: value >> 16, 1 << 8, c_2, c_1, c_0, offset - push 8 - shr + PUSH 8 + SHR // stack: value >> 24, 1 << 8, c_2, c_1, c_0, offset - mod + MOD // stack: c_3 = (value >> 24) % (1 << 8), c_2, c_1, c_0, offset - dup5 + DUP5 // stack: offset, c_3, c_2, c_1, c_0, offset %mstore_kernel_general // stack: c_2, c_1, c_0, offset - dup4 + DUP4 // stack: offset, c_2, c_1, c_0, offset %add_const(1) %mstore_kernel_general // stack: c_1, c_0, offset - dup3 + DUP3 // stack: offset, c_1, c_0, offset %add_const(2) %mstore_kernel_general // stack: c_0, offset - swap1 + SWAP1 // stack: offset, c_0 %add_const(3) %mstore_kernel_general diff --git a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm index 8f0cd58d..e6daa0b8 100644 --- a/evm/src/cpu/kernel/asm/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/sha2/message_schedule.asm @@ -3,212 +3,212 @@ // of message 
schedule (in four-byte increments) global sha2_gen_message_schedule_from_block: // stack: block_addr, output_addr, retdest - dup1 + DUP1 // stack: block_addr, block_addr, output_addr, retdest %add_const(32) // stack: block_addr + 32, block_addr, output_addr, retdest - swap1 + SWAP1 // stack: block_addr, block_addr + 32, output_addr, retdest %mload_kernel_general_u256 // stack: block[0], block_addr + 32, output_addr, retdest - swap1 + SWAP1 // stack: block_addr + 32, block[0], output_addr, retdest %mload_kernel_general_u256 // stack: block[1], block[0], output_addr, retdest - swap2 + SWAP2 // stack: output_addr, block[0], block[1], retdest %add_const(28) - push 8 + PUSH 8 // stack: counter=8, output_addr + 28, block[0], block[1], retdest %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_loop: // Split the first half (256 bits) of the block into the first eight (32-bit) chunks of the message schedule. // stack: counter, output_addr, block[0], block[1], retdest - swap2 + SWAP2 // stack: block[0], output_addr, counter, block[1], retdest - push 1 - push 32 - shl + PUSH 1 + PUSH 32 + SHL // stack: 1 << 32, block[0], output_addr, counter, block[1], retdest - dup2 - dup2 + DUP2 + DUP2 // stack: 1 << 32, block[0], 1 << 32, block[0], output_addr, counter, block[1], retdest - swap1 + SWAP1 // stack: block[0], 1 << 32, 1 << 32, block[0], output_addr, counter, block[1], retdest - mod + MOD // stack: block[0] % (1 << 32), 1 << 32, block[0], output_addr, counter, block[1], retdest - swap2 + SWAP2 // stack: block[0], 1 << 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - div + DIV // stack: block[0] >> 32, block[0] % (1 << 32), output_addr, counter, block[1], retdest - swap1 + SWAP1 // stack: block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest - dup3 + DUP3 // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest %mstore_kernel_general_u32 // stack: 
block[0] >> 32, output_addr, counter, block[1], retdest - swap1 + SWAP1 // stack: output_addr, block[0] >> 32, counter, block[1], retdest %sub_const(4) // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest - swap1 + SWAP1 // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest - swap2 + SWAP2 // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest %decrement - dup1 - iszero + DUP1 + ISZERO %jumpi(sha2_gen_message_schedule_from_block_0_end) %jump(sha2_gen_message_schedule_from_block_0_loop) sha2_gen_message_schedule_from_block_0_end: // stack: old counter=0, output_addr, block[0], block[1], retdest - pop - push 8 + POP + PUSH 8 // stack: counter=8, output_addr, block[0], block[1], retdest - swap2 + SWAP2 // stack: block[0], output_addr, counter, block[1], retdest - swap3 + SWAP3 // stack: block[1], output_addr, counter, block[0], retdest - swap2 + SWAP2 // stack: counter, output_addr, block[1], block[0], retdest - swap1 + SWAP1 // stack: output_addr, counter, block[1], block[0], retdest %add_const(64) // stack: output_addr + 64, counter, block[1], block[0], retdest - swap1 + SWAP1 // stack: counter, output_addr + 64, block[1], block[0], retdest sha2_gen_message_schedule_from_block_1_loop: // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message schedule. 
// stack: counter, output_addr, block[1], block[0], retdest - swap2 + SWAP2 // stack: block[1], output_addr, counter, block[0], retdest - push 1 - push 32 - shl + PUSH 1 + PUSH 32 + SHL // stack: 1 << 32, block[1], output_addr, counter, block[0], retdest - dup2 - dup2 + DUP2 + DUP2 // stack: 1 << 32, block[1], 1 << 32, block[1], output_addr, counter, block[0], retdest - swap1 + SWAP1 // stack: block[1], 1 << 32, 1 << 32, block[1], output_addr, counter, block[0], retdest - mod + MOD // stack: block[1] % (1 << 32), 1 << 32, block[1], output_addr, counter, block[0], retdest - swap2 + SWAP2 // stack: block[1], 1 << 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - div + DIV // stack: block[1] >> 32, block[1] % (1 << 32), output_addr, counter, block[0], retdest - swap1 + SWAP1 // stack: block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest - dup3 + DUP3 // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest %mstore_kernel_general_u32 // stack: block[1] >> 32, output_addr, counter, block[0], retdest - swap1 + SWAP1 // stack: output_addr, block[1] >> 32, counter, block[0], retdest %sub_const(4) // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest - swap1 + SWAP1 // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest - swap2 + SWAP2 // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest %decrement - dup1 - iszero + DUP1 + ISZERO %jumpi(sha2_gen_message_schedule_from_block_1_end) %jump(sha2_gen_message_schedule_from_block_1_loop) sha2_gen_message_schedule_from_block_1_end: // stack: old counter=0, output_addr, block[1], block[0], retdest - pop + POP // stack: output_addr, block[0], block[1], retdest - push 48 + PUSH 48 // stack: counter=48, output_addr, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, counter, block[0], block[1], retdest %add_const(36) // stack: output_addr + 36, counter, block[0], block[1], retdest - swap1 
+ SWAP1 // stack: counter, output_addr + 36, block[0], block[1], retdest sha2_gen_message_schedule_remaining_loop: // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. // stack: counter, output_addr, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, counter, block[0], block[1], retdest - push 2 - push 4 - mul - swap1 - sub + PUSH 2 + PUSH 4 + MUL + SWAP1 + SUB // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest %sha2_sigma_1 // stack: sigma_1(x[output_addr - 2*4]), output_addr, counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 7 - push 4 - mul - swap1 - sub + PUSH 7 + PUSH 4 + MUL + SWAP1 + SUB // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 15 - push 4 - mul - swap1 - sub + PUSH 15 + PUSH 4 + MUL + SWAP1 + SUB // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest 
%sha2_sigma_0 // stack: sigma_0(x[output_addr - 15*4]), output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - push 16 - push 4 - mul - swap1 - sub + PUSH 16 + PUSH 4 + MUL + SWAP1 + SUB // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - swap4 + SWAP4 // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest %add_u32 %add_u32 %add_u32 // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - dup1 + DUP1 // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], counter, block[0], block[1], retdest - swap2 + SWAP2 // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + 
x[output_addr - 7*4], output_addr, output_addr, counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest %mstore_kernel_general_u32 // stack: output_addr, counter, block[0], block[1], retdest %add_const(4) // stack: output_addr + 4, counter, block[0], block[1], retdest - swap1 + SWAP1 // stack: counter, output_addr + 4, block[0], block[1], retdest %decrement // stack: counter - 1, output_addr + 4, block[0], block[1], retdest - dup1 - iszero + DUP1 + ISZERO %jumpi(sha2_gen_message_schedule_remaining_end) %jump(sha2_gen_message_schedule_remaining_loop) sha2_gen_message_schedule_remaining_end: @@ -222,38 +222,38 @@ sha2_gen_message_schedule_remaining_end: // each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) global sha2_gen_all_message_schedules: // stack: output_addr, retdest - dup1 + DUP1 // stack: output_addr, output_addr, retdest - push 0 + PUSH 0 // stack: 0, output_addr, output_addr, retdest %mload_kernel_general // stack: num_blocks, output_addr, output_addr, retdest - push 1 + PUSH 1 // stack: cur_addr = 1, counter = num_blocks, output_addr, output_addr, retdest sha2_gen_all_message_schedules_loop: // stack: cur_addr, counter, cur_output_addr, output_addr, retdest - push sha2_gen_all_message_schedules_loop_end + PUSH sha2_gen_all_message_schedules_loop_end // stack: new_retdest = sha2_gen_all_message_schedules_loop_end, cur_addr, counter, cur_output_addr, output_addr, retdest - dup4 + DUP4 // stack: cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest - dup3 + DUP3 // stack: cur_addr, cur_output_addr, new_retdest, cur_addr, counter, cur_output_addr, output_addr, retdest %jump(sha2_gen_message_schedule_from_block) sha2_gen_all_message_schedules_loop_end: // stack: cur_addr, counter, cur_output_addr, 
output_addr, retdest %add_const(64) // stack: cur_addr + 64, counter, cur_output_addr, output_addr, retdest - swap1 + SWAP1 %decrement - swap1 + SWAP1 // stack: cur_addr + 64, counter - 1, cur_output_addr, output_addr, retdest - swap2 + SWAP2 %add_const(256) - swap2 + SWAP2 // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - dup2 + DUP2 // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - iszero + ISZERO %jumpi(sha2_gen_all_message_schedules_end) %jump(sha2_gen_all_message_schedules_loop) sha2_gen_all_message_schedules_end: diff --git a/evm/src/cpu/kernel/asm/sha2/ops.asm b/evm/src/cpu/kernel/asm/sha2/ops.asm index 6e114f1a..e84bc34c 100644 --- a/evm/src/cpu/kernel/asm/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/sha2/ops.asm @@ -1,17 +1,17 @@ // u32 addition (discarding 2^32 bit) %macro add_u32 // stack: x, y - add + ADD // stack: x + y - dup1 + DUP1 // stack: x + y, x + y %shr_const(32) // stack: (x + y) >> 32, x + y %shl_const(32) // stack: ((x + y) >> 32) << 32, x + y - swap1 + SWAP1 // stack: x + y, ((x + y) >> 32) << 32 - sub + SUB // stack: x + y - ((x + y) >> 32) << 32 %endmacro @@ -19,177 +19,177 @@ // 32-bit right rotation %macro rotr // stack: rot, value - dup2 - dup2 + DUP2 + DUP2 // stack: rot, value, rot, value - shr + SHR // stack: value >> rot, rot, value %stack (shifted, rot, value) -> (rot, value, shifted) // stack: rot, value, value >> rot - push 32 - sub + PUSH 32 + SUB // stack: 32 - rot, value, value >> rot - shl + SHL // stack: value << (32 - rot), value >> rot - push 32 - push 1 - swap1 - shl + PUSH 32 + PUSH 1 + SWAP1 + SHL // stack: 1 << 32, value << (32 - rot), value >> rot - swap1 - mod + SWAP1 + MOD // stack: (value << (32 - rot)) % (1 << 32), value >> rot - add + ADD %endmacro // 32-bit left rotation %macro rotl // stack: rot, value - dup2 - dup2 + DUP2 + DUP2 // stack: rot, value, rot, value - push 32 - sub + PUSH 32 + SUB // stack: 32 - rot, value, rot, value - 
shr + SHR // stack: value >> (32 - rot), rot, value %stack (shifted, rot, value) -> (rot, value, shifted) // stack: rot, value, value >> (32 - rot) - shl + SHL // stack: value << rot, value >> (32 - rot) - push 32 - push 1 - swap1 - shl + PUSH 32 + PUSH 1 + SWAP1 + SHL // stack: 1 << 32, value << rot, value >> (32 - rot) - swap1 - mod + SWAP1 + MOD // stack: (value << rot) % (1 << 32), value >> (32 - rot) - add + ADD %endmacro %macro sha2_sigma_0 // stack: x - dup1 + DUP1 // stack: x, x - push 7 + PUSH 7 %rotr // stack: rotr(x, 7), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 7) - push 18 + PUSH 18 %rotr // stack: rotr(x, 18), x, rotr(x, 7) - swap1 + SWAP1 // stack: x, rotr(x, 18), rotr(x, 7) - push 3 - shr + PUSH 3 + SHR // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) - xor - xor + XOR + XOR %endmacro %macro sha2_sigma_1 // stack: x - dup1 + DUP1 // stack: x, x - push 17 + PUSH 17 %rotr // stack: rotr(x, 17), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 17) - push 19 + PUSH 19 %rotr // stack: rotr(x, 19), x, rotr(x, 17) - swap1 + SWAP1 // stack: x, rotr(x, 19), rotr(x, 17) - push 10 - shr + PUSH 10 + SHR // stack: shr(x, 10), rotr(x, 19), rotr(x, 17) - xor - xor + XOR + XOR %endmacro %macro sha2_bigsigma_0 // stack: x - dup1 + DUP1 // stack: x, x - push 2 + PUSH 2 %rotr // stack: rotr(x, 2), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 2) - push 13 + PUSH 13 %rotr // stack: rotr(x, 13), x, rotr(x, 2) - swap1 + SWAP1 // stack: x, rotr(x, 13), rotr(x, 2) - push 22 + PUSH 22 %rotr // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) - xor - xor + XOR + XOR %endmacro %macro sha2_bigsigma_1 // stack: x - dup1 + DUP1 // stack: x, x - push 6 + PUSH 6 %rotr // stack: rotr(x, 6), x %stack (rotated, x) -> (x, x, rotated) // stack: x, x, rotr(x, 6) - push 11 + PUSH 11 %rotr // stack: rotr(x, 11), x, rotr(x, 6) - swap1 + SWAP1 // stack: x, rotr(x, 11), rotr(x, 6) - push 25 + PUSH 25 %rotr // stack: rotr(x, 25), rotr(x, 11), 
rotr(x, 6) - xor - xor + XOR + XOR %endmacro %macro sha2_choice // stack: x, y, z - dup1 + DUP1 // stack: x, x, y, z - not + NOT // stack: not x, x, y, z %stack (notx, x, y, z) -> (notx, z, x, y) // stack: not x, z, x, y - and + AND // stack: (not x) and z, x, y %stack (nxz, x, y) -> (x, y, nxz) // stack: x, y, (not x) and z - and + AND // stack: x and y, (not x) and z - or + OR %endmacro %macro sha2_majority // stack: x, y, z - dup3 - dup3 - dup3 + DUP3 + DUP3 + DUP3 // stack: x, y, z, x, y, z - and + AND // stack: x and y, z, x, y, z - swap2 + SWAP2 // stack: x, z, x and y, y, z - and + AND // stack: x and z, x and y, y, z - swap2 + SWAP2 // stack: y, x and y, x and z, z - swap1 + SWAP1 // stack: x and y, y, x and z, z - swap3 + SWAP3 // stack: z, y, x and z, x and y - and + AND // stack: y and z, x and z, x and y - or - or + OR + OR %endmacro \ No newline at end of file diff --git a/evm/src/cpu/kernel/asm/sha2/store_pad.asm b/evm/src/cpu/kernel/asm/sha2/store_pad.asm index 5cd02ac5..c178202b 100644 --- a/evm/src/cpu/kernel/asm/sha2/store_pad.asm +++ b/evm/src/cpu/kernel/asm/sha2/store_pad.asm @@ -1,31 +1,31 @@ global sha2_store: // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - dup1 + DUP1 // stack: num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - push 0 + PUSH 0 // stack: addr=0, num_bytes, num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest %mstore_kernel_general // stack: num_bytes, x[0], x[1], ..., x[num_bytes - 1], retdest - push 1 + PUSH 1 // stack: addr=1, counter=num_bytes, x[0], x[1], x[2], ... , x[num_bytes-1], retdest sha2_store_loop: // stack: addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - dup1 + DUP1 // stack: addr, addr, counter, x[num_bytes-counter], ... , x[num_bytes-1], retdest - swap3 + SWAP3 // stack: x[num_bytes-counter], addr, counter, addr, ... , x[num_bytes-1], retdest - swap1 + SWAP1 // stack: addr, x[num_bytes-counter], counter, addr, ... 
, x[num_bytes-1], retdest %mstore_kernel_general // stack: counter, addr, ... , x[num_bytes-1], retdest %decrement // stack: counter-1, addr, ... , x[num_bytes-1], retdest - dup1 + DUP1 // stack: counter-1, counter-1, addr, ... , x[num_bytes-1], retdest - iszero + ISZERO %jumpi(sha2_store_end) // stack: counter-1, addr, ... , x[num_bytes-1], retdest - swap1 + SWAP1 // stack: addr, counter-1, ... , x[num_bytes-1], retdest %increment // stack: addr+1, counter-1, ... , x[num_bytes-1], retdest @@ -42,24 +42,24 @@ sha2_store_end: // num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] global sha2_pad: // stack: retdest - push 0 + PUSH 0 %mload_kernel_general // stack: num_bytes, retdest // STEP 1: append 1 // insert 128 (= 1 << 7) at x[num_bytes+1] // stack: num_bytes, retdest - push 1 - push 7 - shl + PUSH 1 + PUSH 7 + SHL // stack: 128, num_bytes, retdest - dup2 + DUP2 // stack: num_bytes, 128, num_bytes, retdest %increment // stack: num_bytes+1, 128, num_bytes, retdest %mstore_kernel_general // stack: num_bytes, retdest // STEP 2: calculate num_blocks := (num_bytes+8)//64 + 1 - dup1 + DUP1 // stack: num_bytes, num_bytes, retdest %add_const(8) %div_const(64) @@ -67,23 +67,23 @@ global sha2_pad: %increment // stack: num_blocks = (num_bytes+8)//64 + 1, num_bytes, retdest // STEP 3: calculate length := num_bytes*8 - swap1 + SWAP1 // stack: num_bytes, num_blocks, retdest - push 8 - mul + PUSH 8 + MUL // stack: length = num_bytes*8, num_blocks, retdest // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] - dup2 + DUP2 // stack: num_blocks, length, num_blocks, retdest - push 64 - mul + PUSH 64 + MUL // stack: last_addr = num_blocks*64, length, num_blocks, retdest %sha2_write_length // stack: num_blocks, retdest - dup1 + DUP1 // stack: num_blocks, num_blocks, retdest // STEP 5: write num_blocks to x[0] - push 0 + PUSH 0 %mstore_kernel_general // stack: num_blocks, retdest %message_schedule_addr_from_num_blocks diff --git 
a/evm/src/cpu/kernel/asm/sha2/temp_words.asm b/evm/src/cpu/kernel/asm/sha2/temp_words.asm index cd2bd303..07aba907 100644 --- a/evm/src/cpu/kernel/asm/sha2/temp_words.asm +++ b/evm/src/cpu/kernel/asm/sha2/temp_words.asm @@ -1,14 +1,14 @@ %macro sha2_temp_word1 // stack: e, f, g, h, K[i], W[i] - dup1 + DUP1 // stack: e, e, f, g, h, K[i], W[i] %sha2_bigsigma_1 // stack: Sigma_1(e), e, f, g, h, K[i], W[i] - swap3 + SWAP3 // stack: g, e, f, Sigma_1(e), h, K[i], W[i] - swap2 + SWAP2 // stack: f, e, g, Sigma_1(e), h, K[i], W[i] - swap1 + SWAP1 // stack: e, f, g, Sigma_1(e), h, K[i], W[i] %sha2_choice // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] @@ -21,11 +21,11 @@ %macro sha2_temp_word2 // stack: a, b, c - dup1 + DUP1 // stack: a, a, b, c %sha2_bigsigma_0 // stack: Sigma_0(a), a, b, c - swap3 + SWAP3 // stack: c, a, b, Sigma_0(a) %sha2_majority // stack: Maj(c, a, b), Sigma_0(a) diff --git a/evm/src/cpu/kernel/asm/sha2/write_length.asm b/evm/src/cpu/kernel/asm/sha2/write_length.asm index 7474cd0e..40395707 100644 --- a/evm/src/cpu/kernel/asm/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/sha2/write_length.asm @@ -1,145 +1,145 @@ %macro sha2_write_length // stack: last_addr, length - swap1 + SWAP1 // stack: length, last_addr - push 1 - push 8 - shl + PUSH 1 + PUSH 8 + SHL // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: length % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, length % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 8 - shr + PUSH 8 + SHR // stack: length >> 8, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 8) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 8) % (1 
<< 8), 1 << 8, length, last_addr - push 1 - swap1 - sub + PUSH 1 + SWAP1 + SUB // stack: last_addr - 1, (length >> 8) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 16 - shr + PUSH 16 + SHR // stack: length >> 16, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 16) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 16) % (1 << 8), 1 << 8, length, last_addr - push 2 - swap1 - sub + PUSH 2 + SWAP1 + SUB // stack: last_addr - 2, (length >> 16) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 24 - shr + PUSH 24 + SHR // stack: length >> 24, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 24) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 24) % (1 << 8), 1 << 8, length, last_addr - push 3 - swap1 - sub + PUSH 3 + SWAP1 + SUB // stack: last_addr - 3, (length >> 24) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 32 - shr + PUSH 32 + SHR // stack: length >> 32, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 32) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 32) % (1 << 8), 1 << 8, length, last_addr - push 4 - swap1 - sub + PUSH 4 + SWAP1 + SUB // stack: last_addr - 4, (length >> 32) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 
8, length, last_addr - push 40 - shr + PUSH 40 + SHR // stack: length >> 40, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 40) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 40) % (1 << 8), 1 << 8, length, last_addr - push 5 - swap1 - sub + PUSH 5 + SWAP1 + SUB // stack: last_addr - 5, (length >> 40) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 48 - shr + PUSH 48 + SHR // stack: length >> 48, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 48) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 48) % (1 << 8), 1 << 8, length, last_addr - push 6 - swap1 - sub + PUSH 6 + SWAP1 + SUB // stack: last_addr - 6, (length >> 48) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general // stack: 1 << 8, length, last_addr - dup1 + DUP1 // stack: 1 << 8, 1 << 8, length, last_addr - dup3 + DUP3 // stack: length, 1 << 8, 1 << 8, length, last_addr - push 56 - shr + PUSH 56 + SHR // stack: length >> 56, 1 << 8, 1 << 8, length, last_addr - mod + MOD // stack: (length >> 56) % (1 << 8), 1 << 8, length, last_addr - dup4 + DUP4 // stack: last_addr, (length >> 56) % (1 << 8), 1 << 8, length, last_addr - push 7 - swap1 - sub + PUSH 7 + SWAP1 + SUB // stack: last_addr - 7, (length >> 56) % (1 << 8), 1 << 8, length, last_addr %mstore_kernel_general %pop3