diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm b/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm index 9d65b9ed..f1d7c3e9 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm @@ -1,19 +1,3 @@ -// Load the initial hash value (the IV, but with params XOR'd into the first word). -%macro blake2b_initial_hash_value - %blake2b_iv_i(7) - %blake2b_iv_i(6) - %blake2b_iv_i(5) - %blake2b_iv_i(4) - %blake2b_iv_i(3) - %blake2b_iv_i(2) - %blake2b_iv_i(1) - // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 - PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 - %blake2b_iv_i(0) - XOR - // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 -%endmacro - // Address where the working version of the hash value is stored. %macro blake2b_hash_value_addr PUSH 0 diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index a25158d9..cd1f6a80 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -2,7 +2,9 @@ global blake2b_compression: // stack: retdest PUSH 0 // stack: cur_block = 0, retdest - %blake2b_initial_hash_value + PUSH compression_loop + // stack: compression_loop, cur_block, retdest + %jump(blake2b_initial_hash_value) compression_loop: // stack: h_0, ..., h_7, cur_block, retdest @@ -181,40 +183,20 @@ compression_loop: POP POP // stack: cur_block, retdest - %blake2b_internal_state_addr - // stack: start, cur_block, retdest - PUSH 0 - // stack: round=0, start, cur_block, retdest // Run 12 rounds of G functions. 
- %rep 12 - // stack: round, start, cur_block, retdest - %call_blake2b_g_function(0, 4, 8, 12, 0, 1) - %call_blake2b_g_function(1, 5, 9, 13, 2, 3) - %call_blake2b_g_function(2, 6, 10, 14, 4, 5) - %call_blake2b_g_function(3, 7, 11, 15, 6, 7) - %call_blake2b_g_function(0, 5, 10, 15, 8, 9) - %call_blake2b_g_function(1, 6, 11, 12, 10, 11) - %call_blake2b_g_function(2, 7, 8, 13, 12, 13) - %call_blake2b_g_function(3, 4, 9, 14, 14, 15) - // stack: round, start, cur_block, retdest - %increment - // stack: round + 1, start, cur_block, retdest - %endrep - // stack: 12, start, cur_block, retdest - POP - POP - + PUSH g_functions_return + // stack: g_functions_return, cur_block, retdest + %blake2b_internal_state_addr + // stack: start, g_functions_return, cur_block, retdest + %jump(run_12_rounds_g_function) +g_functions_return: // Finalize hash value. // stack: cur_block, retdest - %blake2b_generate_new_hash_value(7) - %blake2b_generate_new_hash_value(6) - %blake2b_generate_new_hash_value(5) - %blake2b_generate_new_hash_value(4) - %blake2b_generate_new_hash_value(3) - %blake2b_generate_new_hash_value(2) - %blake2b_generate_new_hash_value(1) - %blake2b_generate_new_hash_value(0) + PUSH hash_generate_return + // stack: hash_generate_return, cur_block, retdest + %jump(blake2b_generate_all_hash_values) +hash_generate_return: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest DUP9 // stack: cur_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index 11e879fc..f2d3b1d2 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -3,28 +3,38 @@ // are in the range 0..16) in the internal state. // The internal state is stored in memory starting at the address start. 
// stack: a, b, c, d, x, y, start - %stack (indices: 4) -> (indices, indices) + DUP4 + DUP4 + DUP4 + DUP4 // stack: a, b, c, d, a, b, c, d, x, y, start DUP11 // stack: start, a, b, c, d, a, b, c, d, x, y, start - %stack (start, a, b, c, d) -> (d, start, c, start, b, start, a, start) - // stack: d, start, c, start, b, start, a, start, a, b, c, d, x, y, start ADD %mload_kernel_general - // stack: v[d], c, start, b, start, a, start, a, b, c, d, x, y, start - %stack (vd, remaining: 6) -> (remaining, vd) - // stack: c, start, b, start, a, start, v[d], a, b, c, d, x, y, start + // stack: v[a], b, c, d, a, b, c, d, x, y, start + SWAP1 + // stack: b, v[a], c, d, a, b, c, d, x, y, start + DUP11 + // stack: start, b, v[a], c, d, a, b, c, d, x, y, start ADD %mload_kernel_general - %stack (vc, remaining: 4) -> (remaining, vc) - // stack: b, start, a, start, v[c], v[d], a, b, c, d, x, y, start + // stack: v[b], v[a], c, d, a, b, c, d, x, y, start + SWAP2 + // stack: c, v[a], v[b], d, a, b, c, d, x, y, start + DUP11 + // stack: start, c, v[a], v[b], d, a, b, c, d, x, y, start ADD %mload_kernel_general - // stack: v[b], a, start, v[c], v[d], a, b, c, d, x, y, start - %stack (vb, remaining: 2) -> (remaining, vb) - // stack: a, start, v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: v[c], v[a], v[b], d, a, b, c, d, x, y, start + SWAP3 + // stack: d, v[a], v[b], v[c], a, b, c, d, x, y, start + DUP11 + // stack: start, d, v[a], v[b], v[c], a, b, c, d, x, y, start ADD %mload_kernel_general + // stack: v[d], v[a], v[b], v[c], a, b, c, d, x, y, start + %stack (vd, vs: 3) -> (vs, vd) // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start DUP2 // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start @@ -124,3 +134,45 @@ %blake2b_g_function // stack: round, start %endmacro + +run_g_function_round: + // stack: round, start, retdest + %call_blake2b_g_function(0, 4, 8, 12, 0, 1) + %call_blake2b_g_function(1, 5, 9, 13, 2, 3) + %call_blake2b_g_function(2, 6, 10, 14, 4, 5) + 
%call_blake2b_g_function(3, 7, 11, 15, 6, 7) + %call_blake2b_g_function(0, 5, 10, 15, 8, 9) + %call_blake2b_g_function(1, 6, 11, 12, 10, 11) + %call_blake2b_g_function(2, 7, 8, 13, 12, 13) + %call_blake2b_g_function(3, 4, 9, 14, 14, 15) + %stack (r, s, ret) -> (ret, r, s) + // stack: retdest, round, start + JUMP + +global run_12_rounds_g_function: + // stack: start, retdest + PUSH 0 + // stack: round=0, start, retdest +run_next_round_g_function: + // stack: round, start, retdest + PUSH run_next_round_g_function_return + // stack: run_next_round_g_function_return, round, start, retdest + SWAP2 + // stack: start, round, run_next_round_g_function_return, retdest + SWAP1 + // stack: round, start, run_next_round_g_function_return, retdest + %jump(run_g_function_round) +run_next_round_g_function_return: + // stack: round, start, retdest + %increment + // stack: round+1, start, retdest + DUP1 + // stack: round+1, round+1, start, retdest + %lt_const(12) + // stack: round+1 < 12, round+1, start, retdest + %jumpi(run_next_round_g_function) + // stack: round+1, start, retdest + %pop2 + // stack: retdest + JUMP + diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm index 712a97c0..654b51b3 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm @@ -1,18 +1,54 @@ -%macro blake2b_generate_new_hash_value(i) +blake2b_generate_new_hash_value: + // stack: i, retdest %blake2b_hash_value_addr - %add_const($i) + // stack: addr, i, retdest + DUP2 + ADD %mload_kernel_general - // stack: h_i, ... + // stack: h_i, i, retdest %blake2b_internal_state_addr - %add_const($i) + // stack: addr, h_i, i, retdest + DUP3 + ADD %mload_kernel_general - // stack: v_i, h_i, ... 
+ // stack: v_i, h_i, i, retdest %blake2b_internal_state_addr - %add_const($i) + // stack: addr, v_i, h_i, i, retdest + SWAP1 + // stack: v_i, addr, h_i, i, retdest + SWAP3 + // stack: i, addr, h_i, v_i, retdest + ADD %add_const(8) %mload_kernel_general - // stack: v_(i+8), v_i, h_i, ... + // stack: v_(i+8), h_i, v_i, retdest XOR XOR - // stack: h_i' = v_(i+8) ^ v_i ^ h_i, ... -%endmacro + // stack: h_i' = v_(i+8) ^ v_i ^ h_i, retdest + SWAP1 + JUMP + +global blake2b_generate_all_hash_values: + // stack: retdest + PUSH 8 + // stack: i=8, retdest +blake2b_generate_hash_loop: + // stack: i, h_i', ..., h_7', retdest + %decrement + // stack: i-1, h_i', ..., h_7', retdest + PUSH blake2b_generate_hash_return + // stack: blake2b_generate_hash_return, i-1, h_i', ..., h_7', retdest + DUP2 + // stack: i-1, blake2b_generate_hash_return, i-1, h_i', ..., h_7', retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return: + // stack: h_(i-1)', i-1, h_i', ..., h_7', retdest + SWAP1 + // stack: i-1, h_(i-1)', h_i', ..., h_7', retdest + DUP1 + // stack: i-1, i-1, h_(i-1)', ..., h_7', retdest + %jumpi(blake2b_generate_hash_loop) + // stack: i-1=0, h_0', ..., h_7', retdest + %stack (i, h: 8, ret) -> (ret, h) + // stack: retdest, h_0'...h_7' + JUMP diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm b/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm index 94e9ba27..48df86a3 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm @@ -33,30 +33,63 @@ global blake2b_iv_const: BYTES 91, 224, 205, 25 BYTES 19, 126, 33, 121 -%macro blake2b_iv - // stack: i, ... +global blake2b_iv: + // stack: i, retdest PUSH blake2b_iv_const - // stack: blake2b_iv_const, i, ... + // stack: blake2b_iv_const, i, retdest SWAP1 - // stack: i, blake2b_iv_const, ... + // stack: i, blake2b_iv_const, retdest %mul_const(8) ADD - // stack: blake2b_iv_const + 2 * i, ... 
+ // stack: blake2b_iv_const + 8 * i, retdest DUP1 - // stack: blake2b_iv_const + 2 * i, blake2b_iv_const + 2 * i, ... + // stack: blake2b_iv_const + 8 * i, blake2b_iv_const + 8 * i, retdest %add_const(4) - // stack: blake2b_iv_const + 2 * i + 1, blake2b_iv_const + 2 * i, ... + // stack: blake2b_iv_const + 8 * i + 4, blake2b_iv_const + 8 * i, retdest %mload_kernel_code_u32 SWAP1 %mload_kernel_code_u32 - // stack: IV_i[32:], IV_i[:32], ... + // stack: IV_i[32:], IV_i[:32], retdest %shl_const(32) - // stack: IV_i[32:] << 32, IV_i[:32], ... + // stack: IV_i[32:] << 32, IV_i[:32], retdest ADD // OR - // stack: IV_i, ... + // stack: IV_i, retdest + SWAP1 + JUMP + +%macro blake2b_iv + %stack (i) -> (i, %%after) + %jump(blake2b_iv) +%%after: %endmacro -%macro blake2b_iv_i(i) - PUSH $i - %blake2b_iv -%endmacro +// Load the initial hash value (the IV, but with params XOR'd into the first word). +global blake2b_initial_hash_value: + // stack: retdest + PUSH 8 + // stack: i=8, retdest +blake2b_initial_hash_loop: + // stack: i, IV_i, ..., IV_7, retdest + %decrement + // stack: i-1, IV_i, ..., IV_7, retdest + PUSH blake2b_initial_hash_return + // stack: blake2b_initial_hash_return, i-1, IV_i, ..., IV_7, retdest + DUP2 + // stack: i-1, blake2b_initial_hash_return, i-1, IV_i, ..., IV_7, retdest + %jump(blake2b_iv) +blake2b_initial_hash_return: + // stack: IV_(i-1), i-1, IV_i, ..., IV_7, retdest + SWAP1 + // stack: i-1, IV_(i-1), IV_i, ..., IV_7, retdest + DUP1 + // stack: i-1, i-1, IV_(i-1), ..., IV_7, retdest + %jumpi(blake2b_initial_hash_loop) + // stack: i-1=0, IV_0, ..., IV_7, retdest + POP + // stack: IV_0, ..., IV_7, retdest + PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 + XOR + // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7, retdest + %stack(iv: 8, ret) -> (ret, iv) + JUMP + diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm index 5277e611..1ef455f1 100644 --- 
a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm @@ -58,17 +58,28 @@ global permutation_9_constants: BYTES 15, 11, 9, 14 BYTES 3, 12, 13, 0 -%macro blake2b_permutation - // stack: round, i +global blake2b_permutation: + // stack: i, round, retdest PUSH permutation_0_constants - // stack: permutation_0_constants, round, i - SWAP1 - // stack: round, permutation_1_constants, i + // stack: permutation_0_constants, i, round, retdest + SWAP2 + // stack: round, i, permutation_0_constants, retdest %mod_const(10) - // stack: round % 10, permutation_1_constants, i + // stack: round % 10, i, permutation_0_constants, retdest %mul_const(16) ADD - // stack: permutation_(round)_constants, i ADD %mload_kernel_code + // stack: permutation_(round%10)_constants[i], retdest + SWAP1 + JUMP + +%macro blake2b_permutation + // stack: round, i + PUSH %%after + // stack: %%after, round, i + SWAP2 + // stack: i, round, %%after + %jump(blake2b_permutation) +%%after: %endmacro