From 93abd35ffb0abea517f93646955ca0dabde75f63 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 7 Mar 2023 11:06:49 -0800 Subject: [PATCH 01/12] optimizations --- .../kernel/asm/hash/blake2b/compression.asm | 29 +-- .../kernel/asm/hash/blake2b/g_functions.asm | 205 +++++++++++++----- 2 files changed, 154 insertions(+), 80 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index a25158d9..840d8c54 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -181,30 +181,15 @@ compression_loop: POP POP // stack: cur_block, retdest - %blake2b_internal_state_addr - // stack: start, cur_block, retdest - PUSH 0 - // stack: round=0, start, cur_block, retdest // Run 12 rounds of G functions. - %rep 12 - // stack: round, start, cur_block, retdest - %call_blake2b_g_function(0, 4, 8, 12, 0, 1) - %call_blake2b_g_function(1, 5, 9, 13, 2, 3) - %call_blake2b_g_function(2, 6, 10, 14, 4, 5) - %call_blake2b_g_function(3, 7, 11, 15, 6, 7) - %call_blake2b_g_function(0, 5, 10, 15, 8, 9) - %call_blake2b_g_function(1, 6, 11, 12, 10, 11) - %call_blake2b_g_function(2, 7, 8, 13, 12, 13) - %call_blake2b_g_function(3, 4, 9, 14, 14, 15) - // stack: round, start, cur_block, retdest - %increment - // stack: round + 1, start, cur_block, retdest - %endrep - // stack: 12, start, cur_block, retdest - POP - POP - + PUSH g_functions_return + // stack: g_functions_return, cur_block, retdest + %blake2b_internal_state_addr + // stack: start, g_functions_return, cur_block, retdest + %jump(run_12_rounds_g_function) +g_functions_return: + // Finalize hash value. 
// stack: cur_block, retdest %blake2b_generate_new_hash_value(7) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index 11e879fc..831841c5 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -1,89 +1,89 @@ -%macro blake2b_g_function +blake2b_g_function: // Function to mix two input words, x and y, into the four words indexed by a, b, c, d (which // are in the range 0..16) in the internal state. // The internal state is stored in memory starting at the address start. - // stack: a, b, c, d, x, y, start + // stack: a, b, c, d, x, y, start, retdest %stack (indices: 4) -> (indices, indices) - // stack: a, b, c, d, a, b, c, d, x, y, start + // stack: a, b, c, d, a, b, c, d, x, y, start, retdest DUP11 - // stack: start, a, b, c, d, a, b, c, d, x, y, start + // stack: start, a, b, c, d, a, b, c, d, x, y, start, retdest %stack (start, a, b, c, d) -> (d, start, c, start, b, start, a, start) - // stack: d, start, c, start, b, start, a, start, a, b, c, d, x, y, start + // stack: d, start, c, start, b, start, a, start, a, b, c, d, x, y, start, retdest ADD %mload_kernel_general - // stack: v[d], c, start, b, start, a, start, a, b, c, d, x, y, start + // stack: v[d], c, start, b, start, a, start, a, b, c, d, x, y, start, retdest %stack (vd, remaining: 6) -> (remaining, vd) - // stack: c, start, b, start, a, start, v[d], a, b, c, d, x, y, start + // stack: c, start, b, start, a, start, v[d], a, b, c, d, x, y, start, retdest ADD %mload_kernel_general %stack (vc, remaining: 4) -> (remaining, vc) - // stack: b, start, a, start, v[c], v[d], a, b, c, d, x, y, start + // stack: b, start, a, start, v[c], v[d], a, b, c, d, x, y, start, retdest ADD %mload_kernel_general - // stack: v[b], a, start, v[c], v[d], a, b, c, d, x, y, start + // stack: v[b], a, start, v[c], v[d], a, b, c, d, x, y, start, retdest %stack (vb, remaining: 2) -> (remaining, 
vb) - // stack: a, start, v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: a, start, v[b], v[c], v[d], a, b, c, d, x, y, start, retdest ADD %mload_kernel_general - // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start, retdest DUP2 - // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start, retdest DUP10 - // stack: x, v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: x, v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start, retdest ADD ADD %as_u64 - // stack: v[a]' = (v[a] + v[b] + x) % 2^64, v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: v[a]' = (v[a] + v[b] + x) % 2^64, v[b], v[c], v[d], a, b, c, d, x, y, start, retdest %stack (a, b, c, d) -> (a, d, a, b, c, d) - // stack: v[a]', v[d], v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: v[a]', v[d], v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start, retdest XOR %rotr_64(32) - // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start, retdest %stack (top: 4, vd) -> (top) - // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start + // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start, retdest %stack (d, a, b, c) -> (c, d, a, b, d) - // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start + // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start, retdest ADD %as_u64 - // stack: v[c]' = (v[c] + v[d]') % 2^64, v[a]', v[b], v[d]', a, b, c, d, x, y, start + // stack: v[c]' = (v[c] + v[d]') % 2^64, v[a]', v[b], v[d]', a, b, c, d, x, y, start, retdest %stack (c, a, b, d) -> (b, c, a, c, d) - // stack: v[b], v[c]', v[a]', v[c]', v[d]', a, b, c, d, x, y, start + // stack: v[b], v[c]', v[a]', v[c]', v[d]', a, b, c, d, x, y, start, retdest XOR %rotr_64(24) - // stack: v[b]' = (v[b] ^ v[c]') >>> 24, 
v[a]', v[c]', v[d]', a, b, c, d, x, y, start + // stack: v[b]' = (v[b] ^ v[c]') >>> 24, v[a]', v[c]', v[d]', a, b, c, d, x, y, start, retdest SWAP1 - // stack: v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start + // stack: v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest DUP2 - // stack: v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start + // stack: v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest DUP11 - // stack: y, v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start + // stack: y, v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest ADD ADD %as_u64 - // stack: v[a]'' = (v[a]' + v[b]' + y) % 2^64, v[b]', v[c]', v[d]', a, b, c, d, x, y, start + // stack: v[a]'' = (v[a]' + v[b]' + y) % 2^64, v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest SWAP3 - // stack: v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start + // stack: v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest DUP4 - // stack: v[a]'', v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start + // stack: v[a]'', v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest XOR %rotr_64(16) - // stack: v[d]'' = (v[a]'' ^ v[d]') >>> 8, v[b]', v[c]', v[a]'', a, b, c, d, x, y, start + // stack: v[d]'' = (v[a]'' ^ v[d]') >>> 8, v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest SWAP2 - // stack: v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start + // stack: v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest DUP3 - // stack: v[d]'', v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start + // stack: v[d]'', v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest ADD %as_u64 - // stack: v[c]'' = (v[c]' + v[d]'') % 2^64, v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start + // stack: v[c]'' = (v[c]' + v[d]'') % 2^64, v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest DUP1 - // stack: v[c]'', v[c]'', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start + // stack: v[c]'', v[c]'', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, 
start, retdest SWAP2 - // stack: v[b]', v[c]'', v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start + // stack: v[b]', v[c]'', v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest XOR %rotr_64(63) - // stack: v[b]'' = (v[b]' ^ v[c]'') >>> 7, v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start + // stack: v[b]'' = (v[b]' ^ v[c]'') >>> 7, v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest %stack (vb, vc, vd, va, a, b, c, d, x, y, start) -> (start, a, va, start, b, vb, start, c, vc, start, d, vd) - // stack: start, a, v[a]'', start, b, v[b]'', start, c, v[c]'', start, d, v[d]'' + // stack: start, a, v[a]'', start, b, v[b]'', start, c, v[c]'', start, d, v[d]'', retdest ADD %mstore_kernel_general ADD @@ -92,35 +92,124 @@ %mstore_kernel_general ADD %mstore_kernel_general -%endmacro + // stack: retdest + JUMP -%macro call_blake2b_g_function(a, b, c, d, x_idx, y_idx) - // stack: round, start - PUSH $y_idx - DUP2 - // stack: round, y_idx, round, start +call_blake2b_g_function: + // stack: a, b, c, d, x_idx, y_idx, round, start, retdest + DUP6 + // stack: y_idx, a, b, c, d, x_idx, y_idx, round, start, retdest + DUP8 + // stack: round, y_idx, a, b, c, d, x_idx, y_idx, round, start, retdest %blake2b_permutation - // stack: s[y_idx], round, start + // stack: s[y_idx], a, b, c, d, x_idx, y_idx, round, start, retdest %blake2b_message_addr ADD %mload_kernel_general - // stack: m[s[y_idx]], round, start - PUSH $x_idx - DUP3 - // stack: round, 2, m[s[y_idx]], round, start + // stack: m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest + DUP6 + // stack: x_idx, m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest + DUP9 + // stack: round, x_idx, m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest %blake2b_permutation - // stack: s[x_idx], m[s[y_idx]], round, start + // stack: s[x_idx], m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest %blake2b_message_addr ADD %mload_kernel_general - // stack: m[s[x_idx]], m[s[y_idx]], round, start - %stack (ss: 
2, r, s) -> (ss, s, r, s) - // stack: m[s[x_idx]], m[s[y_idx]], start, round, start - PUSH $d - PUSH $c - PUSH $b - PUSH $a - // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, round, start - %blake2b_g_function - // stack: round, start -%endmacro + // stack: m[s[x_idx]], m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest + %stack (mm: 2, abcd: 4, xy: 2, r, s) -> (abcd, mm, s, r, s) + // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, round, start, retdest + %jump(blake2b_g_function) + +global run_g_function_round: + // stack: round, start, retdest + PUSH g_function_return_1 + // stack: g_function_return_1, round, start, retdest + %stack (ret, r, s) -> (0, 4, 8, 12, 0, 1, r, s, ret, r, s) + // stack: a=0, b=4, c=8, d=12, x_idx=0, y_idx=1, round, start, g_function_return_1, round, start, retdest + %jump(call_blake2b_g_function) +g_function_return_1: + // stack: round, start, retdest + PUSH g_function_return_2 + // stack: g_function_return_2, round, start, retdest + %stack (ret, r, s) -> (1, 5, 9, 13, 2, 3, r, s, ret, r, s) + // stack: a=1, b=5, c=9, d=13, x_idx=2, y_idx=3, round, start, g_function_return_2, round, start, retdest + %jump(call_blake2b_g_function) +g_function_return_2: + // stack: round, start, retdest + PUSH g_function_return_3 + // stack: g_function_return_3, round, start, retdest + %stack (ret, r, s) -> (2, 6, 10, 14, 4, 5, r, s, ret, r, s) + // stack: a=2, b=6, c=10, d=14, x_idx=4, y_idx=5, round, start, g_function_return_3, round, start, retdest + %jump(call_blake2b_g_function) +g_function_return_3: + // stack: round, start, retdest + PUSH g_function_return_4 + // stack: g_function_return_4, round, start, retdest + %stack (ret, r, s) -> (3, 7, 11, 15, 6, 7, r, s, ret, r, s) + // stack: a=3, b=7, c=11, d=15, x_idx=6, y_idx=7, round, start, g_function_return_4, round, start, retdest + %jump(call_blake2b_g_function) +g_function_return_4: + // stack: round, start, retdest + PUSH g_function_return_5 + // stack: g_function_return_5, 
round, start, retdest + %stack (ret, r, s) -> (0, 5, 10, 15, 8, 9, r, s, ret, r, s) + // stack: a=0, b=5, c=10, d=15, x_idx=8, y_idx=9, round, start, g_function_return_5, round, start, retdest + %jump(call_blake2b_g_function) +g_function_return_5: + // stack: round, start, retdest + PUSH g_function_return_6 + // stack: g_function_return_6, round, start, retdest + %stack (ret, r, s) -> (1, 6, 11, 12, 10, 11, r, s, ret, r, s) + // stack: a=1, b=6, c=11, d=12, x_idx=10, y_idx=11, round, start, g_function_return_6, round, start, retdest + %jump(call_blake2b_g_function) +g_function_return_6: + // stack: round, start, retdest + PUSH g_function_return_7 + // stack: g_function_return_7, round, start, retdest + %stack (ret, r, s) -> (2, 7, 8, 13, 12, 13, r, s, ret, r, s) + // stack: a=2, b=7, c=8, d=13, x_idx=12, y_idx=13, round, start, g_function_return_7, round, start, retdest + %jump(call_blake2b_g_function) +g_function_return_7: + // stack: round, start, retdest + PUSH g_function_return_8 + // stack: g_function_return_8, round, start, retdest + %stack (ret, r, s) -> (3, 4, 9, 14, 14, 15, r, s, ret, r, s) + // stack: a=3, b=4, c=9, d=14, x_idx=14, y_idx=15, round, start, g_function_return_8, round, start, retdest + %jump(call_blake2b_g_function) +g_function_return_8: + // stack: round, start, retdest + SWAP1 + // stack: start, round, retdest + SWAP2 + // stack: retdest, round, start + JUMP + + +global run_12_rounds_g_function: + // stack: start, retdest + PUSH 0 + // stack: round=0, start, retdest +run_next_round_g_function: + // stack: round, start, retdest + PUSH run_g_function_round_return + // stack: run_g_function_round_return, round, start, retdest + SWAP2 + // stack: start, round, run_g_function_round_return, retdest + SWAP1 + // stack: round, start, run_g_function_round_return, retdest + %jump(run_g_function_round) +run_next_round_g_function_return: + // stack: round, start, retdest + %increment + // stack: round+1, start, retdest + DUP1 + // stack: round+1, 
round+1, start, retdest + %lt_const(12) + // stack: round+1 < 12, round+1, start, retdest + %jumpi(run_next_round_g_function) + // stack: round+1, start, retdest + %pop2 + // stack: retdest + JUMP + From 5f592e60dc1248a80d54ed39139800eb97e9f95a Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 7 Mar 2023 12:25:34 -0800 Subject: [PATCH 02/12] fixes --- evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm | 12 ++++++------ evm/src/cpu/kernel/tests/hash.rs | 2 ++ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index 831841c5..ff3ee43a 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -117,8 +117,8 @@ call_blake2b_g_function: ADD %mload_kernel_general // stack: m[s[x_idx]], m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest - %stack (mm: 2, abcd: 4, xy: 2, r, s) -> (abcd, mm, s, r, s) - // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, round, start, retdest + %stack (mm: 2, abcd: 4, xy: 2, r, s) -> (abcd, mm, s) + // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, retdest %jump(blake2b_g_function) global run_g_function_round: @@ -192,12 +192,12 @@ global run_12_rounds_g_function: // stack: round=0, start, retdest run_next_round_g_function: // stack: round, start, retdest - PUSH run_g_function_round_return - // stack: run_g_function_round_return, round, start, retdest + PUSH run_next_round_g_function_return + // stack: run_next_round_g_function_return, round, start, retdest SWAP2 - // stack: start, round, run_g_function_round_return, retdest + // stack: start, round, run_next_round_g_function_return, retdest SWAP1 - // stack: round, start, run_g_function_round_return, retdest + // stack: round, start, run_next_round_g_function_return, retdest %jump(run_g_function_round) run_next_round_g_function_return: // stack: round, start, retdest diff --git 
a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index bc73ecd5..cc2c96eb 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -79,6 +79,8 @@ fn prepare_test( // Run the interpeter let result = run_interpreter_with_memory(interpreter_setup).unwrap(); + dbg!(result.stack().to_vec()); + Ok((expected, result.stack().to_vec())) } From 70475a5a87bc3b2d9611bb4b66617dc636a43a1d Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 7 Mar 2023 12:27:08 -0800 Subject: [PATCH 03/12] cleanup --- evm/src/cpu/kernel/tests/hash.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index cc2c96eb..bc73ecd5 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -79,8 +79,6 @@ fn prepare_test( // Run the interpeter let result = run_interpreter_with_memory(interpreter_setup).unwrap(); - dbg!(result.stack().to_vec()); - Ok((expected, result.stack().to_vec())) } From 40f90d8312f51e81407c48d7e464924b71d8f0d1 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 7 Mar 2023 15:45:20 -0800 Subject: [PATCH 04/12] hash function optimization --- .../kernel/asm/hash/blake2b/compression.asm | 12 +-- evm/src/cpu/kernel/asm/hash/blake2b/hash.asm | 91 +++++++++++++++++-- 2 files changed, 86 insertions(+), 17 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index 840d8c54..fdf02d69 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -192,14 +192,10 @@ g_functions_return: // Finalize hash value. 
// stack: cur_block, retdest - %blake2b_generate_new_hash_value(7) - %blake2b_generate_new_hash_value(6) - %blake2b_generate_new_hash_value(5) - %blake2b_generate_new_hash_value(4) - %blake2b_generate_new_hash_value(3) - %blake2b_generate_new_hash_value(2) - %blake2b_generate_new_hash_value(1) - %blake2b_generate_new_hash_value(0) + PUSH hash_generate_return + // stack: hash_generate_return, cur_block, retdest + %jump(blake2b_generate_all_hash_values) +hash_generate_return: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest DUP9 // stack: cur_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block, retdest diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm index 712a97c0..91a5530e 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm @@ -1,18 +1,91 @@ -%macro blake2b_generate_new_hash_value(i) +blake2b_generate_new_hash_value: + // stack: i, retdest %blake2b_hash_value_addr - %add_const($i) + // stack: addr, i, retdest + DUP2 + ADD %mload_kernel_general - // stack: h_i, ... + // stack: h_i, i, retdest %blake2b_internal_state_addr - %add_const($i) + // stack: addr, h_i, i, retdest + DUP3 + ADD %mload_kernel_general - // stack: v_i, h_i, ... + // stack: v_i, h_i, i, retdest %blake2b_internal_state_addr - %add_const($i) + // stack: addr, v_i, h_i, i, retdest + DUP4 + ADD %add_const(8) %mload_kernel_general - // stack: v_(i+8), v_i, h_i, ... + // stack: v_(i+8), v_i, h_i, i, retdest XOR XOR - // stack: h_i' = v_(i+8) ^ v_i ^ h_i, ... 
-%endmacro + // stack: h_i' = v_(i+8) ^ v_i ^ h_i, i, retdest + SWAP1 + POP + // stack: h_i', retdest + SWAP1 + JUMP + +global blake2b_generate_all_hash_values: + // stack: retdest + PUSH blake2b_generate_hash_return_7 + // stack: blake2b_generate_hash_return_7, retdest + PUSH 7 + // stack: 7, blake2b_generate_hash_return_7, retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return_7: + // stack: h_7', retdest + PUSH blake2b_generate_hash_return_6 + // stack: blake2b_generate_hash_return_6, h_7', retdest + PUSH 6 + // stack: 6, blake2b_generate_hash_return_6, h_7', retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return_6: + // stack: h_6', h_7', retdest + PUSH blake2b_generate_hash_return_5 + // stack: blake2b_generate_hash_return_5, h_6', h_7', retdest + PUSH 5 + // stack: 5, blake2b_generate_hash_return_5, h_6', h_7', retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return_5: + // stack: h_5', h_6', h_7', retdest + PUSH blake2b_generate_hash_return_4 + // stack: blake2b_generate_hash_return_4, h_5', h_6', h_7', retdest + PUSH 4 + // stack: 4, blake2b_generate_hash_return_4, h_5', h_6', h_7', retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return_4: + // stack: h_4', h_5', h_6', h_7', retdest + PUSH blake2b_generate_hash_return_3 + // stack: blake2b_generate_hash_return_3, h_4', h_5', h_6', h_7', retdest + PUSH 3 + // stack: 3, blake2b_generate_hash_return_3, h_4', h_5', h_6', h_7', retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return_3: + // stack: h_3', h_4', h_5', h_6', h_7', retdest + PUSH blake2b_generate_hash_return_2 + // stack: blake2b_generate_hash_return_2, h_3', h_4', h_5', h_6', h_7', retdest + PUSH 2 + // stack: 2, blake2b_generate_hash_return_2, h_3', h_4', h_5', h_6', h_7', retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return_2: + // stack: h_2', h_3', h_4', h_5', h_6', h_7', retdest + PUSH 
blake2b_generate_hash_return_1 + // stack: blake2b_generate_hash_return_1, h_2', h_3', h_4', h_5', h_6', h_7', retdest + PUSH 1 + // stack: 1, blake2b_generate_hash_return_1, h_2', h_3', h_4', h_5', h_6', h_7', retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return_1: + // stack: h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest + PUSH blake2b_generate_hash_return_0 + // stack: blake2b_generate_hash_return_0, h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest + PUSH 0 + // stack: 0, blake2b_generate_hash_return_0, h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest + %jump(blake2b_generate_new_hash_value) +blake2b_generate_hash_return_0: + // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest + %stack (h: 8, ret) -> (ret, h) + // stack: retdest, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7' + JUMP \ No newline at end of file From c37d1e25fcf4697c5ef3033b7a7390b37b3043b1 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 7 Mar 2023 15:45:52 -0800 Subject: [PATCH 05/12] optimize hash generation further --- evm/src/cpu/kernel/asm/hash/blake2b/hash.asm | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm index 91a5530e..945c7fb0 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm @@ -14,17 +14,17 @@ blake2b_generate_new_hash_value: // stack: v_i, h_i, i, retdest %blake2b_internal_state_addr // stack: addr, v_i, h_i, i, retdest - DUP4 + SWAP1 + // stack: v_i, addr, h_i, i, retdest + SWAP3 + // stack: i, addr, h_i, v_i, retdest ADD %add_const(8) %mload_kernel_general - // stack: v_(i+8), v_i, h_i, i, retdest + // stack: v_(i+8), h_i, v_i, retdest XOR XOR - // stack: h_i' = v_(i+8) ^ v_i ^ h_i, i, retdest - SWAP1 - POP - // stack: h_i', retdest + // stack: h_i' = v_(i+8) ^ v_i ^ h_i, retdest SWAP1 JUMP @@ -88,4 +88,4 @@ blake2b_generate_hash_return_0: // 
stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest %stack (h: 8, ret) -> (ret, h) // stack: retdest, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7' - JUMP \ No newline at end of file + JUMP From 9d8d81b4b0e4f5570edbda8414d13b4fa14d07b5 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 7 Mar 2023 16:19:15 -0800 Subject: [PATCH 06/12] optimize hash generation further further --- evm/src/cpu/kernel/asm/hash/blake2b/hash.asm | 77 +++++--------------- evm/src/cpu/kernel/tests/hash.rs | 2 + 2 files changed, 22 insertions(+), 57 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm index 945c7fb0..654b51b3 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/hash.asm @@ -30,62 +30,25 @@ blake2b_generate_new_hash_value: global blake2b_generate_all_hash_values: // stack: retdest - PUSH blake2b_generate_hash_return_7 - // stack: blake2b_generate_hash_return_7, retdest - PUSH 7 - // stack: 7, blake2b_generate_hash_return_7, retdest + PUSH 8 + // stack: i=8, retdest +blake2b_generate_hash_loop: + // stack: i, h_i', ..., h_7', retdest + %decrement + // stack: i-1, h_i', ..., h_7', retdest + PUSH blake2b_generate_hash_return + // stack: blake2b_generate_hash_return, i-1, h_i', ..., h_7', retdest + DUP2 + // stack: i-1, blake2b_generate_hash_return, i-1, h_i', ..., h_7', retdest %jump(blake2b_generate_new_hash_value) -blake2b_generate_hash_return_7: - // stack: h_7', retdest - PUSH blake2b_generate_hash_return_6 - // stack: blake2b_generate_hash_return_6, h_7', retdest - PUSH 6 - // stack: 6, blake2b_generate_hash_return_6, h_7', retdest - %jump(blake2b_generate_new_hash_value) -blake2b_generate_hash_return_6: - // stack: h_6', h_7', retdest - PUSH blake2b_generate_hash_return_5 - // stack: blake2b_generate_hash_return_5, h_6', h_7', retdest - PUSH 5 - // stack: 5, blake2b_generate_hash_return_5, h_6', h_7', retdest - 
%jump(blake2b_generate_new_hash_value) -blake2b_generate_hash_return_5: - // stack: h_5', h_6', h_7', retdest - PUSH blake2b_generate_hash_return_4 - // stack: blake2b_generate_hash_return_4, h_5', h_6', h_7', retdest - PUSH 4 - // stack: 4, blake2b_generate_hash_return_4, h_5', h_6', h_7', retdest - %jump(blake2b_generate_new_hash_value) -blake2b_generate_hash_return_4: - // stack: h_4', h_5', h_6', h_7', retdest - PUSH blake2b_generate_hash_return_3 - // stack: blake2b_generate_hash_return_3, h_4', h_5', h_6', h_7', retdest - PUSH 3 - // stack: 3, blake2b_generate_hash_return_3, h_4', h_5', h_6', h_7', retdest - %jump(blake2b_generate_new_hash_value) -blake2b_generate_hash_return_3: - // stack: h_3', h_4', h_5', h_6', h_7', retdest - PUSH blake2b_generate_hash_return_2 - // stack: blake2b_generate_hash_return_2, h_3', h_4', h_5', h_6', h_7', retdest - PUSH 2 - // stack: 2, blake2b_generate_hash_return_2, h_3', h_4', h_5', h_6', h_7', retdest - %jump(blake2b_generate_new_hash_value) -blake2b_generate_hash_return_2: - // stack: h_2', h_3', h_4', h_5', h_6', h_7', retdest - PUSH blake2b_generate_hash_return_1 - // stack: blake2b_generate_hash_return_1, h_2', h_3', h_4', h_5', h_6', h_7', retdest - PUSH 1 - // stack: 1, blake2b_generate_hash_return_1, h_2', h_3', h_4', h_5', h_6', h_7', retdest - %jump(blake2b_generate_new_hash_value) -blake2b_generate_hash_return_1: - // stack: h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest - PUSH blake2b_generate_hash_return_0 - // stack: blake2b_generate_hash_return_0, h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest - PUSH 0 - // stack: 0, blake2b_generate_hash_return_0, h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest - %jump(blake2b_generate_new_hash_value) -blake2b_generate_hash_return_0: - // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', retdest - %stack (h: 8, ret) -> (ret, h) - // stack: retdest, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7' +blake2b_generate_hash_return: + // stack: h_(i-1)', i-1, h_i', ..., 
h_7', retdest + SWAP1 + // stack: i-1, h_(i-1)', h_i', ..., h_7', retdest + DUP1 + // stack: i-1, i-1, h_(i-1)', ..., h_7', retdest + %jumpi(blake2b_generate_hash_loop) + // stack: i-1=0, h_0', ..., h_7', retdest + %stack (i, h: 8, ret) -> (ret, h) + // stack: retdest, h_0'...h_7' JUMP diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index bc73ecd5..cc2c96eb 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -79,6 +79,8 @@ fn prepare_test( // Run the interpeter let result = run_interpreter_with_memory(interpreter_setup).unwrap(); + dbg!(result.stack().to_vec()); + Ok((expected, result.stack().to_vec())) } From 5994f4d932e538f69cd5904b07e22b4c66456614 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 7 Mar 2023 16:49:28 -0800 Subject: [PATCH 07/12] cleanup --- evm/src/cpu/kernel/asm/hash/blake2b/compression.asm | 1 - 1 file changed, 1 deletion(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index fdf02d69..11e5389b 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -189,7 +189,6 @@ compression_loop: // stack: start, g_functions_return, cur_block, retdest %jump(run_12_rounds_g_function) g_functions_return: - // Finalize hash value. 
// stack: cur_block, retdest PUSH hash_generate_return From 0f55956ade0ef43f99dd07daf6807e5791f7eb20 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 7 Mar 2023 17:17:23 -0800 Subject: [PATCH 08/12] optimized initial hash value generation --- .../cpu/kernel/asm/hash/blake2b/addresses.asm | 16 ----- .../kernel/asm/hash/blake2b/compression.asm | 4 +- evm/src/cpu/kernel/asm/hash/blake2b/iv.asm | 61 ++++++++++++++----- evm/src/cpu/kernel/tests/hash.rs | 2 - 4 files changed, 50 insertions(+), 33 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm b/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm index 9d65b9ed..f1d7c3e9 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/addresses.asm @@ -1,19 +1,3 @@ -// Load the initial hash value (the IV, but with params XOR'd into the first word). -%macro blake2b_initial_hash_value - %blake2b_iv_i(7) - %blake2b_iv_i(6) - %blake2b_iv_i(5) - %blake2b_iv_i(4) - %blake2b_iv_i(3) - %blake2b_iv_i(2) - %blake2b_iv_i(1) - // stack: IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 - PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 - %blake2b_iv_i(0) - XOR - // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7 -%endmacro - // Address where the working version of the hash value is stored. 
%macro blake2b_hash_value_addr PUSH 0 diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index 11e5389b..cd1f6a80 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -2,7 +2,9 @@ global blake2b_compression: // stack: retdest PUSH 0 // stack: cur_block = 0, retdest - %blake2b_initial_hash_value + PUSH compression_loop + // stack: compression_loop, cur_block, retdest + %jump(blake2b_initial_hash_value) compression_loop: // stack: h_0, ..., h_7, cur_block, retdest diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm b/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm index 174afd33..6236fbef 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm @@ -33,30 +33,63 @@ global blake2b_iv_const: BYTES 91, 224, 205, 25 BYTES 19, 126, 33, 121 -%macro blake2b_iv - // stack: i, ... +global blake2b_iv: + // stack: i, retdest PUSH blake2b_iv_const - // stack: blake2b_iv_const, i, ... + // stack: blake2b_iv_const, i, retdest SWAP1 - // stack: i, blake2b_iv_const, ... + // stack: i, blake2b_iv_const, retdest %mul_const(8) ADD - // stack: blake2b_iv_const + 2 * i, ... + // stack: blake2b_iv_const + 2 * i, retdest DUP1 - // stack: blake2b_iv_const + 2 * i, blake2b_iv_const + 2 * i, ... + // stack: blake2b_iv_const + 2 * i, blake2b_iv_const + 2 * i, retdest %add_const(4) - // stack: blake2b_iv_const + 2 * i + 1, blake2b_iv_const + 2 * i, ... + // stack: blake2b_iv_const + 2 * i + 1, blake2b_iv_const + 2 * i, retdest %mload_kernel_code_u32 SWAP1 %mload_kernel_code_u32 - // stack: IV_i[32:], IV_i[:32], ... + // stack: IV_i[32:], IV_i[:32], retdest %shl_const(32) - // stack: IV_i[32:] << 32, IV_i[:32], ... + // stack: IV_i[32:] << 32, IV_i[:32], retdest OR - // stack: IV_i, ... 
+ // stack: IV_i, retdest + SWAP1 + JUMP + +%macro blake2b_iv + %stack (i) -> (i, %%after) + %jump(blake2b_iv) +%%after: %endmacro -%macro blake2b_iv_i(i) - PUSH $i - %blake2b_iv -%endmacro +// Load the initial hash value (the IV, but with params XOR'd into the first word). +global blake2b_initial_hash_value: + // stack: retdest + PUSH 8 + // stack: i=8, retdest +blake2b_initial_hash_loop: + // stack: i, IV_i, ..., IV_7, retdest + %decrement + // stack: i-1, IV_i, ..., IV_7, retdest + PUSH blake2b_initial_hash_return + // stack: blake2b_initial_hash_return, i-1, IV_i, ..., IV_7, retdest + DUP2 + // stack: i-1, blake2b_initial_hash_return, i-1, IV_i, ..., IV_7, retdest + %jump(blake2b_iv) +blake2b_initial_hash_return: + // stack: IV_(i-1), i-1, IV_i, ..., IV_7, retdest + SWAP1 + // stack: i-1, IV_(i-1), IV_i, ..., IV_7, retdest + DUP1 + // stack: i-1, i-1, IV_(i-1), ..., IV_7, retdest + %jumpi(blake2b_initial_hash_loop) + // stack: i-1=0, IV_0, ..., IV_7, retdest + POP + // stack: IV_0, ..., IV_7, retdest + PUSH 0x01010040 // params: key = 00, digest_size = 64 = 0x40 + XOR + // stack: IV_0 ^ params, IV_1, IV_2, IV_3, IV_4, IV_5, IV_6, IV_7, retdest + %stack(iv: 8, ret) -> (ret, iv) + JUMP + diff --git a/evm/src/cpu/kernel/tests/hash.rs b/evm/src/cpu/kernel/tests/hash.rs index cc2c96eb..bc73ecd5 100644 --- a/evm/src/cpu/kernel/tests/hash.rs +++ b/evm/src/cpu/kernel/tests/hash.rs @@ -79,8 +79,6 @@ fn prepare_test( // Run the interpeter let result = run_interpreter_with_memory(interpreter_setup).unwrap(); - dbg!(result.stack().to_vec()); - Ok((expected, result.stack().to_vec())) } From fda2e19054e01c344e214d4ac6c580e97b3fe597 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 8 Mar 2023 12:13:38 -0800 Subject: [PATCH 09/12] restored blake2b_g_function and call_blake2b_g_function macros --- .../kernel/asm/hash/blake2b/g_functions.asm | 183 +++++++----------- 1 file changed, 68 insertions(+), 115 deletions(-) diff --git 
a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index ff3ee43a..782393de 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -1,89 +1,89 @@ -blake2b_g_function: +%macro blake2b_g_function // Function to mix two input words, x and y, into the four words indexed by a, b, c, d (which // are in the range 0..16) in the internal state. // The internal state is stored in memory starting at the address start. - // stack: a, b, c, d, x, y, start, retdest + // stack: a, b, c, d, x, y, start %stack (indices: 4) -> (indices, indices) - // stack: a, b, c, d, a, b, c, d, x, y, start, retdest + // stack: a, b, c, d, a, b, c, d, x, y, start DUP11 - // stack: start, a, b, c, d, a, b, c, d, x, y, start, retdest + // stack: start, a, b, c, d, a, b, c, d, x, y, start %stack (start, a, b, c, d) -> (d, start, c, start, b, start, a, start) - // stack: d, start, c, start, b, start, a, start, a, b, c, d, x, y, start, retdest + // stack: d, start, c, start, b, start, a, start, a, b, c, d, x, y, start ADD %mload_kernel_general - // stack: v[d], c, start, b, start, a, start, a, b, c, d, x, y, start, retdest + // stack: v[d], c, start, b, start, a, start, a, b, c, d, x, y, start %stack (vd, remaining: 6) -> (remaining, vd) - // stack: c, start, b, start, a, start, v[d], a, b, c, d, x, y, start, retdest + // stack: c, start, b, start, a, start, v[d], a, b, c, d, x, y, start ADD %mload_kernel_general %stack (vc, remaining: 4) -> (remaining, vc) - // stack: b, start, a, start, v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: b, start, a, start, v[c], v[d], a, b, c, d, x, y, start ADD %mload_kernel_general - // stack: v[b], a, start, v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[b], a, start, v[c], v[d], a, b, c, d, x, y, start %stack (vb, remaining: 2) -> (remaining, vb) - // stack: a, start, v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + 
// stack: a, start, v[b], v[c], v[d], a, b, c, d, x, y, start ADD %mload_kernel_general - // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start DUP2 - // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start DUP10 - // stack: x, v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: x, v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start ADD ADD %as_u64 - // stack: v[a]' = (v[a] + v[b] + x) % 2^64, v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[a]' = (v[a] + v[b] + x) % 2^64, v[b], v[c], v[d], a, b, c, d, x, y, start %stack (a, b, c, d) -> (a, d, a, b, c, d) - // stack: v[a]', v[d], v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[a]', v[d], v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start XOR %rotr_64(32) - // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start %stack (top: 4, vd) -> (top) - // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start, retdest + // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start %stack (d, a, b, c) -> (c, d, a, b, d) - // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start, retdest + // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start ADD %as_u64 - // stack: v[c]' = (v[c] + v[d]') % 2^64, v[a]', v[b], v[d]', a, b, c, d, x, y, start, retdest + // stack: v[c]' = (v[c] + v[d]') % 2^64, v[a]', v[b], v[d]', a, b, c, d, x, y, start %stack (c, a, b, d) -> (b, c, a, c, d) - // stack: v[b], v[c]', v[a]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[b], v[c]', v[a]', v[c]', v[d]', a, b, c, d, x, y, start XOR %rotr_64(24) - // stack: v[b]' = (v[b] ^ v[c]') >>> 24, v[a]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[b]' = (v[b] ^ 
v[c]') >>> 24, v[a]', v[c]', v[d]', a, b, c, d, x, y, start SWAP1 - // stack: v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start DUP2 - // stack: v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start DUP11 - // stack: y, v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: y, v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start ADD ADD %as_u64 - // stack: v[a]'' = (v[a]' + v[b]' + y) % 2^64, v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[a]'' = (v[a]' + v[b]' + y) % 2^64, v[b]', v[c]', v[d]', a, b, c, d, x, y, start SWAP3 - // stack: v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start DUP4 - // stack: v[a]'', v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[a]'', v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start XOR %rotr_64(16) - // stack: v[d]'' = (v[a]'' ^ v[d]') >>> 8, v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[d]'' = (v[a]'' ^ v[d]') >>> 16, v[b]', v[c]', v[a]'', a, b, c, d, x, y, start SWAP2 - // stack: v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start DUP3 - // stack: v[d]'', v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[d]'', v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start ADD %as_u64 - // stack: v[c]'' = (v[c]' + v[d]'') % 2^64, v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[c]'' = (v[c]' + v[d]'') % 2^64, v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start DUP1 - // stack: v[c]'', v[c]'', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[c]'', v[c]'', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start SWAP2 - // stack: v[b]', v[c]'', v[c]'', v[d]'', v[a]'', a, b, c, d, x,
y, start, retdest + // stack: v[b]', v[c]'', v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start XOR %rotr_64(63) - // stack: v[b]'' = (v[b]' ^ v[c]'') >>> 7, v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[b]'' = (v[b]' ^ v[c]'') >>> 63, v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start %stack (vb, vc, vd, va, a, b, c, d, x, y, start) -> (start, a, va, start, b, vb, start, c, vc, start, d, vd) - // stack: start, a, v[a]'', start, b, v[b]'', start, c, v[c]'', start, d, v[d]'', retdest + // stack: start, a, v[a]'', start, b, v[b]'', start, c, v[c]'', start, d, v[d]'' ADD %mstore_kernel_general ADD @@ -92,100 +92,53 @@ blake2b_g_function: %mstore_kernel_general ADD %mstore_kernel_general - // stack: retdest - JUMP +%endmacro -call_blake2b_g_function: - // stack: a, b, c, d, x_idx, y_idx, round, start, retdest - DUP6 - // stack: y_idx, a, b, c, d, x_idx, y_idx, round, start, retdest - DUP8 - // stack: round, y_idx, a, b, c, d, x_idx, y_idx, round, start, retdest +%macro call_blake2b_g_function(a, b, c, d, x_idx, y_idx) + // stack: round, start + PUSH $y_idx + DUP2 + // stack: round, y_idx, round, start %blake2b_permutation - // stack: s[y_idx], a, b, c, d, x_idx, y_idx, round, start, retdest + // stack: s[y_idx], round, start %blake2b_message_addr ADD %mload_kernel_general - // stack: m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest - DUP6 - // stack: x_idx, m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest - DUP9 - // stack: round, x_idx, m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest + // stack: m[s[y_idx]], round, start + PUSH $x_idx + DUP3 + // stack: round, x_idx, m[s[y_idx]], round, start %blake2b_permutation - // stack: s[x_idx], m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest + // stack: s[x_idx], m[s[y_idx]], round, start %blake2b_message_addr ADD %mload_kernel_general - // stack: m[s[x_idx]], m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest - %stack (mm: 2, abcd: 4, xy: 2, r, s) ->
(abcd, mm, s) - // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, retdest - %jump(blake2b_g_function) + // stack: m[s[x_idx]], m[s[y_idx]], round, start + %stack (ss: 2, r, s) -> (ss, s, r, s) + // stack: m[s[x_idx]], m[s[y_idx]], start, round, start + PUSH $d + PUSH $c + PUSH $b + PUSH $a + // stack: a, b, c, d, m[s[x_idx]], m[s[y_idx]], start, round, start + %blake2b_g_function + // stack: round, start +%endmacro -global run_g_function_round: +run_g_function_round: // stack: round, start, retdest - PUSH g_function_return_1 - // stack: g_function_return_1, round, start, retdest - %stack (ret, r, s) -> (0, 4, 8, 12, 0, 1, r, s, ret, r, s) - // stack: a=0, b=4, c=8, d=12, x_idx=0, y_idx=1, round, start, g_function_return_1, round, start, retdest - %jump(call_blake2b_g_function) -g_function_return_1: - // stack: round, start, retdest - PUSH g_function_return_2 - // stack: g_function_return_2, round, start, retdest - %stack (ret, r, s) -> (1, 5, 9, 13, 2, 3, r, s, ret, r, s) - // stack: a=1, b=5, c=9, d=13, x_idx=2, y_idx=3, round, start, g_function_return_2, round, start, retdest - %jump(call_blake2b_g_function) -g_function_return_2: - // stack: round, start, retdest - PUSH g_function_return_3 - // stack: g_function_return_3, round, start, retdest - %stack (ret, r, s) -> (2, 6, 10, 14, 4, 5, r, s, ret, r, s) - // stack: a=2, b=6, c=10, d=14, x_idx=4, y_idx=5, round, start, g_function_return_3, round, start, retdest - %jump(call_blake2b_g_function) -g_function_return_3: - // stack: round, start, retdest - PUSH g_function_return_4 - // stack: g_function_return_4, round, start, retdest - %stack (ret, r, s) -> (3, 7, 11, 15, 6, 7, r, s, ret, r, s) - // stack: a=3, b=7, c=11, d=15, x_idx=6, y_idx=7, round, start, g_function_return_4, round, start, retdest - %jump(call_blake2b_g_function) -g_function_return_4: - // stack: round, start, retdest - PUSH g_function_return_5 - // stack: g_function_return_5, round, start, retdest - %stack (ret, r, s) -> (0, 5, 10, 15, 8, 9, 
r, s, ret, r, s) - // stack: a=0, b=5, c=10, d=15, x_idx=8, y_idx=9, round, start, g_function_return_5, round, start, retdest - %jump(call_blake2b_g_function) -g_function_return_5: - // stack: round, start, retdest - PUSH g_function_return_6 - // stack: g_function_return_6, round, start, retdest - %stack (ret, r, s) -> (1, 6, 11, 12, 10, 11, r, s, ret, r, s) - // stack: a=1, b=6, c=11, d=12, x_idx=10, y_idx=11, round, start, g_function_return_6, round, start, retdest - %jump(call_blake2b_g_function) -g_function_return_6: - // stack: round, start, retdest - PUSH g_function_return_7 - // stack: g_function_return_7, round, start, retdest - %stack (ret, r, s) -> (2, 7, 8, 13, 12, 13, r, s, ret, r, s) - // stack: a=2, b=7, c=8, d=13, x_idx=12, y_idx=13, round, start, g_function_return_7, round, start, retdest - %jump(call_blake2b_g_function) -g_function_return_7: - // stack: round, start, retdest - PUSH g_function_return_8 - // stack: g_function_return_8, round, start, retdest - %stack (ret, r, s) -> (3, 4, 9, 14, 14, 15, r, s, ret, r, s) - // stack: a=3, b=4, c=9, d=14, x_idx=14, y_idx=15, round, start, g_function_return_8, round, start, retdest - %jump(call_blake2b_g_function) -g_function_return_8: - // stack: round, start, retdest - SWAP1 - // stack: start, round, retdest - SWAP2 + %call_blake2b_g_function(0, 4, 8, 12, 0, 1) + %call_blake2b_g_function(1, 5, 9, 13, 2, 3) + %call_blake2b_g_function(2, 6, 10, 14, 4, 5) + %call_blake2b_g_function(3, 7, 11, 15, 6, 7) + %call_blake2b_g_function(0, 5, 10, 15, 8, 9) + %call_blake2b_g_function(1, 6, 11, 12, 10, 11) + %call_blake2b_g_function(2, 7, 8, 13, 12, 13) + %call_blake2b_g_function(3, 4, 9, 14, 14, 15) + %stack (r, s, ret) -> (ret, r, s) // stack: retdest, round, start JUMP - global run_12_rounds_g_function: // stack: start, retdest PUSH 0 From 33ccf8987ea4bd346f2619f35ac1f71b05276781 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Wed, 8 Mar 2023 12:34:50 -0800 Subject: [PATCH 10/12] small optimizations --- 
.../kernel/asm/hash/blake2b/g_functions.asm | 32 ++++++++++++------- .../kernel/asm/hash/blake2b/permutations.asm | 25 +++++++++++---- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index 782393de..f2d3b1d2 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -3,28 +3,38 @@ // are in the range 0..16) in the internal state. // The internal state is stored in memory starting at the address start. // stack: a, b, c, d, x, y, start - %stack (indices: 4) -> (indices, indices) + DUP4 + DUP4 + DUP4 + DUP4 // stack: a, b, c, d, a, b, c, d, x, y, start DUP11 // stack: start, a, b, c, d, a, b, c, d, x, y, start - %stack (start, a, b, c, d) -> (d, start, c, start, b, start, a, start) - // stack: d, start, c, start, b, start, a, start, a, b, c, d, x, y, start ADD %mload_kernel_general - // stack: v[d], c, start, b, start, a, start, a, b, c, d, x, y, start - %stack (vd, remaining: 6) -> (remaining, vd) - // stack: c, start, b, start, a, start, v[d], a, b, c, d, x, y, start + // stack: v[a], b, c, d, a, b, c, d, x, y, start + SWAP1 + // stack: b, v[a], c, d, a, b, c, d, x, y, start + DUP11 + // stack: start, b, v[a], c, d, a, b, c, d, x, y, start ADD %mload_kernel_general - %stack (vc, remaining: 4) -> (remaining, vc) - // stack: b, start, a, start, v[c], v[d], a, b, c, d, x, y, start + // stack: v[b], v[a], c, d, a, b, c, d, x, y, start + SWAP2 + // stack: c, v[a], v[b], d, a, b, c, d, x, y, start + DUP11 + // stack: start, c, v[a], v[b], d, a, b, c, d, x, y, start ADD %mload_kernel_general - // stack: v[b], a, start, v[c], v[d], a, b, c, d, x, y, start - %stack (vb, remaining: 2) -> (remaining, vb) - // stack: a, start, v[b], v[c], v[d], a, b, c, d, x, y, start + // stack: v[c], v[a], v[b], d, a, b, c, d, x, y, start + SWAP3 + // stack: d, v[a], v[b], v[c], a, b, c, d, x, y, start + 
DUP11 + // stack: start, d, v[a], v[b], v[c], a, b, c, d, x, y, start ADD %mload_kernel_general + // stack: v[d], v[a], v[b], v[c], a, b, c, d, x, y, start + %stack (vd, vs: 3) -> (vs, vd) // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start DUP2 // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm index 5277e611..1ef455f1 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/permutations.asm @@ -58,17 +58,28 @@ global permutation_9_constants: BYTES 15, 11, 9, 14 BYTES 3, 12, 13, 0 -%macro blake2b_permutation - // stack: round, i +global blake2b_permutation: + // stack: i, round, retdest PUSH permutation_0_constants - // stack: permutation_0_constants, round, i - SWAP1 - // stack: round, permutation_1_constants, i + // stack: permutation_0_constants, i, round, retdest + SWAP2 + // stack: round, i, permutation_0_constants, retdest %mod_const(10) - // stack: round % 10, permutation_1_constants, i + // stack: round % 10, i, permutation_0_constants, retdest %mul_const(16) ADD - // stack: permutation_(round)_constants, i ADD %mload_kernel_code + // stack: permutation_(round%10)_constants[i], retdest + SWAP1 + JUMP + +%macro blake2b_permutation + // stack: round, i + PUSH %%after + // stack: %%after, round, i + SWAP2 + // stack: i, round, %%after + %jump(blake2b_permutation) +%%after: %endmacro From 476a554aea93dc1f399cf87ea4900b6fb780cf42 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 14 Mar 2023 12:29:30 -0700 Subject: [PATCH 11/12] OR -> ADD --- .../kernel/asm/curve/secp256k1/ecrecover.asm | 14 ++++------ evm/src/cpu/kernel/asm/hash/blake2b/iv.asm | 2 +- .../cpu/kernel/asm/hash/ripemd/functions.asm | 2 +- evm/src/cpu/kernel/asm/hash/ripemd/main.asm | 8 +++--- .../cpu/kernel/asm/hash/sha2/compression.asm | 14 +++++----- evm/src/cpu/kernel/asm/memory/core.asm | 28 
+++++++++---------- evm/src/cpu/kernel/asm/util/basic_macros.asm | 10 +++---- 7 files changed, 37 insertions(+), 41 deletions(-) diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm index dd2b86f6..c243a748 100644 --- a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm +++ b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm @@ -134,17 +134,13 @@ pubkey_to_addr: // stack: hash, v, r, s, retdest DUP2 // stack: v, hash, v, r, s, retdest - PUSH 27 - // stack: 27, v, hash, v, r, s, retdest - EQ + %eq_consts(27) // stack: v==27, hash, v, r, s, retdest DUP3 // stack: v, v==27, hash, v, r, s, retdest - PUSH 28 - // stack: 28, v, v==27, hash, v, r, s, retdest - EQ + %eq_consts(28) // stack: v==28, v==27, hash, v, r, s, retdest - OR + ADD // OR // stack: (v==28 || v==27), hash, v, r, s, retdest DUP5 // stack: s, (v==28 || v==27), hash, v, r, s, retdest @@ -154,7 +150,7 @@ pubkey_to_addr: // stack: r, (s >= N || s==0), (v==28 || v==27), hash, v, r, s, retdest %secp_is_out_of_bounds // stack: (r >= N || r==0), (s >= N || s==0), (v==28 || v==27), hash, v, r, s, retdest - OR + ADD // OR // stack: (r >= N || r==0 || s >= N || s==0), (v==28 || v==27), hash, v, r, s, retdest ISZERO // stack: (r < N & r!=0 & s < N & s!=0), (v==28 || v==27), hash, v, r, s, retdest @@ -178,7 +174,7 @@ pubkey_to_addr: // stack: x < N, x==0 ISZERO // stack: x >= N, x==0 - OR + ADD // OR // stack: x >= N || x==0 %endmacro diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm b/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm index 174afd33..94e9ba27 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/iv.asm @@ -52,7 +52,7 @@ global blake2b_iv_const: // stack: IV_i[32:], IV_i[:32], ... %shl_const(32) // stack: IV_i[32:] << 32, IV_i[:32], ... - OR + ADD // OR // stack: IV_i, ... 
%endmacro diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/functions.asm b/evm/src/cpu/kernel/asm/hash/ripemd/functions.asm index ac111215..de2fdcf6 100644 --- a/evm/src/cpu/kernel/asm/hash/ripemd/functions.asm +++ b/evm/src/cpu/kernel/asm/hash/ripemd/functions.asm @@ -18,7 +18,7 @@ global rol: // stack: x << n, x >> (32-n), retdest %as_u32 // stack: u32(x << n), x >> (32-n), retdest - OR + ADD // OR // stack: u32(x << n) | (x >> (32-n)), retdest SWAP1 JUMP diff --git a/evm/src/cpu/kernel/asm/hash/ripemd/main.asm b/evm/src/cpu/kernel/asm/hash/ripemd/main.asm index 4aa6a6ad..0ad1e4e5 100644 --- a/evm/src/cpu/kernel/asm/hash/ripemd/main.asm +++ b/evm/src/cpu/kernel/asm/hash/ripemd/main.asm @@ -81,21 +81,21 @@ process: SWAP1 %reverse_bytes_u32 %shl_const(96) - OR + ADD // OR // stack: b' a', c, d, e, VARS SWAP1 %reverse_bytes_u32 %shl_const(64) - OR + ADD // OR // stack: c' b' a', d, e, VARS SWAP1 %reverse_bytes_u32 %shl_const(32) - OR + ADD // OR // stack: d' c' b' a', e, VARS SWAP1 %reverse_bytes_u32 - OR + ADD // OR // stack: e' d' c' b' a', VARS %stack (result, VARS: 3, retdest) -> (retdest, result) // stack: 0xdeadbeef, result diff --git a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm index 8850c1c8..8c219ebb 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm @@ -263,19 +263,19 @@ compression_end: POP // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest %shl_const(32) - OR + ADD // OR %shl_const(32) - OR + ADD // OR %shl_const(32) - OR + ADD // OR %shl_const(32) - OR + ADD // OR %shl_const(32) - OR + ADD // OR %shl_const(32) - OR + ADD // OR %shl_const(32) - OR + ADD // OR // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest 
SWAP3 // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index a979f930..8f59a128 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -79,21 +79,21 @@ DUP2 %increment %mload_kernel($segment) - OR + ADD // OR // stack: (c_3 << 8) | c_2, offset %shl_const(8) // stack: ((c_3 << 8) | c_2) << 8, offset DUP2 %add_const(2) %mload_kernel($segment) - OR + ADD // OR // stack: (((c_3 << 8) | c_2) << 8) | c_1, offset %shl_const(8) // stack: ((((c_3 << 8) | c_2) << 8) | c_1) << 8, offset SWAP1 %add_const(3) %mload_kernel($segment) - OR + ADD // OR // stack: (((((c_3 << 8) | c_2) << 8) | c_1) << 8) | c_0 %endmacro @@ -107,19 +107,19 @@ %increment %mload_kernel($segment) %shl_const(8) - OR + ADD // OR // stack: c0 | (c1 << 8) , offset DUP2 %add_const(2) %mload_kernel($segment) %shl_const(16) - OR + ADD // OR // stack: c0 | (c1 << 8) | (c2 << 16), offset SWAP1 %add_const(3) %mload_kernel($segment) %shl_const(24) - OR + ADD // OR // stack: c0 | (c1 << 8) | (c2 << 16) | (c3 << 24) %endmacro @@ -137,7 +137,7 @@ // stack: hi, lo %shl_const(32) // stack: hi << 32, lo - OR + ADD // OR // stack: (hi << 32) | lo %endmacro @@ -152,49 +152,49 @@ DUP2 %add_const(4) %mload_kernel_u32($segment) - OR + ADD // OR // stack: (c_7 << 32) | c_6, offset %shl_const(32) // stack: ((c_7 << 32) | c_6) << 32, offset DUP2 %add_const(8) %mload_kernel_u32($segment) - OR + ADD // OR // stack: (c_7 << 64) | (c_6 << 32) | c_5, offset %shl_const(32) // stack: ((c_7 << 64) | (c_6 << 32) | c_5) << 32, offset DUP2 %add_const(12) %mload_kernel_u32($segment) - OR + ADD // OR // stack: (c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4, offset %shl_const(32) // stack: ((c_7 << 96) | (c_6 << 64) | (c_5 << 32) | c_4) << 32, offset DUP2 %add_const(16) %mload_kernel_u32($segment) - OR
+ ADD // OR // stack: (c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3, offset %shl_const(32) // stack: ((c_7 << 128) | (c_6 << 96) | (c_5 << 64) | (c_4 << 32) | c_3) << 32, offset DUP2 %add_const(20) %mload_kernel_u32($segment) - OR + ADD // OR // stack: (c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2, offset %shl_const(32) // stack: ((c_7 << 160) | (c_6 << 128) | (c_5 << 96) | (c_4 << 64) | (c_3 << 32) | c_2) << 32, offset DUP2 %add_const(24) %mload_kernel_u32($segment) - OR + ADD // OR // stack: (c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1, offset %shl_const(32) // stack: ((c_7 << 192) | (c_6 << 160) | (c_5 << 128) | (c_4 << 96) | (c_3 << 64) | (c_2 << 32) | c_1) << 32, offset DUP2 %add_const(28) %mload_kernel_u32($segment) - OR + ADD // OR // stack: (c_7 << 224) | (c_6 << 192) | (c_5 << 160) | (c_4 << 128) | (c_3 << 96) | (c_2 << 64) | (c_1 << 32) | c_0, offset SWAP1 POP diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index 5ee4a1e7..52556d3f 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -309,9 +309,9 @@ BYTE %shl_const(24) // stack: d000, b0, a, c00 - OR - OR - OR + ADD // OR + ADD // OR + ADD // OR // stack: dcba %endmacro @@ -332,7 +332,7 @@ %reverse_bytes_u32 // stack: word_lo_inverted, word_hi_inverted %shl_const(32) - OR + ADD // OR // stack: word_inverted %endmacro @@ -341,7 +341,7 @@ // stack: a, b, c, d %rep 3 %shl_const(64) - OR + ADD // OR %endrep // stack: a || b || c || d %endmacro From 676a483c06c4ec004def5f9ed45eacddb7d34310 Mon Sep 17 00:00:00 2001 From: Nicholas Ward Date: Tue, 14 Mar 2023 12:34:51 -0700 Subject: [PATCH 12/12] fix --- evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm 
b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm index c243a748..11ec27c8 100644 --- a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm +++ b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm @@ -134,11 +134,11 @@ pubkey_to_addr: // stack: hash, v, r, s, retdest DUP2 // stack: v, hash, v, r, s, retdest - %eq_consts(27) + %eq_const(27) // stack: v==27, hash, v, r, s, retdest DUP3 // stack: v, v==27, hash, v, r, s, retdest - %eq_consts(28) + %eq_const(28) // stack: v==28, v==27, hash, v, r, s, retdest ADD // OR // stack: (v==28 || v==27), hash, v, r, s, retdest