diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm index f62d309c..f2d3b1d2 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/g_functions.asm @@ -1,4 +1,4 @@ -blake2b_g_function: +%macro blake2b_g_function // Function to mix two input words, x and y, into the four words indexed by a, b, c, d (which // are in the range 0..16) in the internal state. // The internal state is stored in memory starting at the address start. @@ -37,63 +37,63 @@ blake2b_g_function: %stack (vd, vs: 3) -> (vs, vd) // stack: v[a], v[b], v[c], v[d], a, b, c, d, x, y, start DUP2 - // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start DUP10 - // stack: x, v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: x, v[b], v[a], v[b], v[c], v[d], a, b, c, d, x, y, start ADD ADD %as_u64 - // stack: v[a]' = (v[a] + v[b] + x) % 2^64, v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[a]' = (v[a] + v[b] + x) % 2^64, v[b], v[c], v[d], a, b, c, d, x, y, start %stack (a, b, c, d) -> (a, d, a, b, c, d) - // stack: v[a]', v[d], v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[a]', v[d], v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start XOR %rotr_64(32) - // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start, retdest + // stack: v[d]' = (v[d] ^ v[a]') >>> 32, v[a]', v[b], v[c], v[d], a, b, c, d, x, y, start %stack (top: 4, vd) -> (top) - // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start, retdest + // stack: v[d]', v[a]', v[b], v[c], a, b, c, d, x, y, start %stack (d, a, b, c) -> (c, d, a, b, d) - // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start, retdest + // stack: v[c], v[d]', v[a]', v[b], v[d]', a, b, c, d, x, y, start ADD %as_u64 - // stack: v[c]' = (v[c] + v[d]') % 2^64, v[a]', 
v[b], v[d]', a, b, c, d, x, y, start, retdest + // stack: v[c]' = (v[c] + v[d]') % 2^64, v[a]', v[b], v[d]', a, b, c, d, x, y, start %stack (c, a, b, d) -> (b, c, a, c, d) - // stack: v[b], v[c]', v[a]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[b], v[c]', v[a]', v[c]', v[d]', a, b, c, d, x, y, start XOR %rotr_64(24) - // stack: v[b]' = (v[b] ^ v[c]') >>> 24, v[a]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[b]' = (v[b] ^ v[c]') >>> 24, v[a]', v[c]', v[d]', a, b, c, d, x, y, start SWAP1 - // stack: v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start DUP2 - // stack: v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start DUP11 - // stack: y, v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: y, v[b]', v[a]', v[b]', v[c]', v[d]', a, b, c, d, x, y, start ADD ADD %as_u64 - // stack: v[a]'' = (v[a]' + v[b]' + y) % 2^64, v[b]', v[c]', v[d]', a, b, c, d, x, y, start, retdest + // stack: v[a]'' = (v[a]' + v[b]' + y) % 2^64, v[b]', v[c]', v[d]', a, b, c, d, x, y, start SWAP3 - // stack: v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start DUP4 - // stack: v[a]'', v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[a]'', v[d]', v[b]', v[c]', v[a]'', a, b, c, d, x, y, start XOR %rotr_64(16) - // stack: v[d]'' = (v[a]'' ^ v[d]') >>> 8, v[b]', v[c]', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[d]'' = (v[a]'' ^ v[d]') >>> 16, v[b]', v[c]', v[a]'', a, b, c, d, x, y, start SWAP2 - // stack: v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start DUP3 - // stack: v[d]'', v[c]', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[d]'', v[c]', v[b]', v[d]'', v[a]'', a, 
b, c, d, x, y, start ADD %as_u64 - // stack: v[c]'' = (v[c]' + v[d]'') % 2^64, v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[c]'' = (v[c]' + v[d]'') % 2^64, v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start DUP1 - // stack: v[c]'', v[c]'', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[c]'', v[c]'', v[b]', v[d]'', v[a]'', a, b, c, d, x, y, start SWAP2 - // stack: v[b]', v[c]'', v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[b]', v[c]'', v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start XOR %rotr_64(63) - // stack: v[b]'' = (v[b]' ^ v[c]'') >>> 7, v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start, retdest + // stack: v[b]'' = (v[b]' ^ v[c]'') >>> 63, v[c]'', v[d]'', v[a]'', a, b, c, d, x, y, start %stack (vb, vc, vd, va, a, b, c, d, x, y, start) -> (start, a, va, start, b, vb, start, c, vc, start, d, vd) - // stack: start, a, v[a]'', start, b, v[b]'', start, c, v[c]'', start, d, v[d]'', retdest + // stack: start, a, v[a]'', start, b, v[b]'', start, c, v[c]'', start, d, v[d]'' ADD %mstore_kernel_general ADD @@ -102,27 +102,24 @@ blake2b_g_function: %mstore_kernel_general ADD %mstore_kernel_general - // stack: retdest - JUMP +%endmacro -call_blake2b_g_function: - // stack: a, b, c, d, x_idx, y_idx, round, start, retdest - DUP6 - // stack: y_idx, a, b, c, d, x_idx, y_idx, round, start, retdest - DUP8 - // stack: round, y_idx, a, b, c, d, x_idx, y_idx, round, start, retdest +%macro call_blake2b_g_function(a, b, c, d, x_idx, y_idx) + // stack: round, start + PUSH $y_idx + DUP2 + // stack: round, y_idx, round, start %blake2b_permutation - // stack: s[y_idx], a, b, c, d, x_idx, y_idx, round, start, retdest + // stack: s[y_idx], round, start %blake2b_message_addr ADD %mload_kernel_general - // stack: m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest - DUP6 - // stack: x_idx, m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest - DUP9 - // stack: round, x_idx, m[s[y_idx]], a, b, c, d, x_idx, 
y_idx, round, start, retdest + // stack: m[s[y_idx]], round, start + PUSH $x_idx + DUP3 + // stack: round, x_idx, m[s[y_idx]], round, start %blake2b_permutation - // stack: s[x_idx], m[s[y_idx]], a, b, c, d, x_idx, y_idx, round, start, retdest + // stack: s[x_idx], m[s[y_idx]], round, start %blake2b_message_addr ADD %mload_kernel_general