diff --git a/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm b/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm index 9db1c70f..01c02715 100644 --- a/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm +++ b/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm @@ -17,7 +17,6 @@ global precompile_blake2_f: // Load inputs from calldata memory into stack. - %calldatasize // stack: calldatasize, blake2_f_contd, kexit_info DUP1 @@ -42,22 +41,22 @@ global precompile_blake2_f: %sub_const(8) // stack: t1_addr=flag_addr-8, flag, blake2_f_contd, kexit_info - %stack (t1_addr) -> (@SEGMENT_CALLDATA, t1_addr, 8, t1_addr) - // stack: @SEGMENT_CALLDATA, t1_addr, 8, t1_addr, flag, blake2_f_contd, kexit_info + %stack (t1_addr) -> (@SEGMENT_CALLDATA, t1_addr, t1_addr) + // stack: @SEGMENT_CALLDATA, t1_addr, t1_addr, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, t1_addr, 8, t1_addr, flag, blake2_f_contd, kexit_info - %mload_packing_LE + // stack: ctx, @SEGMENT_CALLDATA, t1_addr, t1_addr, flag, blake2_f_contd, kexit_info + %mload_packing_u64_LE // stack: t_1, t1_addr, flag, blake2_f_contd, kexit_info SWAP1 // stack: t1_addr, t_1, flag, blake2_f_contd, kexit_info %sub_const(8) // stack: t0_addr=t1_addr-8, t_1, flag, blake2_f_contd, kexit_info - %stack (t0_addr) -> (@SEGMENT_CALLDATA, t0_addr, 8, t0_addr) - // stack: @SEGMENT_CALLDATA, t0_addr, 8, t0_addr, t_1, flag, blake2_f_contd, kexit_info + %stack (t0_addr) -> (@SEGMENT_CALLDATA, t0_addr, t0_addr) + // stack: @SEGMENT_CALLDATA, t0_addr, t0_addr, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, t0_addr, 8, t0_addr, t_1, flag, blake2_f_contd, kexit_info - %mload_packing_LE + // stack: ctx, @SEGMENT_CALLDATA, t0_addr, t0_addr, t_1, flag, blake2_f_contd, kexit_info + %mload_packing_u64_LE // stack: t_0, t0_addr, t_1, flag, blake2_f_contd, kexit_info SWAP1 // stack: t0_addr = m0_addr + 8 * 16, t_0, t_1, flag, blake2_f_contd, kexit_info @@ -66,15 +65,13 @@ 
global precompile_blake2_f: // stack: m0_addr + 8 * (16 - i), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %sub_const(8) // stack: m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - PUSH 8 - // stack: 8, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - DUP2 - // stack: m0_addr + 8 * (16 - i - 1), 8, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + DUP1 + // stack: m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info PUSH @SEGMENT_CALLDATA - // stack: @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), 8, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), 8, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - %mload_packing_LE + // stack: ctx, @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %mload_packing_u64_LE // stack: m_i, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info SWAP1 // stack: m0_addr + 8 * (16 - i - 1), m_i, m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info @@ -85,15 +82,13 @@ global precompile_blake2_f: // stack: h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %sub_const(8) // stack: h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - PUSH 8 - // stack: 8, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - DUP2 - // stack: h0_addr + 8 * (8 - i), 8, h0_addr + 
8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + DUP1 + // stack: h0_addr + 8 * (8 - i - 1), h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info PUSH @SEGMENT_CALLDATA - // stack: @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i), 8, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i - 1), h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i), 8, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - %mload_packing_LE + // stack: ctx, @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i - 1), h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %mload_packing_u64_LE - // stack: h_i, h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: h_i, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info SWAP1 - // stack: h0_addr + 8 * (8 - i), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: h0_addr + 8 * (8 - i - 1), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info @@ -115,10 +110,9 @@ global precompile_blake2_f: %jump(blake2_f) blake2_f_contd: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info - // Store the result hash to the parent's return data using `mstore_unpacking_LE`. - %pop8 + // Store the result hash to the parent's return data using `mstore_unpacking_u64_LE`. 
- %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32) + %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 64) PUSH 0 // stack: addr_0=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info %mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT) @@ -126,11 +120,10 @@ blake2_f_contd: %rep 8 // stack: parent_ctx, addr_i, h_i', ..., h_7', kexit_info - %stack (ctx, addr, h_i) -> (ctx, @SEGMENT_RETURNDATA, addr, h_i, 4, addr, ctx) - // stack: parent_ctx, @SEGMENT_RETURNDATA, addr_i, h_i', 4, addr_i, parent_ctx, h_(i+1)', ..., h_7', kexit_info - %mstore_unpacking_LE + %stack (ctx, addr, h_i) -> (ctx, @SEGMENT_RETURNDATA, addr, h_i, addr, ctx) + %mstore_unpacking_u64_LE // stack: addr_i, parent_ctx, h_(i+1)', ..., h_7', kexit_info - %add_const(4) + %add_const(8) // stack: addr_(i+1), parent_ctx, h_(i+1)', ..., h_7', kexit_info SWAP1 // stack: parent_ctx, addr_(i+1), h_(i+1)', ..., h_7', kexit_info diff --git a/evm/src/cpu/kernel/asm/memory/packing.asm b/evm/src/cpu/kernel/asm/memory/packing.asm index edb25a4c..0f802335 100644 --- a/evm/src/cpu/kernel/asm/memory/packing.asm +++ b/evm/src/cpu/kernel/asm/memory/packing.asm @@ -49,56 +49,22 @@ mload_packing_return: %%after: %endmacro -global mload_packing_LE: - // stack: context, segment, offset, len, retdest - SWAP2 - // stack: offset, segment, context, len, retdest - DUP4 - // stack: len, offset, segment, context, len, retdest - ADD - %decrement - // stack: end_offset, segment, context, len, retdest - SWAP2 - // stack: context, segment, end_offset, len, retdest - DUP3 DUP3 DUP3 MLOAD_GENERAL DUP5 %eq_const(1) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(1) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(2) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(2) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(3) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(3) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(4) %jumpi(mload_packing_LE_return) %shl_const(8) - 
DUP4 %sub_const(4) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(5) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(5) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(6) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(6) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(7) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(7) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(8) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(8) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(9) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(9) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(10) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(10) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(11) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(11) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(12) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(12) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(13) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(13) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(14) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(14) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(15) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(15) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(16) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(16) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(17) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(17) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(18) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(18) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(19) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(19) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(20) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(20) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(21) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(21) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 
%eq_const(22) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(22) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(23) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(23) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(24) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(24) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(25) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(25) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(26) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(26) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(27) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(27) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(28) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(28) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(29) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(29) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(30) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(30) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(31) %jumpi(mload_packing_LE_return) %shl_const(8) - DUP4 %sub_const(31) DUP4 DUP4 MLOAD_GENERAL ADD -mload_packing_LE_return: - %stack (packed_value, addr: 3, len, retdest) -> (retdest, packed_value) +global mload_packing_u64_LE: + // stack: context, segment, offset, retdest + DUP3 DUP3 DUP3 MLOAD_GENERAL + DUP4 %add_const(1) DUP4 DUP4 MLOAD_GENERAL %shl_const( 8) ADD + DUP4 %add_const(2) DUP4 DUP4 MLOAD_GENERAL %shl_const(16) ADD + DUP4 %add_const(3) DUP4 DUP4 MLOAD_GENERAL %shl_const(24) ADD + DUP4 %add_const(4) DUP4 DUP4 MLOAD_GENERAL %shl_const(32) ADD + DUP4 %add_const(5) DUP4 DUP4 MLOAD_GENERAL %shl_const(40) ADD + DUP4 %add_const(6) DUP4 DUP4 MLOAD_GENERAL %shl_const(48) ADD + DUP4 %add_const(7) DUP4 DUP4 MLOAD_GENERAL %shl_const(56) ADD + %stack (value, context, segment, offset, retdest) -> (retdest, value) JUMP -%macro mload_packing_LE - %stack (addr: 3, len) -> (addr, len, %%after) - %jump(mload_packing_LE) +%macro 
mload_packing_u64_LE + %stack (addr: 3) -> (addr, %%after) + %jump(mload_packing_u64_LE) %%after: %endmacro @@ -148,55 +114,37 @@ mstore_unpacking_finish: %%after: %endmacro -// Pre stack: context, segment, offset, value, len, retdest +// Pre stack: context, segment, offset, value, retdest -// Post stack: offset' +// Post stack: (empty) -global mstore_unpacking_LE: - // stack: context, segment, offset, value, len, retdest - SWAP2 - // stack: offset, segment, context, value, len, retdest - DUP5 - // stack: len, offset, segment, context, value, len, retdest - ADD - // stack: end_offset, segment, context, value, len, retdest - SWAP2 - // stack: context, segment, end_offset, value, len, retdest +global mstore_unpacking_u64_LE: + %stack (context, segment, offset, value) -> (0xff, value, context, segment, offset, value) + AND + DUP4 DUP4 DUP4 MSTORE_GENERAL // First byte + %stack (context, segment, offset, value) -> (0xff00, value, context, segment, offset, value) + AND %shr_const(8) + DUP4 %add_const(1) DUP4 DUP4 MSTORE_GENERAL // Second byte + %stack (context, segment, offset, value) -> (0xff0000, value, context, segment, offset, value) + AND %shr_const(16) + DUP4 %add_const(2) DUP4 DUP4 MSTORE_GENERAL // Third byte + %stack (context, segment, offset, value) -> (0xff000000, value, context, segment, offset, value) + AND %shr_const(24) + DUP4 %add_const(3) DUP4 DUP4 MSTORE_GENERAL // Fourth byte + %stack (context, segment, offset, value) -> (0xff00000000, value, context, segment, offset, value) + AND %shr_const(32) + DUP4 %add_const(4) DUP4 DUP4 MSTORE_GENERAL // Fifth byte + %stack (context, segment, offset, value) -> (0xff0000000000, value, context, segment, offset, value) + AND %shr_const(40) + DUP4 %add_const(5) DUP4 DUP4 MSTORE_GENERAL // Sixth byte + %stack (context, segment, offset, value) -> (0xff000000000000, value, context, segment, offset, value) + AND %shr_const(48) + DUP4 %add_const(6) DUP4 DUP4 MSTORE_GENERAL // Seventh byte + %stack (context, segment, offset, value) -> 
(0xff00000000000000, value, context, segment, offset, value) + AND %shr_const(56) + DUP4 %add_const(7) DUP4 DUP4 MSTORE_GENERAL // Eighth byte + %pop4 JUMP - PUSH 0 - // stack: i=0, context, segment, end_offset, value, len, retdest - -mstore_unpacking_LE_loop: - // stack: i, context, segment, offset, value, len, retdest - // If i == len, finish. - DUP6 - DUP2 - EQ - %jumpi(mstore_unpacking_LE_finish) - - // stack: i, context, segment, offset, value, len, retdest - DUP5 // value - DUP2 // i - BYTE - // stack: value[i], i, context, segment, offset, value, len, retdest - DUP5 DUP5 DUP5 // context, segment, offset - // stack: context, segment, offset, value[i], i, context, segment, offset, value, len, retdest - MSTORE_GENERAL - // stack: i, context, segment, offset, value, len, retdest - - // Decrement offset. - SWAP3 %decrement SWAP3 - // Increment i. - %increment - - %jump(mstore_unpacking_LE_loop) - -mstore_unpacking_LE_finish: - // stack: i, context, segment, offset, value, len, retdest - %pop3 - %stack (offset, value, len, retdest) -> (retdest, offset) - JUMP - -%macro mstore_unpacking_LE - %stack (addr: 3, value, len) -> (addr, value, len, %%after) - %jump(mstore_unpacking_LE) +%macro mstore_unpacking_u64_LE + %stack (addr: 3, value) -> (addr, value, %%after) + %jump(mstore_unpacking_u64_LE) %%after: %endmacro