diff --git a/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm b/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm index 778d82ce..b1b30767 100644 --- a/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm +++ b/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm @@ -17,6 +17,7 @@ global precompile_blake2_f: // Load inputs from calldata memory into stack. + %calldatasize // stack: calldatasize, blake2_f_contd, kexit_info DUP1 @@ -45,7 +46,7 @@ global precompile_blake2_f: // stack: @SEGMENT_CALLDATA, t1_addr, 8, t1_addr, flag, blake2_f_contd, kexit_info GET_CONTEXT // stack: ctx, @SEGMENT_CALLDATA, t1_addr, 8, t1_addr, flag, blake2_f_contd, kexit_info - %mload_packing + %mload_packing_LE // stack: t_1, t1_addr, flag, blake2_f_contd, kexit_info SWAP1 // stack: t1_addr, t_1, flag, blake2_f_contd, kexit_info @@ -56,67 +57,66 @@ global precompile_blake2_f: // stack: @SEGMENT_CALLDATA, t0_addr, 8, t0_addr, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT // stack: ctx, @SEGMENT_CALLDATA, t0_addr, 8, t0_addr, t_1, flag, blake2_f_contd, kexit_info - %mload_packing + %mload_packing_LE // stack: t_0, t0_addr, t_1, flag, blake2_f_contd, kexit_info SWAP1 - // stack: t0_addr, t_0, t_1, flag, blake2_f_contd, kexit_info - %sub_const(128) // 16 * 8 - // stack: m0_addr=t0_addr-128, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: t0_addr = m0_addr + 8 * 16, t_0, t_1, flag, blake2_f_contd, kexit_info %rep 16 - // stack: 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: m0_addr + 8 * (16 - i), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %sub_const(8) + // stack: m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info PUSH 8 - // stack: 8, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: 8, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info DUP2 - // stack: 68 + 8 * i, 8, 68 + 8 * i, m_(i-1), ..., m_0, 
t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: m0_addr + 8 * (16 - i - 1), 8, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info PUSH @SEGMENT_CALLDATA - // stack: @SEGMENT_CALLDATA, 68 + 8 * i, 8, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), 8, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 68 + 8 * i, 8, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %mload_packing - // stack: m_i, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: ctx, @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), 8, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %mload_packing_LE + // stack: m_i, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info SWAP1 - // stack: 68 + 8 * i, m_i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %add_const(8) + // stack: m0_addr + 8 * (16 - i - 1), m_i, m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %endrep - // stack: 68 + 8 * 16 = 196, m_15, ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %sub_const(192) // 16 * 8 (m values) + 8 * 8 (h values) - // stack: h0_addr, m_15, ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: m0_addr = h0_addr + 8 * 8, m_0, ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %rep 8 - // stack: 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %sub_const(8) + // stack: h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info PUSH 8 - // stack: 8, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, 
kexit_info + // stack: 8, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info DUP2 - // stack: 4 + 8 * i, 8, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: h0_addr + 8 * (8 - i - 1), 8, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info PUSH @SEGMENT_CALLDATA - // stack: @SEGMENT_CALLDATA, 4 + 8 * i, 8, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i - 1), 8, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 4 + 8 * i, 8, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: ctx, @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i - 1), 8, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info - %mload_packing - // stack: h_i, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + %mload_packing_LE + // stack: h_i, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info SWAP1 - // stack: 4 + 8 * i, h_i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %add_const(8) + // stack: h0_addr + 8 * (8 - i - 1), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %endrep - // stack: 4 + 8 * 8 = 68, h_7, ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: h0_addr = 4, h_0, ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info POP %stack () -> (@SEGMENT_CALLDATA, 0, 4) GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 0, 4, h_7..h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: ctx, @SEGMENT_CALLDATA, 0, 4, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %mload_packing - // stack: rounds, h_7..h_0, m_15..m_0, 
t_0, t_1, flag, blake2_f_contd, kexit_info
+    // stack: rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
     DUP1
-    // stack: rounds, rounds, h_7..h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info
+    // stack: rounds, rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
     %charge_gas
-    // stack: rounds, h_7..h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info
+    // stack: rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info
     %jump(blake2_f)
 blake2_f_contd:
     // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info
-    // Store the result hash to the parent's return data using `mstore_unpacking`.
+    // Store the result hash to the parent's return data using `mstore_unpacking_LE`.
+    %pop8 // NOTE(review): presumably pops eight words, i.e. h_0'..h_7' per the stack comment above, which the stores below still expect - confirm this is intended.
     %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32)
     PUSH 0
@@ -128,7 +128,7 @@ blake2_f_contd:
     // stack: parent_ctx, addr_i, h_i', ..., h_7', kexit_info
     %stack (ctx, addr, h_i) -> (ctx, @SEGMENT_RETURNDATA, addr, h_i, 4, addr, ctx)
     // stack: parent_ctx, @SEGMENT_RETURNDATA, addr_i, h_i', 4, addr_i, parent_ctx, h_(i+1)', ..., h_7', kexit_info
-    %mstore_unpacking
+    %mstore_unpacking_LE
     // stack: addr_i, parent_ctx, h_(i+1)', ..., h_7', kexit_info
     %add_const(4)
     // stack: addr_(i+1), parent_ctx, h_(i+1)', ..., h_7', kexit_info
diff --git a/evm/src/cpu/kernel/asm/memory/packing.asm b/evm/src/cpu/kernel/asm/memory/packing.asm
index a224fbb4..edb25a4c 100644
--- a/evm/src/cpu/kernel/asm/memory/packing.asm
+++ b/evm/src/cpu/kernel/asm/memory/packing.asm
@@ -49,6 +49,62 @@ mload_packing_return:
 %%after:
 %endmacro
+// Pre stack: context, segment, offset, len, retdest
+// Post stack: packed_value, i.e. bytes offset..offset+len-1 packed with the byte at `offset` least significant.
+// `len` is expected to be in 1..=32: the unrolled rows compare it against 1..31 and fall through after the 32nd byte.
+global mload_packing_LE:
+    // stack: context, segment, offset, len, retdest
+    SWAP2
+    // stack: offset, segment, context, len, retdest
+    DUP4
+    // stack: len, offset, segment, context, len, retdest
+    ADD
+    %decrement
+    // stack: end_offset, segment, context, len, retdest
+    SWAP2
+    // stack: context, segment, end_offset, len, retdest
+    DUP3 DUP3 DUP3 MLOAD_GENERAL DUP5 %eq_const(1) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(1) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(2) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(2) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(3) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(3) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(4) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(4) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(5) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(5) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(6) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(6) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(7) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(7) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(8) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(8) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(9) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(9) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(10) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(10) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(11) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(11) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(12) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(12) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(13) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(13) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(14) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(14) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(15) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(15) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(16) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(16) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(17) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(17) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(18) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(18) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(19) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(19) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(20) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(20) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(21) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(21) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(22) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(22) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(23) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(23) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(24) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(24) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(25) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(25) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(26) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(26) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(27) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(27) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(28) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(28) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(29) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(29) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(30) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(30) DUP4 DUP4 MLOAD_GENERAL ADD DUP5 %eq_const(31) %jumpi(mload_packing_LE_return) %shl_const(8)
+    DUP4 %sub_const(31) DUP4 DUP4 MLOAD_GENERAL ADD
+mload_packing_LE_return:
+    %stack (packed_value, addr: 3, len, retdest) -> (retdest, packed_value)
+    JUMP
+
+%macro mload_packing_LE
+    %stack (addr: 3, len) -> (addr, len, %%after)
+    %jump(mload_packing_LE)
+%%after:
+%endmacro
+
 // Pre stack: context, segment, offset, value, len, retdest
 // Post stack: offset'
 global mstore_unpacking:
@@ -94,3 +150,56 @@ mstore_unpacking_finish:
     %jump(mstore_unpacking)
 %%after:
 %endmacro
+
+// Pre stack: context, 
segment, offset, value, len, retdest
+// Post stack: offset' (the input offset). Writes the low `len` bytes of value (len <= 32) to offset..offset+len-1, least significant byte first.
+global mstore_unpacking_LE:
+    // stack: context, segment, offset, value, len, retdest
+    SWAP2
+    // stack: offset, segment, context, value, len, retdest
+    DUP5
+    // stack: len, offset, segment, context, value, len, retdest
+    ADD
+    // stack: end_offset = offset + len (one past the last byte), segment, context, value, len, retdest
+    SWAP2
+    // stack: context, segment, end_offset, value, len, retdest
+
+    DUP5 PUSH 32 SUB // enumerate i in (32 - len)..32 so BYTE(i, value) yields the low `len` bytes, most significant first
+    // stack: i=32-len, context, segment, end_offset, value, len, retdest
+
+mstore_unpacking_LE_loop:
+    // stack: i, context, segment, offset, value, len, retdest
+    // If i == 32, finish.
+    PUSH 32
+    DUP2
+    EQ
+    %jumpi(mstore_unpacking_LE_finish)
+
+    // Decrement offset before storing: it starts one past the last byte to write.
+    SWAP3 %decrement SWAP3
+    // stack: i, context, segment, offset, value, len, retdest
+    DUP5 // value
+    DUP2 // i
+    BYTE
+    // stack: value[i], i, context, segment, offset, value, len, retdest
+    DUP5 DUP5 DUP5 // context, segment, offset
+    // stack: context, segment, offset, value[i], i, context, segment, offset, value, len, retdest
+    MSTORE_GENERAL
+    // stack: i, context, segment, offset, value, len, retdest
+
+    // Increment i.
+    %increment
+
+    %jump(mstore_unpacking_LE_loop)
+
+mstore_unpacking_LE_finish:
+    // stack: i, context, segment, offset, value, len, retdest
+    %pop3
+    %stack (offset, value, len, retdest) -> (retdest, offset)
+    JUMP
+
+%macro mstore_unpacking_LE
+    %stack (addr: 3, value, len) -> (addr, value, len, %%after)
+    %jump(mstore_unpacking_LE)
+%%after:
+%endmacro