diff --git a/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm b/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm index 778d82ce..01c02715 100644 --- a/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm +++ b/evm/src/cpu/kernel/asm/core/precompiles/blake2_f.asm @@ -41,84 +41,78 @@ global precompile_blake2_f: %sub_const(8) // stack: t1_addr=flag_addr-8, flag, blake2_f_contd, kexit_info - %stack (t1_addr) -> (@SEGMENT_CALLDATA, t1_addr, 8, t1_addr) - // stack: @SEGMENT_CALLDATA, t1_addr, 8, t1_addr, flag, blake2_f_contd, kexit_info + %stack (t1_addr) -> (@SEGMENT_CALLDATA, t1_addr, t1_addr) + // stack: @SEGMENT_CALLDATA, t1_addr, t1_addr, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, t1_addr, 8, t1_addr, flag, blake2_f_contd, kexit_info - %mload_packing + // stack: ctx, @SEGMENT_CALLDATA, t1_addr, t1_addr, flag, blake2_f_contd, kexit_info + %mload_packing_u64_LE // stack: t_1, t1_addr, flag, blake2_f_contd, kexit_info SWAP1 // stack: t1_addr, t_1, flag, blake2_f_contd, kexit_info %sub_const(8) // stack: t0_addr=t1_addr-8, t_1, flag, blake2_f_contd, kexit_info - %stack (t0_addr) -> (@SEGMENT_CALLDATA, t0_addr, 8, t0_addr) - // stack: @SEGMENT_CALLDATA, t0_addr, 8, t0_addr, t_1, flag, blake2_f_contd, kexit_info + %stack (t0_addr) -> (@SEGMENT_CALLDATA, t0_addr, t0_addr) + // stack: @SEGMENT_CALLDATA, t0_addr, t0_addr, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, t0_addr, 8, t0_addr, t_1, flag, blake2_f_contd, kexit_info - %mload_packing + // stack: ctx, @SEGMENT_CALLDATA, t0_addr, t0_addr, t_1, flag, blake2_f_contd, kexit_info + %mload_packing_u64_LE // stack: t_0, t0_addr, t_1, flag, blake2_f_contd, kexit_info SWAP1 - // stack: t0_addr, t_0, t_1, flag, blake2_f_contd, kexit_info - %sub_const(128) // 16 * 8 - // stack: m0_addr=t0_addr-128, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: t0_addr = m0_addr + 8 * 16, t_0, t_1, flag, blake2_f_contd, kexit_info %rep 16 - // stack: 68 
+ 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - PUSH 8 - // stack: 8, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - DUP2 - // stack: 68 + 8 * i, 8, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: m0_addr + 8 * (16 - i), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %sub_const(8) + // stack: m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + DUP1 + // stack: m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info PUSH @SEGMENT_CALLDATA - // stack: @SEGMENT_CALLDATA, 68 + 8 * i, 8, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 68 + 8 * i, 8, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %mload_packing - // stack: m_i, 68 + 8 * i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: ctx, @SEGMENT_CALLDATA, m0_addr + 8 * (16 - i - 1), m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %mload_packing_u64_LE + // stack: m_i, m0_addr + 8 * (16 - i - 1), m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info SWAP1 - // stack: 68 + 8 * i, m_i, m_(i-1), ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %add_const(8) + // stack: m0_addr + 8 * (16 - i - 1), m_i, m_(i+1), ..., m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %endrep - // stack: 68 + 8 * 16 = 196, m_15, ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %sub_const(192) // 16 * 8 (m values) + 8 * 8 (h values) - // stack: h0_addr, m_15, ..., m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: m0_addr = h0_addr + 8 * 8, m_0, ..., m_15, t_0, t_1, flag, 
blake2_f_contd, kexit_info %rep 8 - // stack: 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - PUSH 8 - // stack: 8, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - DUP2 - // stack: 4 + 8 * i, 8, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: h0_addr + 8 * (8 - i), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %sub_const(8) + // stack: h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + DUP1 + // stack: h0_addr + 8 * (8 - i - 1), h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info PUSH @SEGMENT_CALLDATA - // stack: @SEGMENT_CALLDATA, 4 + 8 * i, 8, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i - 1), h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 4 + 8 * i, 8, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %mload_packing - // stack: h_i, 4 + 8 * i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: ctx, @SEGMENT_CALLDATA, h0_addr + 8 * (8 - i - 1), h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info + %mload_packing_u64_LE + // stack: h_i, h0_addr + 8 * (8 - i - 1), h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info SWAP1 - // stack: 4 + 8 * i, h_i, h_(i-1), ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info - %add_const(8) + // stack: h0_addr + 8 * (8 - i - 1), h_i, h_(i+1), ..., h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %endrep - // stack: 4 + 8 * 8 = 68, h_7, ..., h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: h0_addr, h_0, ..., h_7, m_0..m_15, t_0, t_1, 
flag, blake2_f_contd, kexit_info POP %stack () -> (@SEGMENT_CALLDATA, 0, 4) GET_CONTEXT - // stack: ctx, @SEGMENT_CALLDATA, 0, 4, h_7..h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: ctx, @SEGMENT_CALLDATA, 0, 4, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %mload_packing - // stack: rounds, h_7..h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info DUP1 - // stack: rounds, rounds, h_7..h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: rounds, rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %charge_gas - // stack: rounds, h_7..h_0, m_15..m_0, t_0, t_1, flag, blake2_f_contd, kexit_info + // stack: rounds, h_0..h_7, m_0..m_15, t_0, t_1, flag, blake2_f_contd, kexit_info %jump(blake2_f) blake2_f_contd: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info - // Store the result hash to the parent's return data using `mstore_unpacking`. + // Store the result hash to the parent's return data using `mstore_unpacking_u64_LE`. 
- %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 32) + %mstore_parent_context_metadata(@CTX_METADATA_RETURNDATA_SIZE, 64) PUSH 0 // stack: addr_0=0, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', kexit_info %mload_context_metadata(@CTX_METADATA_PARENT_CONTEXT) @@ -126,11 +120,10 @@ blake2_f_contd: %rep 8 // stack: parent_ctx, addr_i, h_i', ..., h_7', kexit_info - %stack (ctx, addr, h_i) -> (ctx, @SEGMENT_RETURNDATA, addr, h_i, 4, addr, ctx) - // stack: parent_ctx, @SEGMENT_RETURNDATA, addr_i, h_i', 4, addr_i, parent_ctx, h_(i+1)', ..., h_7', kexit_info - %mstore_unpacking + %stack (ctx, addr, h_i) -> (ctx, @SEGMENT_RETURNDATA, addr, h_i, addr, ctx) + %mstore_unpacking_u64_LE // stack: addr_i, parent_ctx, h_(i+1)', ..., h_7', kexit_info - %add_const(4) + %add_const(8) // stack: addr_(i+1), parent_ctx, h_(i+1)', ..., h_7', kexit_info SWAP1 // stack: parent_ctx, addr_(i+1), h_(i+1)', ..., h_7', kexit_info diff --git a/evm/src/cpu/kernel/asm/memory/packing.asm b/evm/src/cpu/kernel/asm/memory/packing.asm index a224fbb4..0f802335 100644 --- a/evm/src/cpu/kernel/asm/memory/packing.asm +++ b/evm/src/cpu/kernel/asm/memory/packing.asm @@ -49,6 +49,25 @@ mload_packing_return: %%after: %endmacro +global mload_packing_u64_LE: + // stack: context, segment, offset, retdest + DUP3 DUP3 DUP3 MLOAD_GENERAL + DUP4 %add_const(1) DUP4 DUP4 MLOAD_GENERAL %shl_const( 8) ADD + DUP4 %add_const(2) DUP4 DUP4 MLOAD_GENERAL %shl_const(16) ADD + DUP4 %add_const(3) DUP4 DUP4 MLOAD_GENERAL %shl_const(24) ADD + DUP4 %add_const(4) DUP4 DUP4 MLOAD_GENERAL %shl_const(32) ADD + DUP4 %add_const(5) DUP4 DUP4 MLOAD_GENERAL %shl_const(40) ADD + DUP4 %add_const(6) DUP4 DUP4 MLOAD_GENERAL %shl_const(48) ADD + DUP4 %add_const(7) DUP4 DUP4 MLOAD_GENERAL %shl_const(56) ADD + %stack (value, context, segment, offset, retdest) -> (retdest, value) + JUMP + +%macro mload_packing_u64_LE + %stack (addr: 3) -> (addr, %%after) + %jump(mload_packing_u64_LE) +%%after: +%endmacro + // Pre stack: context, 
segment, offset, value, len, retdest // Post stack: offset' global mstore_unpacking: @@ -94,3 +113,38 @@ mstore_unpacking_finish: %jump(mstore_unpacking) %%after: %endmacro + +// Pre stack: context, segment, offset, value, retdest +// Post stack: (empty) +global mstore_unpacking_u64_LE: + %stack (context, segment, offset, value) -> (0xff, value, context, segment, offset, value) + AND + DUP4 DUP4 DUP4 MSTORE_GENERAL // First byte + %stack (context, segment, offset, value) -> (0xff00, value, context, segment, offset, value) + AND %shr_const(8) + DUP4 %add_const(1) DUP4 DUP4 MSTORE_GENERAL // Second byte + %stack (context, segment, offset, value) -> (0xff0000, value, context, segment, offset, value) + AND %shr_const(16) + DUP4 %add_const(2) DUP4 DUP4 MSTORE_GENERAL // Third byte + %stack (context, segment, offset, value) -> (0xff000000, value, context, segment, offset, value) + AND %shr_const(24) + DUP4 %add_const(3) DUP4 DUP4 MSTORE_GENERAL // Fourth byte + %stack (context, segment, offset, value) -> (0xff00000000, value, context, segment, offset, value) + AND %shr_const(32) + DUP4 %add_const(4) DUP4 DUP4 MSTORE_GENERAL // Fifth byte + %stack (context, segment, offset, value) -> (0xff0000000000, value, context, segment, offset, value) + AND %shr_const(40) + DUP4 %add_const(5) DUP4 DUP4 MSTORE_GENERAL // Sixth byte + %stack (context, segment, offset, value) -> (0xff000000000000, value, context, segment, offset, value) + AND %shr_const(48) + DUP4 %add_const(6) DUP4 DUP4 MSTORE_GENERAL // Seventh byte + %stack (context, segment, offset, value) -> (0xff00000000000000, value, context, segment, offset, value) + AND %shr_const(56) + DUP4 %add_const(7) DUP4 DUP4 MSTORE_GENERAL // Eighth byte + %pop4 JUMP + +%macro mstore_unpacking_u64_LE + %stack (addr: 3, value) -> (addr, value, %%after) + %jump(mstore_unpacking_u64_LE) +%%after: +%endmacro