diff --git a/evm/Cargo.toml b/evm/Cargo.toml index d9de1d16..37333027 100644 --- a/evm/Cargo.toml +++ b/evm/Cargo.toml @@ -13,7 +13,7 @@ edition = "2021" anyhow = "1.0.40" blake2 = "0.10.5" env_logger = "0.10.0" -eth_trie_utils = "0.4.0" +eth_trie_utils = "0.4.1" ethereum-types = "0.14.0" hex = { version = "0.4.3", optional = true } hex-literal = "0.3.4" diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs index 80dd9392..34e3ce43 100644 --- a/evm/src/cpu/kernel/aggregator.rs +++ b/evm/src/cpu/kernel/aggregator.rs @@ -17,6 +17,7 @@ pub(crate) fn combined_kernel() -> Kernel { include_str!("asm/core/call.asm"), include_str!("asm/core/create.asm"), include_str!("asm/core/create_addresses.asm"), + include_str!("asm/core/gas.asm"), include_str!("asm/core/intrinsic_gas.asm"), include_str!("asm/core/invalid.asm"), include_str!("asm/core/jumpdest_analysis.asm"), diff --git a/evm/src/cpu/kernel/asm/account_code.asm b/evm/src/cpu/kernel/asm/account_code.asm index f10fbc19..3b18e309 100644 --- a/evm/src/cpu/kernel/asm/account_code.asm +++ b/evm/src/cpu/kernel/asm/account_code.asm @@ -1,6 +1,12 @@ -retzero: - %stack (account_ptr, retdest) -> (retdest, 0) - JUMP +global sys_extcodehash: + // stack: kexit_info, address + // TODO: Charge gas. + SWAP1 + // stack: address, kexit_info + %extcodehash + // stack: hash, kexit_info + SWAP1 + EXIT_KERNEL global extcodehash: // stack: address, retdest @@ -12,6 +18,9 @@ global extcodehash: %mload_trie_data // stack: codehash, retdest SWAP1 JUMP +retzero: + %stack (account_ptr, retdest) -> (retdest, 0) + JUMP %macro extcodehash %stack (address) -> (address, %%after) @@ -32,6 +41,7 @@ global extcodehash: global sys_extcodesize: // stack: kexit_info, address + // TODO: Charge gas. 
SWAP1 // stack: address, kexit_info %extcodesize @@ -61,6 +71,8 @@ global extcodesize: // Pre stack: kexit_info, address, dest_offset, offset, size // Post stack: (empty) global sys_extcodecopy: + // TODO: Call %update_mem_bytes to expand memory. + // TODO: Charge other gas. %stack (kexit_info, address, dest_offset, offset, size) -> (address, dest_offset, offset, size, kexit_info) %extcodecopy @@ -104,7 +116,7 @@ extcodecopy_loop: // stack: opcode, offset, code_size, dest_offset, i, size, retdest DUP4 // stack: dest_offset, opcode, offset, code_size, dest_offset, i, size, retdest - %mstore_main + %mstore_current(@SEGMENT_MAIN_MEMORY) // stack: offset, code_size, dest_offset, i, size, retdest %increment // stack: offset+1, code_size, dest_offset, i, size, retdest diff --git a/evm/src/cpu/kernel/asm/core/call.asm b/evm/src/cpu/kernel/asm/core/call.asm index cb66b7fa..103977b2 100644 --- a/evm/src/cpu/kernel/asm/core/call.asm +++ b/evm/src/cpu/kernel/asm/core/call.asm @@ -3,6 +3,7 @@ // Creates a new sub context and executes the code of the given account. global sys_call: // stack: kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size + // TODO: Charge gas. %create_context // stack: new_ctx, kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size @@ -27,6 +28,7 @@ global sys_call: // given account. In particular the storage remains the same. global sys_callcode: // stack: kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size + // TODO: Charge gas. %create_context // stack: new_ctx, kexit_info, gas, address, value, args_offset, args_size, ret_offset, ret_size @@ -50,6 +52,7 @@ global sys_callcode: // CALL if the value sent is not 0. global sys_staticcall: // stack: kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size + // TODO: Charge gas. 
%create_context // stack: new_ctx, kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size @@ -70,6 +73,7 @@ global sys_staticcall: // value remain the same. global sys_delegatecall: // stack: kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size + // TODO: Charge gas. %create_context // stack: new_ctx, kexit_info, gas, address, args_offset, args_size, ret_offset, ret_size diff --git a/evm/src/cpu/kernel/asm/core/create.asm b/evm/src/cpu/kernel/asm/core/create.asm index eb0f821e..cc37f832 100644 --- a/evm/src/cpu/kernel/asm/core/create.asm +++ b/evm/src/cpu/kernel/asm/core/create.asm @@ -1,21 +1,30 @@ // The CREATE syscall. // -// Pre stack: value, CODE_ADDR, code_len, retdest +// Pre stack: kexit_info, value, code_offset, code_len // Post stack: address global sys_create: + // TODO: Charge gas. + %stack (kexit_info, value, code_offset, code_len) + -> (value, 0, @SEGMENT_MAIN_MEMORY, code_offset, code_len, sys_create_finish, kexit_info) %address + // stack: sender, value, CODE_ADDR: 3, code_len, sys_create_finish, kexit_info %jump(create) +sys_create_finish: + // stack: address, kexit_info + SWAP1 + EXIT_KERNEL // Create a new contract account with the traditional address scheme, i.e. // address = KEC(RLP(sender, nonce))[12:] // This can be used both for the CREATE instruction and for contract-creation // transactions. // -// Pre stack: sender, endowment, CODE_ADDR, code_len, retdest +// Pre stack: sender, endowment, CODE_ADDR: 3, code_len, retdest // Post stack: address // Note: CODE_ADDR refers to a (context, segment, offset) tuple. global create: // stack: sender, endowment, CODE_ADDR, code_len, retdest + // TODO: Charge gas. DUP1 %get_nonce // stack: nonce, sender, endowment, CODE_ADDR, code_len, retdest // Call get_create_address and have it return to create_inner. @@ -26,20 +35,34 @@ global create: // CREATE2; see EIP-1014. 
Address will be // address = KEC(0xff || sender || salt || code_hash)[12:] // -// Pre stack: sender, endowment, salt, CODE_ADDR: 3, code_len, retdest +// Pre stack: kexit_info, value, code_offset, code_len, salt // Post stack: address -// Note: CODE_ADDR refers to a (context, segment, offset) tuple. global sys_create2: - // stack: sender, endowment, salt, CODE_ADDR: 3, code_len, retdest - DUP7 DUP7 DUP7 DUP7 // CODE_ADDR: 3, code_len + // stack: kexit_info, value, code_offset, code_len, salt + // TODO: Charge gas. + SWAP4 + %stack (salt) -> (salt, sys_create2_got_address) + // stack: salt, sys_create2_got_address, value, code_offset, code_len, kexit_info + DUP5 // code_len + DUP5 // code_offset + PUSH @SEGMENT_MAIN_MEMORY + PUSH 0 // context KECCAK_GENERAL - // stack: code_hash, sender, endowment, salt, CODE_ADDR: 3, code_len, retdest - - // Call get_create2_address and have it return to create_inner. - %stack (code_hash, sender, endowment, salt) - -> (sender, salt, code_hash, create_inner, sender, endowment) - // stack: sender, salt, CODE_ADDR, code_len, create_inner, sender, endowment, CODE_ADDR, code_len, retdest + // stack: hash, salt, sys_create2_got_address, value, code_offset, code_len, kexit_info + %address + // stack: sender, hash, salt, sys_create2_got_address, value, code_offset, code_len, kexit_info %jump(get_create2_address) +sys_create2_got_address: + // stack: address, value, code_offset, code_len, kexit_info + %address + %stack (sender, address, value, code_offset, code_len, kexit_info) + -> (address, sender, value, 0, @SEGMENT_MAIN_MEMORY, code_offset, code_len, + sys_create2_finish, kexit_info) + %jump(create_inner) +sys_create2_finish: + // stack: address, kexit_info + SWAP1 + EXIT_KERNEL // Pre stack: address, sender, endowment, CODE_ADDR, code_len, retdest // Post stack: address diff --git a/evm/src/cpu/kernel/asm/core/create_addresses.asm b/evm/src/cpu/kernel/asm/core/create_addresses.asm index 2d94ee94..67fd65a6 100644 --- 
a/evm/src/cpu/kernel/asm/core/create_addresses.asm +++ b/evm/src/cpu/kernel/asm/core/create_addresses.asm @@ -8,6 +8,8 @@ global get_create_address: // TODO: Replace with actual implementation. %pop2 PUSH 123 + // stack: address, retdest + %observe_new_address SWAP1 JUMP @@ -21,5 +23,22 @@ global get_create2_address: // TODO: Replace with actual implementation. %pop3 PUSH 123 + // stack: address, retdest + %observe_new_address SWAP1 JUMP + +// This should be called whenever a new address is created. This is only for debugging. It does +// nothing, but just provides a single hook where code can react to newly created addresses. +global observe_new_address: + // stack: address, retdest + SWAP1 + // stack: retdest, address + JUMP + +// Convenience macro to call observe_new_address and return where we left off. +%macro observe_new_address + %stack (address) -> (address, %%after) + %jump(observe_new_address) +%%after: +%endmacro diff --git a/evm/src/cpu/kernel/asm/core/gas.asm b/evm/src/cpu/kernel/asm/core/gas.asm new file mode 100644 index 00000000..78cc065c --- /dev/null +++ b/evm/src/cpu/kernel/asm/core/gas.asm @@ -0,0 +1,58 @@ +global sys_gas: + // stack: kexit_info + %charge_gas_const(@GAS_BASE) + // stack: kexit_info + DUP1 %shr_const(192) + // stack: gas_used, kexit_info + %ctx_gas_limit + // stack: gas_limit, gas_used, kexit_info + SUB + // stack: gas_remaining, kexit_info + SWAP1 + EXIT_KERNEL + +%macro ctx_gas_limit + %mload_context_metadata(@CTX_METADATA_GAS_LIMIT) +%endmacro + +// Charge gas. Faults if we exceed the limit for the current context. +%macro charge_gas + // stack: gas, kexit_info + %shl_const(192) + ADD + // stack: kexit_info' + %ctx_gas_limit + // stack: gas_limit, kexit_info' + DUP2 %shr_const(192) + // stack: gas_used, gas_limit, kexit_info' + GT + // stack: out_of_gas, kexit_info' + %jumpi(fault_exception) + // stack: kexit_info' +%endmacro + +// Charge a constant amount of gas. 
+%macro charge_gas_const(gas) + // stack: kexit_info + PUSH $gas + // stack: gas, kexit_info + %charge_gas + // stack: kexit_info' +%endmacro + +// Charge gas and exit kernel code. +%macro charge_gas_and_exit + // stack: gas, kexit_info + %charge_gas + // stack: kexit_info' + EXIT_KERNEL +%endmacro + +global sys_gasprice: + // stack: kexit_info + %charge_gas_const(@GAS_BASE) + // stack: kexit_info + %mload_txn_field(@TXN_FIELD_COMPUTED_FEE_PER_GAS) + // stack: gas_price, kexit_info + SWAP1 + EXIT_KERNEL diff --git a/evm/src/cpu/kernel/asm/core/syscall_stubs.asm b/evm/src/cpu/kernel/asm/core/syscall_stubs.asm index 6dcbbb6e..26281eea 100644 --- a/evm/src/cpu/kernel/asm/core/syscall_stubs.asm +++ b/evm/src/cpu/kernel/asm/core/syscall_stubs.asm @@ -17,26 +17,16 @@ global sys_balance: PANIC global sys_origin: PANIC -global sys_calldataload: - PANIC global sys_calldatasize: PANIC global sys_calldatacopy: PANIC global sys_codecopy: PANIC -global sys_gasprice: - // stack: kexit_info - %mload_txn_field(@TXN_FIELD_COMPUTED_FEE_PER_GAS) - // stack: gas_price, kexit_info - SWAP1 - EXIT_KERNEL global sys_returndatasize: PANIC global sys_returndatacopy: PANIC -global sys_extcodehash: - PANIC global sys_blockhash: PANIC global sys_coinbase: @@ -54,6 +44,8 @@ global sys_gaslimit: global sys_chainid: // TODO: Return the block's chain ID instead of the txn's, even though they should match. 
// stack: kexit_info + %charge_gas_const(@GAS_BASE) + // stack: kexit_info %mload_txn_field(@TXN_FIELD_CHAIN_ID) // stack: chain_id, kexit_info SWAP1 @@ -62,16 +54,6 @@ global sys_selfbalance: PANIC global sys_basefee: PANIC -global sys_gas: - // stack: kexit_info - DUP1 %shr_const(192) - // stack: gas_used, kexit_info - %mload_context_metadata(@CTX_METADATA_GAS_LIMIT) - // stack: gas_limit, gas_used, kexit_info - SUB - // stack: gas_remaining, kexit_info - SWAP1 - EXIT_KERNEL global sys_log0: PANIC global sys_log1: diff --git a/evm/src/cpu/kernel/asm/core/terminate.asm b/evm/src/cpu/kernel/asm/core/terminate.asm index 341884ea..4a3fbf02 100644 --- a/evm/src/cpu/kernel/asm/core/terminate.asm +++ b/evm/src/cpu/kernel/asm/core/terminate.asm @@ -20,6 +20,7 @@ global sys_return: global sys_selfdestruct: // stack: kexit_info + // TODO: Charge gas. %consume_gas_const(@GAS_SELFDESTRUCT) %leftover_gas // stack: leftover_gas @@ -37,7 +38,7 @@ global sys_revert: PUSH 0 // success %jump(terminate_common) -// The execution is in an exceptional halt-ing state if +// The execution is in an exceptional halting state if // - there is insufficient gas // - the instruction is invalid // - there are insufficient stack items diff --git a/evm/src/cpu/kernel/asm/core/transfer.asm b/evm/src/cpu/kernel/asm/core/transfer.asm index 0ba99fd8..c001e726 100644 --- a/evm/src/cpu/kernel/asm/core/transfer.asm +++ b/evm/src/cpu/kernel/asm/core/transfer.asm @@ -23,21 +23,10 @@ global transfer_eth_failure: %%after: %endmacro -// Pre stack: should_transfer, from, to, amount -// Post stack: (empty) -%macro maybe_transfer_eth - %jumpi(%%transfer) - // We're skipping the transfer, so just pop the arguments and return. - %pop3 - %jump(%%after) -%%transfer: - %transfer_eth -%%after: -%endmacro - // Returns 0 on success, or 1 if addr has insufficient balance. Panics if addr isn't found in the trie. 
// Pre stack: addr, amount, retdest // Post stack: status (0 indicates success) +// TODO: Should it be copy-on-write (with make_account_copy) instead of mutating the trie? global deduct_eth: // stack: addr, amount, retdest %mpt_read_state_trie @@ -73,6 +62,7 @@ global deduct_eth_insufficient_balance: // Pre stack: addr, amount, redest // Post stack: (empty) +// TODO: Should it be copy-on-write (with make_account_copy) instead of mutating the trie? global add_eth: // stack: addr, amount, retdest DUP1 %mpt_read_state_trie diff --git a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm index 11ec27c8..6e9df123 100644 --- a/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm +++ b/evm/src/cpu/kernel/asm/curve/secp256k1/ecrecover.asm @@ -106,19 +106,7 @@ ecdsa_after_precompute_loop_end: // Take a public key (PKx, PKy) and return the associated address KECCAK256(PKx || PKy)[-20:]. pubkey_to_addr: // stack: PKx, PKy, retdest - PUSH 0 - // stack: 0, PKx, PKy, retdest - MSTORE // TODO: switch to kernel memory (like `%mstore_kernel(@SEGMENT_KERNEL_GENERAL)`). - // stack: PKy, retdest - PUSH 0x20 - // stack: 0x20, PKy, retdest - MSTORE - // stack: retdest - PUSH 0x40 - // stack: 0x40, retdest - PUSH 0 - // stack: 0, 0x40, retdest - KECCAK256 + %keccak256_u256_pair // stack: hash, retdest PUSH 0xffffffffffffffffffffffffffffffffffffffff // stack: 2^160-1, hash, retdest diff --git a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm index cd1f6a80..6e8cdb0a 100644 --- a/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/blake2b/compression.asm @@ -85,8 +85,7 @@ compression_loop: // stack: cur_message_addr + 1, cur_block_byte + 8, ... 
%endrep // stack: end_message_addr, end_block_start_byte, t, cur_block, is_last_block, retdest - POP - POP + %pop2 // stack: t, cur_block, is_last_block, retdest SWAP1 // stack: cur_block, t, is_last_block, retdest @@ -128,15 +127,14 @@ compression_loop: // stack: 0, start + 8, invert_if_last_block, t, cur_block, retdest %rep 4 // stack: i, loc, ... - DUP2 - DUP2 - // stack: i, loc, i, loc,... + DUP1 + // stack: i, i, loc, ... %blake2b_iv - // stack: IV_i, loc, i, loc,... - SWAP1 - // stack: loc, IV_i, i, loc,... + // stack: IV_i, i, loc, ... + DUP3 + // stack: loc, IV_i, i, loc, ... %mstore_kernel_general - // stack: i, loc,... + // stack: i, loc, ... %increment SWAP1 %increment @@ -147,15 +145,11 @@ compression_loop: %stack (i, loc, inv, last, t) -> (t, t, i, loc, inv, last) // stack: t, t, 4, start + 12, invert_if_last_block, cur_block, retdest %shr_const(64) - // stack: t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest + // stack: t_hi = t >> 64, t, 4, start + 12, invert_if_last_block, cur_block, retdest SWAP1 - // stack: t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest - PUSH 1 - %shl_const(64) - // stack: 1 << 64, t, t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest - SWAP1 - MOD - // stack: t_lo = t % (1 << 64), t_hi = t >> 64, 4, start + 12, invert_if_last_block, cur_block, retdest + // stack: t, t_hi, 4, start + 12, invert_if_last_block, cur_block, retdest + %mod_const(0x10000000000000000) + // stack: t_lo = t % (1 << 64), t_hi, 4, start + 12, invert_if_last_block, cur_block, retdest %stack (t_lo, t_hi, i, loc, inv) -> (i, loc, t_lo, t_hi, inv, 0) // stack: 4, start + 12, t_lo, t_hi, invert_if_last_block, 0, cur_block, retdest @@ -163,25 +157,31 @@ compression_loop: // the values (t % 2**64, t >> 64, invert_if, 0). %rep 4 // stack: i, loc, val, next_val,... - %stack (i, loc, val) -> (i, val, loc, i, loc) - // stack: i, val, loc, i, loc, next_val,... + DUP1 + // stack: i, i, loc, val, next_val,... 
%blake2b_iv - // stack: IV_i, val, loc, i, loc, next_val,... + // stack: IV_i, i, loc, val, next_val,... + DUP4 + // stack: val, IV_i, i, loc, val, next_val,... XOR - // stack: val ^ IV_i, loc, i, loc, next_val,... - SWAP1 - // stack: loc, val ^ IV_i, i, loc, next_val,... + // stack: val ^ IV_i, i, loc, val, next_val,... + DUP3 + // stack: loc, val ^ IV_i, i, loc, val, next_val,... %mstore_kernel_general - // stack: i, loc, next_val,... + // stack: i, loc, val, next_val,... %increment - SWAP1 + // stack: i + 1, loc, val, next_val,... + SWAP2 + // stack: val, loc, i + 1, next_val,... + POP + // stack: loc, i + 1, next_val,... %increment + // stack: loc + 1, i + 1, next_val,... SWAP1 // stack: i + 1, loc + 1, next_val,... %endrep // stack: 8, loc + 16, cur_block, retdest - POP - POP + %pop2 // stack: cur_block, retdest // Run 12 rounds of G functions. @@ -209,10 +209,9 @@ hash_generate_return: PUSH 0 %mload_kernel_general // stack: num_blocks, cur_block + 1, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - EQ - // stack: last_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest - %jumpi(compression_end) - %jump(compression_loop) + GT + // stack: not_last_block, h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest + %jumpi(compression_loop) compression_end: // stack: h_0', h_1', h_2', h_3', h_4', h_5', h_6', h_7', cur_block + 1, retdest diff --git a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm index 8c219ebb..8f7d942c 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/compression.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/compression.asm @@ -8,22 +8,7 @@ global sha2_compression: // stack: message_schedule_addr, retdest - PUSH 0 - // stack: i=0, message_schedule_addr, retdest - SWAP1 - // stack: message_schedule_addr, i=0, retdest - PUSH 0 - // stack: 0, message_schedule_addr, i=0, retdest - %mload_kernel_general - // stack: num_blocks, 
message_schedule_addr, i=0, retdest - DUP1 - // stack: num_blocks, num_blocks, message_schedule_addr, i=0, retdest - %scratch_space_addr_from_num_blocks - // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, retdest - SWAP1 - // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest // Push the initial hash values; these constants are called H^(0) in the spec. - PUSH 0x5be0cd19 // H^(0)_7 PUSH 0x1f83d9ab // H^(0)_6 PUSH 0x9b05688c // H^(0)_5 PUSH 0x510e527f // H^(0)_4 @@ -31,255 +16,145 @@ global sha2_compression: PUSH 0x3c6ef372 // H^(0)_2 PUSH 0xbb67ae85 // H^(0)_1 PUSH 0x6a09e667 // H^(0)_0 - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + PUSH 0x5be0cd19 // H^(0)_7 + // stack: h[0], a[0], b[0], c[0], d[0], e[0], f[0], g[0], message_schedule_addr, retdest + SWAP8 + // stack: message_schedule_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest + PUSH 0 + // stack: i=0, message_schedule_addr, a[0]..h[0], retdest + SWAP1 + // stack: message_schedule_addr, i=0, a[0]..h[0], retdest + PUSH 0 + // stack: 0, message_schedule_addr, i=0, a[0]..h[0], retdest + %mload_kernel_general + // stack: num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest + DUP1 + // stack: num_blocks, num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest + %scratch_space_addr_from_num_blocks + // stack: scratch_space_addr, num_blocks, message_schedule_addr, i=0, a[0]..h[0], retdest + SWAP1 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest compression_start_block: - // Store the current values of the working variables, as the "initial values" to be added back in at the end of this block. 
- DUP10 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP2 - DUP2 - // stack: scratch_space_addr, a[0], scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP3 - DUP2 - // stack: scratch_space_addr+4, b[0], scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+4, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP4 - DUP2 - // stack: scratch_space_addr+8, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP5 - DUP2 - // stack: scratch_space_addr+12, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // 
stack: scratch_space_addr+12, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP6 - DUP2 - // stack: scratch_space_addr+16, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+16, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP7 - DUP2 - // stack: scratch_space_addr+20, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+20, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP8 - DUP2 - // stack: scratch_space_addr+24, c[0], scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+24, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %add_const(4) - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - - DUP9 - DUP2 - // stack: scratch_space_addr+28, c[0], 
scratch_space_addr+8, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - %mstore_kernel_general_u32 - // stack: scratch_space_addr+28, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest - POP - // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, retdest + // We keep the current values of the working variables saved at the end of the stack. + // These are the "initial values" to be added back in at the end of this block. + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest + %rep 8 + DUP12 + %endrep + // stack: a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], num_blocks, scratch_space_addr, message_schedule_addr, i=0, a[0]..h[0], retdest compression_loop: // Update the eight working variables, using the next constant K[i] and the next message schedule chunk W[i]. 
- // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP11 - // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP13 - // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %mul_const(4) - // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: 4*i, message_schedule_addr, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest ADD - // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: message_schedule_addr + 4*i, a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %mload_kernel_general_u32 - // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest PUSH sha2_constants_k - // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, 
scratch_space_addr, message_schedule_addr, i, retdest + // stack: sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP14 - // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %mul_const(4) - // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: 4*i, sha2_constants_k, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest ADD - // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: sha2_constants_k + 4*i, W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %mload_kernel_code_u32 - // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack (start: 6, e, f, g, h) -> (e, f, g, h, start, e, f, g, h) - // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest + DUP10 + DUP10 + DUP10 + DUP10 + // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %sha2_temp_word1 - // stack: T1[i], a[i], b[i], 
c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %stack (t, a, b, c) -> (a, b, c, t, a, b, c) - // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest + DUP4 + DUP4 + DUP4 + // stack: a[i], b[i], c[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %sha2_temp_word2 - // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP6 - // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP3 - // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: T1[i], d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %add_u32 - // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: e[i+1]=T1[i]+d[i], T2[i], T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest SWAP2 - // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], 
num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: T2[i], T1[i], e[i+1], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %add_u32 - // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: a[i+1]=T1[i]+T2[i], e[i+1], b[i+1]=a[i], c[i+1]=b[i], d[i+1]=c[i], d[i], f[i+1]=e[i], g[i+1]=f[i], h[i+1]=g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %stack (a, e, b, c, d, old_d, f, g, h, old_h) -> (a, b, c, d, e, f, g, h) - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %increment - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP1 - // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i+1, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %eq_const(64) - // stack: i+1==64, i+1, 
a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP1 - // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP12 - // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: num_blocks, i+1==64, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest SUB - // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest + // stack: num_blocks new, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest SWAP13 - // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + // stack: message_schedule_addr, i+1==64, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest SWAP1 - // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest - PUSH 256 - MUL - // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], 
b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + // stack: i+1==64, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest + %mul_const(256) + // stack: (i+1==64)*256, message_schedule_addr, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest ADD - // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, retdest + // stack: message_schedule_addr new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, num_blocks new, i, a[0]..h[0], retdest SWAP12 - // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, retdest + // stack: num_blocks new, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks, scratch_space_addr, message_schedule_addr new, i, a[0]..h[0], retdest SWAP10 - // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest + // stack: num_blocks, i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_a[0]..h[0], retdest POP - // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_retdest + // stack: i+1, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, new_a[0]..h[0], retdest %and_const(63) - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], 
h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, retdest + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, i, a[0]..h[0], retdest SWAP12 - // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + // stack: i, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest POP - // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest + // stack: a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest DUP12 - // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, retdest - ISZERO - %jumpi(compression_end_block) - %jump(compression_loop) + // stack: (i+1)%64, a[i+1], b[i+1], c[i+1], d[i+1], e[i+1], f[i+1], g[i+1], h[i+1], num_blocks new, scratch_space_addr, message_schedule_addr new, (i+1)%64, a[0]..h[0], retdest + %jumpi(compression_loop) compression_end_block: // Add the initial values of the eight working variables (from the start of this block's compression) back into them. 
- // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - // stack: scratch_space_addr, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %mload_kernel_general_u32 - // stack: a[0], a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: a[0]+a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP1 - // stack: b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(4) - %mload_kernel_general_u32 - // stack: b[0], b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: b[0]+b[64], a[0]+a[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP2 - // stack: c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(8) - %mload_kernel_general_u32 - // stack: c[0], c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: c[0]+c[64], a[0]+a[64], b[0]+b[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP3 - // stack: d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(12) - %mload_kernel_general_u32 - // stack: d[0], d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - 
%add_u32 - // stack: d[0]+d[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP4 - // stack: e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(16) - %mload_kernel_general_u32 - // stack: e[0], e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: e[0]+e[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP5 - // stack: f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(20) - %mload_kernel_general_u32 - // stack: f[0], f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: f[0]+f[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP6 - // stack: g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - DUP10 - %add_const(24) - %mload_kernel_general_u32 - // stack: g[0], g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: g[0]+g[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP7 - // stack: h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, 
message_schedule_addr, i, retdest - DUP10 - %add_const(28) - %mload_kernel_general_u32 - // stack: h[0], h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - %add_u32 - // stack: h[0]+h[64], a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], num_blocks, scratch_space_addr, message_schedule_addr, i, retdest - SWAP8 - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + // stack: a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest + PUSH 0 + // stack: 0, a[64], b[64], c[64], d[64], e[64], f[64], g[64], h[64], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0], b[0], c[0], d[0], e[0], f[0], g[0], h[0], retdest + %rep 8 + SWAP13 + %add_u32 + SWAP12 + %endrep + // stack: 0, num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest + POP + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest DUP1 - // stack: num_blocks, num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest + // stack: num_blocks, num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest ISZERO // In this case, we've finished all the blocks. 
%jumpi(compression_end) - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %stack (num_blocks, working: 8) -> (working, num_blocks) + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest %jump(compression_start_block) compression_end: - // stack: num_blocks, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - POP - // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], scratch_space_addr, message_schedule_addr, i, retdest - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - %shl_const(32) - ADD // OR - // stack: concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), scratch_space_addr, message_schedule_addr, i, retdest - SWAP3 - // stack: i, scratch_space_addr, message_schedule_addr, concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest - %pop3 + // stack: num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest + %pop4 + // stack: a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64], retdest + %rep 7 + %shl_const(32) + ADD // OR + %endrep // stack: sha2_result = concat(a[0]+a[64], b[0]+b[64], c[0]+c[64], d[0]+d[64], e[0]+e[64], f[0]+f[64], g[0]+g[64], h[0]+h[64]), retdest SWAP1 JUMP diff --git a/evm/src/cpu/kernel/asm/hash/sha2/main.asm b/evm/src/cpu/kernel/asm/hash/sha2/main.asm index 058224f6..1deab294 
100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/main.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/main.asm @@ -19,9 +19,7 @@ global sha2_pad: // STEP 1: append 1 // insert 128 (= 1 << 7) at x[num_bytes+1] // stack: num_bytes, retdest - PUSH 1 - PUSH 7 - SHL + PUSH 0x80 // stack: 128, num_bytes, retdest DUP2 // stack: num_bytes, 128, num_bytes, retdest @@ -40,14 +38,12 @@ global sha2_pad: // STEP 3: calculate length := num_bytes*8 SWAP1 // stack: num_bytes, num_blocks, retdest - PUSH 8 - MUL + %mul_const(8) // stack: length = num_bytes*8, num_blocks, retdest // STEP 4: write length to x[num_blocks*64-7..num_blocks*64] DUP2 // stack: num_blocks, length, num_blocks, retdest - PUSH 64 - MUL + %mul_const(64) // stack: last_addr = num_blocks*64, length, num_blocks, retdest %sha2_write_length // stack: num_blocks, retdest diff --git a/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm b/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm index 78d98634..d8f0500d 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/message_schedule.asm @@ -55,16 +55,13 @@ gen_message_schedule_from_block_0_loop: // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest %decrement DUP1 - ISZERO - %jumpi(gen_message_schedule_from_block_0_end) - %jump(gen_message_schedule_from_block_0_loop) + %jumpi(gen_message_schedule_from_block_0_loop) gen_message_schedule_from_block_0_end: // stack: old counter=0, output_addr, block[0], block[1], retdest POP - PUSH 8 - // stack: counter=8, output_addr, block[0], block[1], retdest - %stack (counter, out, b0, b1) -> (out, counter, b1, b0) - // stack: output_addr, counter, block[1], block[0], retdest + // stack: output_addr, block[0], block[1], retdest + %stack (out, b0, b1) -> (out, 8, b1, b0) + // stack: output_addr, counter=8, block[1], block[0], retdest %add_const(64) // stack: output_addr + 64, counter, block[1], block[0], retdest SWAP1 @@ -96,9 +93,7 @@ 
gen_message_schedule_from_block_1_loop: // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest %decrement DUP1 - ISZERO - %jumpi(gen_message_schedule_from_block_1_end) - %jump(gen_message_schedule_from_block_1_loop) + %jumpi(gen_message_schedule_from_block_1_loop) gen_message_schedule_from_block_1_end: // stack: old counter=0, output_addr, block[1], block[0], retdest POP @@ -118,11 +113,7 @@ gen_message_schedule_remaining_loop: // stack: output_addr, counter, block[0], block[1], retdest DUP1 // stack: output_addr, output_addr, counter, block[0], block[1], retdest - PUSH 2 - PUSH 4 - MUL - SWAP1 - SUB + %sub_const(8) // stack: output_addr - 2*4, output_addr, counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 2*4], output_addr, counter, block[0], block[1], retdest @@ -132,11 +123,7 @@ gen_message_schedule_remaining_loop: // stack: output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest DUP1 // stack: output_addr, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 7 - PUSH 4 - MUL - SWAP1 - SUB + %sub_const(28) // stack: output_addr - 7*4, output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 7*4], output_addr, sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest @@ -144,11 +131,7 @@ gen_message_schedule_remaining_loop: // stack: output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest DUP1 // stack: output_addr, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 15 - PUSH 4 - MUL - SWAP1 - SUB + %sub_const(60) // stack: output_addr - 15*4, output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 15*4], output_addr, x[output_addr - 7*4], sigma_1(x[output_addr - 
2*4]), counter, block[0], block[1], retdest @@ -158,11 +141,7 @@ gen_message_schedule_remaining_loop: // stack: output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest DUP1 // stack: output_addr, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest - PUSH 16 - PUSH 4 - MUL - SWAP1 - SUB + %sub_const(64) // stack: output_addr - 16*4, output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest %mload_kernel_general_u32 // stack: x[output_addr - 16*4], output_addr, sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest @@ -185,9 +164,7 @@ gen_message_schedule_remaining_loop: %decrement // stack: counter - 1, output_addr + 4, block[0], block[1], retdest DUP1 - ISZERO - %jumpi(gen_message_schedule_remaining_end) - %jump(gen_message_schedule_remaining_loop) + %jumpi(gen_message_schedule_remaining_loop) gen_message_schedule_remaining_end: // stack: counter=0, output_addr, block[0], block[1], retdest %pop4 @@ -230,9 +207,7 @@ gen_all_message_schedules_loop_end: // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest DUP2 // stack: counter - 1, cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest - ISZERO - %jumpi(gen_all_message_schedules_end) - %jump(gen_all_message_schedules_loop) + %jumpi(gen_all_message_schedules_loop) gen_all_message_schedules_end: // stack: cur_addr + 64, counter - 1, cur_output_addr + 256, output_addr, retdest %pop3 diff --git a/evm/src/cpu/kernel/asm/hash/sha2/ops.asm b/evm/src/cpu/kernel/asm/hash/sha2/ops.asm index 7d8054ca..6a4c5e3b 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/ops.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/ops.asm @@ -26,14 +26,15 @@ // stack: x, x %rotr(7) // stack: rotr(x, 7), x - 
%stack (rotated, x) -> (x, x, rotated) + SWAP1 + // stack: x, rotr(x, 7) + DUP1 // stack: x, x, rotr(x, 7) %rotr(18) // stack: rotr(x, 18), x, rotr(x, 7) SWAP1 // stack: x, rotr(x, 18), rotr(x, 7) - PUSH 3 - SHR + %div_const(8) // equivalent to %shr_const(3) // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) XOR XOR @@ -45,7 +46,9 @@ // stack: x, x %rotr(17) // stack: rotr(x, 17), x - %stack (rotated, x) -> (x, x, rotated) + SWAP1 + // stack: x, rotr(x, 17) + DUP1 // stack: x, x, rotr(x, 17) %rotr(19) // stack: rotr(x, 19), x, rotr(x, 17) @@ -64,7 +67,9 @@ // stack: x, x %rotr(2) // stack: rotr(x, 2), x - %stack (rotated, x) -> (x, x, rotated) + SWAP1 + // stack: x, rotr(x, 2) + DUP1 // stack: x, x, rotr(x, 2) %rotr(13) // stack: rotr(x, 13), x, rotr(x, 2) @@ -82,7 +87,9 @@ // stack: x, x %rotr(6) // stack: rotr(x, 6), x - %stack (rotated, x) -> (x, x, rotated) + SWAP1 + // stack: x, rotr(x, 6) + DUP1 // stack: x, x, rotr(x, 6) %rotr(11) // stack: rotr(x, 11), x, rotr(x, 6) @@ -100,11 +107,13 @@ // stack: x, x, y, z NOT // stack: not x, x, y, z - %stack (notx, x, y, z) -> (notx, z, x, y) - // stack: not x, z, x, y + SWAP1 + // stack: x, not x, y, z + SWAP3 + // stack: z, not x, y, x AND - // stack: (not x) and z, x, y - %stack (nxz, x, y) -> (x, y, nxz) + // stack: (not x) and z, y, x + SWAP2 // stack: x, y, (not x) and z AND // stack: x and y, (not x) and z @@ -113,18 +122,22 @@ %macro sha2_majority // stack: x, y, z - %stack (xyz: 3) -> (xyz, xyz) - // stack: x, y, z, x, y, z + DUP1 + // stack: x, x, y, z + DUP3 + // stack: y, x, x, y, z + DUP5 + // stack: z, y, x, x, y, z AND - // stack: x and y, z, x, y, z + // stack: z and y, x, x, y, z + SWAP4 + // stack: z, x, x, y, z and y + AND + // stack: z and x, x, y, z and y SWAP2 - // stack: x, z, x and y, y, z + // stack: y, x, z and x, z and y AND - // stack: x and z, x and y, y, z - %stack (a: 2, b: 2) -> (b, a) - // stack: y, z, x and z, x and y - AND - // stack: y and z, x and z, x and y + // stack: y and x, z and x, z 
and y OR OR %endmacro diff --git a/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm b/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm index 5727498c..4f73fa79 100644 --- a/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm +++ b/evm/src/cpu/kernel/asm/hash/sha2/write_length.asm @@ -10,110 +10,24 @@ // stack: last_addr, length % (1 << 8), length, last_addr %mstore_kernel_general - // stack: length, last_addr - SWAP1 - %decrement - SWAP1 - // stack: length, last_addr - 1 - %shr_const(8) - // stack: length >> 8, last_addr - 1 - DUP1 - // stack: length >> 8, length >> 8, last_addr - 1 - %and_const(0xff) - // stack: (length >> 8) % (1 << 8), length >> 8, last_addr - 1 - DUP3 - // stack: last_addr - 1, (length >> 8) % (1 << 8), length >> 8, last_addr - 1 - %mstore_kernel_general - - // stack: length >> 8, last_addr - 1 - SWAP1 - %decrement - SWAP1 - // stack: length >> 8, last_addr - 2 - %shr_const(8) - // stack: length >> 16, last_addr - 2 - DUP1 - // stack: length >> 16, length >> 16, last_addr - 2 - %and_const(0xff) - // stack: (length >> 16) % (1 << 8), length >> 16, last_addr - 2 - DUP3 - // stack: last_addr - 2, (length >> 16) % (1 << 8), length >> 16, last_addr - 2 - %mstore_kernel_general + %rep 7 + // For i = 0 to 6 + // stack: length >> (8 * i), last_addr - i + SWAP1 + %decrement + SWAP1 + // stack: length >> (8 * i), last_addr - i - 1 + %div_const(256) // equivalent to %shr_const(8) + // stack: length >> (8 * (i + 1)), last_addr - i - 1 + DUP1 + // stack: length >> (8 * (i + 1)), length >> (8 * (i + 1)), last_addr - i - 1 + %mod_const(256) + // stack: (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 1 + DUP3 + // stack: last_addr - i - 1, (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 1 + %mstore_kernel_general + %endrep - // stack: length >> 16, last_addr - 2 - SWAP1 - %decrement - SWAP1 - // stack: length >> 16, last_addr - 3 - %shr_const(8) - // stack: length >> 24, last_addr - 3 - DUP1 -
// stack: length >> 24, length >> 24, last_addr - 3 - %and_const(0xff) - // stack: (length >> 24) % (1 << 8), length >> 24, last_addr - 3 - DUP3 - // stack: last_addr - 3, (length >> 24) % (1 << 8), length >> 24, last_addr - 3 - %mstore_kernel_general - - // stack: length >> 24, last_addr - 3 - SWAP1 - %decrement - SWAP1 - // stack: length >> 24, last_addr - 4 - %shr_const(8) - // stack: length >> 32, last_addr - 4 - DUP1 - // stack: length >> 32, length >> 32, last_addr - 4 - %and_const(0xff) - // stack: (length >> 32) % (1 << 8), length >> 32, last_addr - 4 - DUP3 - // stack: last_addr - 4, (length >> 32) % (1 << 8), length >> 32, last_addr - 4 - %mstore_kernel_general - - // stack: length >> 32, last_addr - 4 - SWAP1 - %decrement - SWAP1 - // stack: length >> 32, last_addr - 5 - %shr_const(8) - // stack: length >> 40, last_addr - 5 - DUP1 - // stack: length >> 40, length >> 40, last_addr - 5 - %and_const(0xff) - // stack: (length >> 40) % (1 << 8), length >> 40, last_addr - 5 - DUP3 - // stack: last_addr - 5, (length >> 40) % (1 << 8), length >> 40, last_addr - 5 - %mstore_kernel_general - - // stack: length >> 40, last_addr - 5 - SWAP1 - %decrement - SWAP1 - // stack: length >> 40, last_addr - 6 - %shr_const(8) - // stack: length >> 48, last_addr - 6 - DUP1 - // stack: length >> 48, length >> 48, last_addr - 6 - %and_const(0xff) - // stack: (length >> 48) % (1 << 8), length >> 48, last_addr - 6 - DUP3 - // stack: last_addr - 6, (length >> 48) % (1 << 8), length >> 48, last_addr - 6 - %mstore_kernel_general - - // stack: length >> 48, last_addr - 6 - SWAP1 - %decrement - SWAP1 - // stack: length >> 48, last_addr - 7 - %shr_const(8) - // stack: length >> 56, last_addr - 7 - DUP1 - // stack: length >> 56, length >> 56, last_addr - 7 - %and_const(0xff) - // stack: (length >> 56) % (1 << 8), length >> 56, last_addr - 7 - DUP3 - // stack: last_addr - 7, (length >> 56) % (1 << 8), length >> 56, last_addr - 7 - %mstore_kernel_general %pop2 // stack: (empty) %endmacro 
diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index 8f59a128..24f35ada 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -419,21 +419,3 @@ %mstore_kernel_general_2 // stack: (empty) %endmacro - -%macro mload_main - // stack: offset - DUP1 - // stack: offset, offset - %update_msize - // stack: offset - %mload_current(@SEGMENT_MAIN_MEMORY) -%endmacro - -%macro mstore_main - // stack: offset, value - DUP1 - // stack: offset, offset, value - %update_msize - // stack: offset, value - %mstore_current(@SEGMENT_MAIN_MEMORY) -%endmacro diff --git a/evm/src/cpu/kernel/asm/memory/metadata.asm b/evm/src/cpu/kernel/asm/memory/metadata.asm index 7ea6d9e5..89b39707 100644 --- a/evm/src/cpu/kernel/asm/memory/metadata.asm +++ b/evm/src/cpu/kernel/asm/memory/metadata.asm @@ -39,6 +39,8 @@ %endmacro global sys_address: + // stack: kexit_info + %charge_gas_const(@GAS_BASE) // stack: kexit_info %address // stack: address, kexit_info @@ -50,6 +52,8 @@ global sys_address: %endmacro global sys_caller: + // stack: kexit_info + %charge_gas_const(@GAS_BASE) // stack: kexit_info %caller // stack: caller, kexit_info @@ -65,6 +69,8 @@ global sys_caller: %endmacro global sys_codesize: + // stack: kexit_info + %charge_gas_const(@GAS_BASE) // stack: kexit_info %codesize // stack: codesize, kexit_info @@ -72,34 +78,100 @@ global sys_codesize: EXIT_KERNEL global sys_callvalue: + // stack: kexit_info + %charge_gas_const(@GAS_BASE) // stack: kexit_info %callvalue // stack: callvalue, kexit_info SWAP1 EXIT_KERNEL +%macro mem_words + %mload_context_metadata(@CTX_METADATA_MEM_WORDS) +%endmacro + %macro msize - %mload_context_metadata(@CTX_METADATA_MSIZE) + %mem_words + %mul_const(32) %endmacro global sys_msize: + // stack: kexit_info + %charge_gas_const(@GAS_BASE) // stack: kexit_info %msize // stack: msize, kexit_info SWAP1 EXIT_KERNEL -%macro update_msize - // stack: offset - %add_const(32) - // stack: 32 +
offset - %div_const(32) - // stack: (offset+32)/32 = ceil_div_usize(offset+1, 32) - %mul_const(32) - // stack: ceil_div_usize(offset+1, 32) * 32 - %msize - // stack: current_msize, ceil_div_usize(offset+1, 32) * 32 - %max - // stack: new_msize - %mstore_context_metadata(@CTX_METADATA_MSIZE) +%macro update_mem_words + // stack: num_words, kexit_info + %mem_words + // stack: old_num_words, num_words, kexit_info + DUP2 DUP2 GT + // stack: old_num_words > num_words, old_num_words, num_words, kexit_info + %jumpi(%%no_update) + // stack: old_num_words, num_words, kexit_info + %memory_cost + // stack: old_cost, num_words, kexit_info + SWAP1 + // stack: num_words, old_cost, kexit_info + DUP1 %mstore_context_metadata(@CTX_METADATA_MEM_WORDS) + // stack: num_words, old_cost, kexit_info + %memory_cost + // stack: new_cost, old_cost, kexit_info + SUB + // stack: additional_cost, kexit_info + %charge_gas + %jump(%%end) +%%no_update: + // No expansion needed; drop both word counts so that every path leaves only kexit_info. + // stack: old_num_words, num_words, kexit_info + %pop2 +%%end: + // stack: kexit_info +%endmacro + +%macro update_mem_bytes + // stack: num_bytes, kexit_info + %num_bytes_to_num_words + // stack: num_words, kexit_info + %update_mem_words + // stack: kexit_info +%endmacro + +%macro num_bytes_to_num_words + // stack: num_bytes + %add_const(31) + // stack: 31 + num_bytes + %div_const(32) + // stack: (num_bytes + 31) / 32 +%endmacro + +%macro memory_cost + // stack: num_words + DUP1 + // stack: num_words, num_words + %mul_const(@GAS_MEMORY) + // stack: num_words * GAS_MEMORY, num_words + SWAP1 + // stack: num_words, num_words * GAS_MEMORY + %square + %div_const(512) + // stack: num_words^2 / 512, num_words * GAS_MEMORY + ADD + // stack: cost = num_words^2 / 512 + num_words * GAS_MEMORY +%endmacro + +// Faults if the given offset is "unreasonable", i.e. the associated memory expansion cost +// would exceed any reasonable block limit. +// We do this to avoid overflows in future gas-related calculations. +%macro ensure_reasonable_offset + // stack: offset + // The memory expansion cost, (50000000 / 32)^2 / 512, is around 2^32 gas, + // i.e.
greater than any reasonable block limit. + %gt_const(50000000) + // stack: is_unreasonable + %jumpi(fault_exception) + // stack: (empty) %endmacro diff --git a/evm/src/cpu/kernel/asm/memory/syscalls.asm b/evm/src/cpu/kernel/asm/memory/syscalls.asm index 3b56a7fd..3045be6d 100644 --- a/evm/src/cpu/kernel/asm/memory/syscalls.asm +++ b/evm/src/cpu/kernel/asm/memory/syscalls.asm @@ -1,4 +1,12 @@ global sys_mload: + // stack: kexit_info, offset + DUP2 %ensure_reasonable_offset + // stack: kexit_info, offset + %charge_gas_const(@GAS_VERYLOW) + // stack: kexit_info, offset + DUP2 %add_const(32) + // stack: expanded_num_bytes, kexit_info, offset + %update_mem_bytes // stack: kexit_info, offset PUSH 0 // acc = 0 // stack: acc, kexit_info, offset @@ -38,6 +46,14 @@ global sys_mload: EXIT_KERNEL global sys_mstore: + // stack: kexit_info, offset, value + DUP2 %ensure_reasonable_offset + // stack: kexit_info, offset, value + %charge_gas_const(@GAS_VERYLOW) + // stack: kexit_info, offset, value + DUP2 %add_const(32) + // stack: expanded_num_bytes, kexit_info, offset, value + %update_mem_bytes // stack: kexit_info, offset, value DUP3 PUSH 0 BYTE DUP3 %add_const( 0) %mstore_current(@SEGMENT_MAIN_MEMORY) DUP3 PUSH 1 BYTE DUP3 %add_const( 1) %mstore_current(@SEGMENT_MAIN_MEMORY) @@ -75,8 +91,30 @@ global sys_mstore: EXIT_KERNEL global sys_mstore8: + // stack: kexit_info, offset, value + DUP2 %ensure_reasonable_offset + // stack: kexit_info, offset, value + %charge_gas_const(@GAS_VERYLOW) + // stack: kexit_info, offset, value + DUP2 %increment + // stack: expanded_num_bytes, kexit_info, offset, value + %update_mem_bytes // stack: kexit_info, offset, value %stack (kexit_info, offset, value) -> (offset, value, kexit_info) %mstore_current(@SEGMENT_MAIN_MEMORY) // stack: kexit_info EXIT_KERNEL + +global sys_calldataload: + // stack: kexit_info, i + %charge_gas_const(@GAS_VERYLOW) + // stack: kexit_info, i + %stack (kexit_info, i) -> (@SEGMENT_CALLDATA, i, 32, 
sys_calldataload_after_mload_packing, kexit_info) + GET_CONTEXT + // stack: ADDR: 3, 32, sys_calldataload_after_mload_packing, kexit_info + %jump(mload_packing) +sys_calldataload_after_mload_packing: + // stack: value, kexit_info + SWAP1 + EXIT_KERNEL + PANIC diff --git a/evm/src/cpu/kernel/asm/mpt/accounts.asm b/evm/src/cpu/kernel/asm/mpt/accounts.asm index 08291048..050dbb41 100644 --- a/evm/src/cpu/kernel/asm/mpt/accounts.asm +++ b/evm/src/cpu/kernel/asm/mpt/accounts.asm @@ -38,8 +38,8 @@ global make_account_copy: DUP2 %mload_trie_data %append_to_trie_data DUP2 %add_const(1) %mload_trie_data %append_to_trie_data - DUP2 %add_const(3) %mload_trie_data %append_to_trie_data - SWAP1 %add_const(4) %mload_trie_data %append_to_trie_data + DUP2 %add_const(2) %mload_trie_data %append_to_trie_data + SWAP1 %add_const(3) %mload_trie_data %append_to_trie_data // stack: new_account_ptr, retdest SWAP1 diff --git a/evm/src/cpu/kernel/asm/mpt/hash/hash.asm b/evm/src/cpu/kernel/asm/mpt/hash/hash.asm index 0c8beae7..4209f06c 100644 --- a/evm/src/cpu/kernel/asm/mpt/hash/hash.asm +++ b/evm/src/cpu/kernel/asm/mpt/hash/hash.asm @@ -118,16 +118,19 @@ global encode_node_empty: // stack: node_type, node_payload_ptr, encode_value, retdest %pop3 // stack: retdest - // An empty node is encoded as a single byte, 0x80, which is the RLP - // encoding of the empty string. Write this byte to RLP[0] and return - // (0, 1). + // An empty node is encoded as a single byte, 0x80, which is the RLP encoding of the empty string. + // TODO: Write this byte just once to RLP memory, then we can always return (0, 1). 
+ %alloc_rlp_block + // stack: rlp_pos, retdest PUSH 0x80 - PUSH 0 + // stack: 0x80, rlp_pos, retdest + DUP2 + // stack: rlp_pos, 0x80, rlp_pos, retdest %mstore_rlp - %stack (retdest) -> (retdest, 0, 1) + %stack (rlp_pos, retdest) -> (retdest, rlp_pos, 1) JUMP -encode_node_branch: +global encode_node_branch: // stack: node_type, node_payload_ptr, encode_value, retdest POP // stack: node_payload_ptr, encode_value, retdest @@ -135,6 +138,7 @@ encode_node_branch: // Get the next unused offset within the encoded child buffers. // Then immediately increment the next unused offset by 16, so any // recursive calls will use nonoverlapping offsets. + // TODO: Allocate a block of RLP memory instead? %mload_global_metadata(@GLOBAL_METADATA_TRIE_ENCODED_CHILD_SIZE) DUP1 %add_const(16) %mstore_global_metadata(@GLOBAL_METADATA_TRIE_ENCODED_CHILD_SIZE) @@ -150,41 +154,41 @@ encode_node_branch: // stack: base_offset, node_payload_ptr, encode_value, retdest // Now, append each child to our RLP tape. - PUSH 9 // rlp_pos; we start at 9 to leave room to prepend a list prefix + %alloc_rlp_block DUP1 + // stack: rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, retdest %append_child(0) %append_child(1) %append_child(2) %append_child(3) %append_child(4) %append_child(5) %append_child(6) %append_child(7) %append_child(8) %append_child(9) %append_child(10) %append_child(11) %append_child(12) %append_child(13) %append_child(14) %append_child(15) - // stack: rlp_pos', base_offset, node_payload_ptr, encode_value, retdest + // stack: rlp_pos', rlp_start, base_offset, node_payload_ptr, encode_value, retdest - // We no longer need base_offset. 
- SWAP1 - POP - - // stack: rlp_pos', node_payload_ptr, encode_value, retdest - SWAP1 + %stack (rlp_pos, rlp_start, base_offset, node_payload_ptr) + -> (node_payload_ptr, rlp_pos, rlp_start) %add_const(16) - // stack: value_ptr_ptr, rlp_pos', encode_value, retdest + // stack: value_ptr_ptr, rlp_pos', rlp_start, encode_value, retdest %mload_trie_data - // stack: value_ptr, rlp_pos', encode_value, retdest + // stack: value_ptr, rlp_pos', rlp_start, encode_value, retdest DUP1 %jumpi(encode_node_branch_with_value) + // No value; append the empty string (0x80). - // stack: value_ptr, rlp_pos', encode_value, retdest - %stack (value_ptr, rlp_pos, encode_value) -> (rlp_pos, 0x80, rlp_pos) + // stack: value_ptr, rlp_pos', rlp_start, encode_value, retdest + %stack (value_ptr, rlp_pos, rlp_start, encode_value) -> (rlp_pos, 0x80, rlp_pos, rlp_start) %mstore_rlp - // stack: rlp_pos', retdest + // stack: rlp_pos', rlp_start, retdest %increment - // stack: rlp_pos'', retdest + // stack: rlp_pos'', rlp_start, retdest %jump(encode_node_branch_prepend_prefix) encode_node_branch_with_value: - // stack: value_ptr, rlp_pos', encode_value, retdest - %stack (value_ptr, rlp_pos, encode_value) - -> (encode_value, rlp_pos, value_ptr, encode_node_branch_prepend_prefix) + // stack: value_ptr, rlp_pos', rlp_start, encode_value, retdest + %stack (value_ptr, rlp_pos, rlp_start, encode_value) + -> (encode_value, rlp_pos, value_ptr, encode_node_branch_prepend_prefix, rlp_start) JUMP // call encode_value encode_node_branch_prepend_prefix: - // stack: rlp_pos'', retdest + // stack: rlp_pos'', rlp_start, retdest %prepend_rlp_list_prefix - %stack (start_pos, rlp_len, retdest) -> (retdest, start_pos, rlp_len) + // stack: rlp_prefix_start, rlp_len, retdest + %stack (rlp_prefix_start, rlp_len, retdest) + -> (retdest, rlp_prefix_start, rlp_len) JUMP // Part of the encode_node_branch function. Encodes the i'th child. 
@@ -208,27 +212,28 @@ encode_node_branch_prepend_prefix: // Part of the encode_node_branch function. Appends the i'th child's RLP. %macro append_child(i) - // stack: rlp_pos, base_offset, node_payload_ptr, encode_value, retdest - DUP2 %add_const($i) %mload_kernel(@SEGMENT_TRIE_ENCODED_CHILD) // load result - DUP3 %add_const($i) %mload_kernel(@SEGMENT_TRIE_ENCODED_CHILD_LEN) // load result_len - // stack: result_len, result, rlp_pos, base_offset, node_payload_ptr, encode_value, retdest + // stack: rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, retdest + DUP3 %add_const($i) %mload_kernel(@SEGMENT_TRIE_ENCODED_CHILD) // load result + DUP4 %add_const($i) %mload_kernel(@SEGMENT_TRIE_ENCODED_CHILD_LEN) // load result_len + // stack: result_len, result, rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, retdest // If result_len != 32, result is raw RLP, with an appropriate RLP prefix already. DUP1 %sub_const(32) %jumpi(%%unpack) // Otherwise, result is a hash, and we need to add the prefix 0x80 + 32 = 160. 
- // stack: result_len, result, rlp_pos, base_offset, node_payload_ptr, encode_value, retdest + // stack: result_len, result, rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, retdest PUSH 160 DUP4 // rlp_pos %mstore_rlp SWAP2 %increment SWAP2 // rlp_pos += 1 %%unpack: - %stack (result_len, result, rlp_pos, base_offset, node_payload_ptr, encode_value, retdest) - -> (rlp_pos, result, result_len, %%after_unpacking, base_offset, node_payload_ptr, encode_value, retdest) + %stack (result_len, result, rlp_pos, rlp_start, base_offset, node_payload_ptr, encode_value, retdest) + -> (rlp_pos, result, result_len, %%after_unpacking, + rlp_start, base_offset, node_payload_ptr, encode_value, retdest) %jump(mstore_unpacking_rlp) %%after_unpacking: - // stack: rlp_pos', base_offset, node_payload_ptr, encode_value, retdest + // stack: rlp_pos', rlp_start, base_offset, node_payload_ptr, encode_value, retdest %endmacro -encode_node_extension: +global encode_node_extension: // stack: node_type, node_payload_ptr, encode_value, retdest %stack (node_type, node_payload_ptr, encode_value) -> (node_payload_ptr, encode_value, encode_node_extension_after_encode_child, node_payload_ptr) @@ -237,61 +242,66 @@ encode_node_extension: %jump(encode_or_hash_node) encode_node_extension_after_encode_child: // stack: result, result_len, node_payload_ptr, retdest + %alloc_rlp_block + // stack: rlp_start, result, result_len, node_payload_ptr, retdest PUSH encode_node_extension_after_hex_prefix // retdest PUSH 0 // terminated - // stack: terminated, encode_node_extension_after_hex_prefix, result, result_len, node_payload_ptr, retdest - DUP5 %increment %mload_trie_data // Load the packed_nibbles field, which is at index 1. - // stack: packed_nibbles, terminated, encode_node_extension_after_hex_prefix, result, result_len, node_payload_ptr, retdest - DUP6 %mload_trie_data // Load the num_nibbles field, which is at index 0. 
- // stack: num_nibbles, packed_nibbles, terminated, encode_node_extension_after_hex_prefix, result, result_len, node_payload_ptr, retdest - PUSH 9 // We start at 9 to leave room to prepend the largest possible RLP list header. - // stack: rlp_start, num_nibbles, packed_nibbles, terminated, encode_node_extension_after_hex_prefix, result, result_len, node_payload_ptr, retdest + // stack: terminated, encode_node_extension_after_hex_prefix, rlp_start, result, result_len, node_payload_ptr, retdest + DUP6 %increment %mload_trie_data // Load the packed_nibbles field, which is at index 1. + // stack: packed_nibbles, terminated, encode_node_extension_after_hex_prefix, rlp_start, result, result_len, node_payload_ptr, retdest + DUP7 %mload_trie_data // Load the num_nibbles field, which is at index 0. + // stack: num_nibbles, packed_nibbles, terminated, encode_node_extension_after_hex_prefix, rlp_start, result, result_len, node_payload_ptr, retdest + DUP5 + // stack: rlp_start, num_nibbles, packed_nibbles, terminated, encode_node_extension_after_hex_prefix, rlp_start, result, result_len, node_payload_ptr, retdest %jump(hex_prefix_rlp) encode_node_extension_after_hex_prefix: - // stack: rlp_pos, result, result_len, node_payload_ptr, retdest + // stack: rlp_pos, rlp_start, result, result_len, node_payload_ptr, retdest // If result_len != 32, result is raw RLP, with an appropriate RLP prefix already. - DUP3 %sub_const(32) %jumpi(encode_node_extension_unpack) + DUP4 %sub_const(32) %jumpi(encode_node_extension_unpack) // Otherwise, result is a hash, and we need to add the prefix 0x80 + 32 = 160. 
PUSH 160 DUP2 // rlp_pos %mstore_rlp %increment // rlp_pos += 1 encode_node_extension_unpack: - %stack (rlp_pos, result, result_len, node_payload_ptr) - -> (rlp_pos, result, result_len, encode_node_extension_after_unpacking) + %stack (rlp_pos, rlp_start, result, result_len, node_payload_ptr) + -> (rlp_pos, result, result_len, encode_node_extension_after_unpacking, rlp_start) %jump(mstore_unpacking_rlp) encode_node_extension_after_unpacking: - // stack: rlp_end_pos, retdest + // stack: rlp_pos, rlp_start, retdest %prepend_rlp_list_prefix - %stack (rlp_start_pos, rlp_len, retdest) -> (retdest, rlp_start_pos, rlp_len) + %stack (rlp_prefix_start_pos, rlp_len, retdest) + -> (retdest, rlp_prefix_start_pos, rlp_len) JUMP -encode_node_leaf: +global encode_node_leaf: // stack: node_type, node_payload_ptr, encode_value, retdest POP // stack: node_payload_ptr, encode_value, retdest + %alloc_rlp_block PUSH encode_node_leaf_after_hex_prefix // retdest PUSH 1 // terminated - // stack: terminated, encode_node_leaf_after_hex_prefix, node_payload_ptr, encode_value, retdest - DUP3 %increment %mload_trie_data // Load the packed_nibbles field, which is at index 1. - // stack: packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, node_payload_ptr, encode_value, retdest - DUP4 %mload_trie_data // Load the num_nibbles field, which is at index 0. - // stack: num_nibbles, packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, node_payload_ptr, encode_value, retdest - PUSH 9 // We start at 9 to leave room to prepend the largest possible RLP list header. - // stack: rlp_start, num_nibbles, packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, node_payload_ptr, encode_value, retdest + // stack: terminated, encode_node_leaf_after_hex_prefix, rlp_start, node_payload_ptr, encode_value, retdest + DUP4 %increment %mload_trie_data // Load the packed_nibbles field, which is at index 1. 
+ // stack: packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, rlp_start, node_payload_ptr, encode_value, retdest + DUP5 %mload_trie_data // Load the num_nibbles field, which is at index 0. + // stack: num_nibbles, packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, rlp_start, node_payload_ptr, encode_value, retdest + DUP5 + // stack: rlp_start, num_nibbles, packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, rlp_start, node_payload_ptr, encode_value, retdest %jump(hex_prefix_rlp) encode_node_leaf_after_hex_prefix: - // stack: rlp_pos, node_payload_ptr, encode_value, retdest - SWAP1 + // stack: rlp_pos, rlp_start, node_payload_ptr, encode_value, retdest + SWAP2 %add_const(2) // The value pointer starts at index 3, after num_nibbles and packed_nibbles. - // stack: value_ptr_ptr, rlp_pos, encode_value, retdest + // stack: value_ptr_ptr, rlp_start, rlp_pos, encode_value, retdest %mload_trie_data - // stack: value_ptr, rlp_pos, encode_value, retdest - %stack (value_ptr, rlp_pos, encode_value, retdest) - -> (encode_value, rlp_pos, value_ptr, encode_node_leaf_after_encode_value, retdest) + // stack: value_ptr, rlp_start, rlp_pos, encode_value, retdest + %stack (value_ptr, rlp_start, rlp_pos, encode_value, retdest) + -> (encode_value, rlp_pos, value_ptr, encode_node_leaf_after_encode_value, rlp_start, retdest) JUMP encode_node_leaf_after_encode_value: - // stack: rlp_end_pos, retdest + // stack: rlp_end_pos, rlp_start, retdest %prepend_rlp_list_prefix - %stack (rlp_start_pos, rlp_len, retdest) -> (retdest, rlp_start_pos, rlp_len) + %stack (rlp_prefix_start_pos, rlp_len, retdest) + -> (retdest, rlp_prefix_start_pos, rlp_len) JUMP diff --git a/evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm b/evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm index 15f33cee..3662ee04 100644 --- a/evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm +++ b/evm/src/cpu/kernel/asm/mpt/hash/hash_trie_specific.asm @@ -13,6 +13,17 @@ global 
mpt_hash_state_trie: %%after: %endmacro +global mpt_hash_storage_trie: + // stack: node_ptr, retdest + %stack (node_ptr) -> (node_ptr, encode_storage_value) + %jump(mpt_hash) + +%macro mpt_hash_storage_trie + PUSH %%after + %jump(mpt_hash_storage_trie) +%%after: +%endmacro + global mpt_hash_txn_trie: // stack: retdest PUSH encode_txn @@ -96,6 +107,12 @@ global encode_receipt: global encode_storage_value: // stack: rlp_pos, value_ptr, retdest + SWAP1 %mload_trie_data SWAP1 + // stack: rlp_pos, value, retdest + // The YP says storage trie is a map "... to the RLP-encoded 256-bit integer values" + // which seems to imply that this should be %encode_rlp_256. But %encode_rlp_scalar + // causes the tests to pass, so it seems storage values should be treated as variable- + // length after all. %encode_rlp_scalar // stack: rlp_pos', retdest SWAP1 diff --git a/evm/src/cpu/kernel/asm/mpt/read.asm b/evm/src/cpu/kernel/asm/mpt/read.asm index 08aa02c3..94cfbaf5 100644 --- a/evm/src/cpu/kernel/asm/mpt/read.asm +++ b/evm/src/cpu/kernel/asm/mpt/read.asm @@ -23,6 +23,7 @@ global mpt_read_state_trie: // - the virtual address of the trie to search in // - the number of nibbles in the key (should start at 64) // - the key, as a U256 +// - return destination // // This function returns a pointer to the value, or 0 if the key is not found. global mpt_read: @@ -43,13 +44,13 @@ global mpt_read: // it means the prover failed to provide necessary Merkle data, so panic. PANIC -mpt_read_empty: +global mpt_read_empty: // Return 0 to indicate that the value was not found. 
%stack (node_type, node_payload_ptr, num_nibbles, key, retdest) -> (retdest, 0) JUMP -mpt_read_branch: +global mpt_read_branch: // stack: node_type, node_payload_ptr, num_nibbles, key, retdest POP // stack: node_payload_ptr, num_nibbles, key, retdest @@ -71,7 +72,7 @@ mpt_read_branch: // stack: child_ptr, num_nibbles, key, retdest %jump(mpt_read) // recurse -mpt_read_branch_end_of_key: +global mpt_read_branch_end_of_key: %stack (node_payload_ptr, num_nibbles, key, retdest) -> (node_payload_ptr, retdest) // stack: node_payload_ptr, retdest %add_const(16) // skip over the 16 child nodes @@ -81,7 +82,7 @@ mpt_read_branch_end_of_key: SWAP1 JUMP -mpt_read_extension: +global mpt_read_extension: // stack: node_type, node_payload_ptr, num_nibbles, key, retdest %stack (node_type, node_payload_ptr, num_nibbles, key) -> (num_nibbles, key, node_payload_ptr) @@ -100,8 +101,9 @@ mpt_read_extension: // stack: node_key, key_part, key_part, future_nibbles, key, node_payload_ptr, retdest EQ // does the first part of our key match the node's key? %jumpi(mpt_read_extension_found) +global mpt_read_extension_not_found: // Not found; return 0. - %stack (key_part, future_nibbles, node_payload_ptr, retdest) -> (retdest, 0) + %stack (key_part, future_nibbles, key, node_payload_ptr, retdest) -> (retdest, 0) JUMP mpt_read_extension_found: // stack: key_part, future_nibbles, key, node_payload_ptr, retdest @@ -135,6 +137,7 @@ mpt_read_leaf: AND // stack: keys_match && num_nibbles_match, node_payload_ptr, retdest %jumpi(mpt_read_leaf_found) +global mpt_read_leaf_not_found: // Not found; return 0. %stack (node_payload_ptr, retdest) -> (retdest, 0) JUMP diff --git a/evm/src/cpu/kernel/asm/mpt/storage/storage_read.asm b/evm/src/cpu/kernel/asm/mpt/storage/storage_read.asm index e93b333f..cf7ed1e9 100644 --- a/evm/src/cpu/kernel/asm/mpt/storage/storage_read.asm +++ b/evm/src/cpu/kernel/asm/mpt/storage/storage_read.asm @@ -5,6 +5,7 @@ global sys_sload: // stack: kexit_info, slot + // TODO: Charge gas. 
SWAP1 // stack: slot, kexit_info %stack (slot) -> (slot, after_storage_read) diff --git a/evm/src/cpu/kernel/asm/mpt/util.asm b/evm/src/cpu/kernel/asm/mpt/util.asm index 0f7689e1..c7662c41 100644 --- a/evm/src/cpu/kernel/asm/mpt/util.asm +++ b/evm/src/cpu/kernel/asm/mpt/util.asm @@ -10,6 +10,23 @@ // stack: (empty) %endmacro +%macro alloc_rlp_block + // stack: (empty) + %mload_global_metadata(@GLOBAL_METADATA_RLP_DATA_SIZE) + // stack: block_start + // In our model it's fine to use memory in a sparse way, as long as the gaps aren't larger than + // 2^16 or so. So instead of the caller specifying the size of the block they need, we'll just + // allocate 0x10000 = 2^16 bytes, much larger than any RLP blob the EVM could possibly create. + DUP1 %add_const(0x10000) + // stack: block_end, block_start + %mstore_global_metadata(@GLOBAL_METADATA_RLP_DATA_SIZE) + // stack: block_start + // We leave an extra 9 bytes, so that callers can later prepend a prefix before block_start. + // (9 is the length of the longest possible RLP list prefix.) + %add_const(9) + // stack: block_start +%endmacro + %macro get_trie_data_size // stack: (empty) %mload_global_metadata(@GLOBAL_METADATA_TRIE_DATA_SIZE) diff --git a/evm/src/cpu/kernel/asm/rlp/encode.asm b/evm/src/cpu/kernel/asm/rlp/encode.asm index bc69f444..8254e3e8 100644 --- a/evm/src/cpu/kernel/asm/rlp/encode.asm +++ b/evm/src/cpu/kernel/asm/rlp/encode.asm @@ -196,66 +196,65 @@ encode_rlp_list_prefix_large_done_writing_len: %%after: %endmacro -// Given an RLP list payload which starts at position 9 and ends at the given -// position, prepend the appropriate RLP list prefix. Returns the updated start -// position, as well as the length of the RLP data (including the newly-added -// prefix). +// Given an RLP list payload which starts and ends at the given positions, +// prepend the appropriate RLP list prefix. Returns the updated start position, +// as well as the length of the RLP data (including the newly-added prefix). 
// -// (We sometimes start list payloads at position 9 because 9 is the length of -// the longest possible RLP list prefix.) -// -// Pre stack: end_pos, retdest -// Post stack: start_pos, rlp_len +// Pre stack: end_pos, start_pos, retdest +// Post stack: prefix_start_pos, rlp_len global prepend_rlp_list_prefix: - // stack: end_pos, retdest - // Since the list payload starts at position 9, payload_len = end_pos - 9. - PUSH 9 DUP2 SUB - // stack: payload_len, end_pos, retdest + // stack: end_pos, start_pos, retdest + DUP2 DUP2 SUB // end_pos - start_pos + // stack: payload_len, end_pos, start_pos, retdest DUP1 %gt_const(55) %jumpi(prepend_rlp_list_prefix_big) // If we got here, we have a small list, so we prepend 0xc0 + len at position 8. - // stack: payload_len, end_pos, retdest - %add_const(0xc0) - // stack: prefix_byte, end_pos, retdest - PUSH 8 // offset + // stack: payload_len, end_pos, start_pos, retdest + DUP1 %add_const(0xc0) + // stack: prefix_byte, payload_len, end_pos, start_pos, retdest + DUP4 %decrement // offset of prefix %mstore_rlp - // stack: end_pos, retdest - %sub_const(8) - // stack: rlp_len, retdest - PUSH 8 // start_pos - %stack (start_pos, rlp_len, retdest) -> (retdest, start_pos, rlp_len) + // stack: payload_len, end_pos, start_pos, retdest + %increment + // stack: rlp_len, end_pos, start_pos, retdest + SWAP2 %decrement + // stack: prefix_start_pos, end_pos, rlp_len, retdest + %stack (prefix_start_pos, end_pos, rlp_len, retdest) -> (retdest, prefix_start_pos, rlp_len) JUMP prepend_rlp_list_prefix_big: // We have a large list, so we prepend 0xf7 + len_of_len at position - // 8 - len_of_len, followed by the length itself. - // stack: payload_len, end_pos, retdest + // prefix_start_pos = start_pos - 1 - len_of_len + // followed by the length itself. 
+ // stack: payload_len, end_pos, start_pos, retdest DUP1 %num_bytes - // stack: len_of_len, payload_len, end_pos, retdest + // stack: len_of_len, payload_len, end_pos, start_pos, retdest DUP1 - PUSH 8 + DUP5 %decrement // start_pos - 1 SUB - // stack: start_pos, len_of_len, payload_len, end_pos, retdest - DUP2 %add_const(0xf7) DUP2 %mstore_rlp // rlp[start_pos] = 0xf7 + len_of_len - DUP1 %increment // start_len_pos = start_pos + 1 - %stack (start_len_pos, start_pos, len_of_len, payload_len, end_pos, retdest) + // stack: prefix_start_pos, len_of_len, payload_len, end_pos, start_pos, retdest + DUP2 %add_const(0xf7) DUP2 %mstore_rlp // rlp[prefix_start_pos] = 0xf7 + len_of_len + // stack: prefix_start_pos, len_of_len, payload_len, end_pos, start_pos, retdest + DUP1 %increment // start_len_pos = prefix_start_pos + 1 + %stack (start_len_pos, prefix_start_pos, len_of_len, payload_len, end_pos, start_pos, retdest) -> (start_len_pos, payload_len, len_of_len, prepend_rlp_list_prefix_big_done_writing_len, - start_pos, end_pos, retdest) + prefix_start_pos, end_pos, retdest) %jump(mstore_unpacking_rlp) prepend_rlp_list_prefix_big_done_writing_len: - // stack: 9, start_pos, end_pos, retdest - %stack (_9, start_pos, end_pos) -> (end_pos, start_pos, start_pos) - // stack: end_pos, start_pos, start_pos, retdest + // stack: start_pos, prefix_start_pos, end_pos, retdest + %stack (start_pos, prefix_start_pos, end_pos) + -> (end_pos, prefix_start_pos, prefix_start_pos) + // stack: end_pos, prefix_start_pos, prefix_start_pos, retdest SUB - // stack: rlp_len, start_pos, retdest - %stack (rlp_len, start_pos, retdest) -> (retdest, start_pos, rlp_len) + // stack: rlp_len, prefix_start_pos, retdest + %stack (rlp_len, prefix_start_pos, retdest) -> (retdest, prefix_start_pos, rlp_len) JUMP // Convenience macro to call prepend_rlp_list_prefix and return where we left off. 
%macro prepend_rlp_list_prefix - %stack (end_pos) -> (end_pos, %%after) + %stack (end_pos, start_pos) -> (end_pos, start_pos, %%after) %jump(prepend_rlp_list_prefix) %%after: %endmacro diff --git a/evm/src/cpu/kernel/asm/transactions/type_0.asm b/evm/src/cpu/kernel/asm/transactions/type_0.asm index e9aedca0..d00b10d4 100644 --- a/evm/src/cpu/kernel/asm/transactions/type_0.asm +++ b/evm/src/cpu/kernel/asm/transactions/type_0.asm @@ -84,62 +84,64 @@ type_0_compute_signed_data: // otherwise, it is // keccak256(rlp([nonce, gas_price, gas_limit, to, value, data])) + %alloc_rlp_block + // stack: rlp_start, retdest %mload_txn_field(@TXN_FIELD_NONCE) - // stack: nonce, retdest - PUSH 9 // We start at 9 to leave room to prepend the largest possible RLP list header. - // stack: rlp_pos, nonce, retdest + // stack: nonce, rlp_start, retdest + DUP2 + // stack: rlp_pos, nonce, rlp_start, retdest %encode_rlp_scalar - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest %mload_txn_field(@TXN_FIELD_MAX_FEE_PER_GAS) SWAP1 %encode_rlp_scalar - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest %mload_txn_field(@TXN_FIELD_GAS_LIMIT) SWAP1 %encode_rlp_scalar - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest %mload_txn_field(@TXN_FIELD_TO) SWAP1 %encode_rlp_160 - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest %mload_txn_field(@TXN_FIELD_VALUE) SWAP1 %encode_rlp_scalar - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest // Encode txn data. 
%mload_txn_field(@TXN_FIELD_DATA_LEN) PUSH 0 // ADDR.virt PUSH @SEGMENT_TXN_DATA PUSH 0 // ADDR.context - // stack: ADDR: 3, len, rlp_pos, retdest + // stack: ADDR: 3, len, rlp_pos, rlp_start, retdest PUSH after_serializing_txn_data - // stack: after_serializing_txn_data, ADDR: 3, len, rlp_pos, retdest + // stack: after_serializing_txn_data, ADDR: 3, len, rlp_pos, rlp_start, retdest SWAP5 - // stack: rlp_pos, ADDR: 3, len, after_serializing_txn_data, retdest + // stack: rlp_pos, ADDR: 3, len, after_serializing_txn_data, rlp_start, retdest %jump(encode_rlp_string) after_serializing_txn_data: - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest %mload_txn_field(@TXN_FIELD_CHAIN_ID_PRESENT) ISZERO %jumpi(finish_rlp_list) - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest %mload_txn_field(@TXN_FIELD_CHAIN_ID) SWAP1 %encode_rlp_scalar - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest PUSH 0 SWAP1 %encode_rlp_scalar - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest PUSH 0 SWAP1 %encode_rlp_scalar - // stack: rlp_pos, retdest + // stack: rlp_pos, rlp_start, retdest finish_rlp_list: %prepend_rlp_list_prefix - // stack: start_pos, rlp_len, retdest + // stack: prefix_start_pos, rlp_len, retdest PUSH @SEGMENT_RLP_RAW PUSH 0 // context // stack: ADDR: 3, rlp_len, retdest diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm index e81993b0..476f1c74 100644 --- a/evm/src/cpu/kernel/asm/util/basic_macros.asm +++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm @@ -345,24 +345,3 @@ %endrep // stack: a || b || c || d %endmacro - -// Charge gas. -// Arguments: -// stack[0]: gas to be charged -// stack[1]: syscall info -// Returns: -// new syscall info -%macro charge_gas - %shl_const(192) - ADD -%endmacro - -// Charge gas and exit kernel code. 
-// Arguments: -// stack[0]: gas to be charged -// stack[1]: syscall info -// Returns: nothing -%macro charge_gas_and_exit - %charge_gas - EXIT_KERNEL -%endmacro diff --git a/evm/src/cpu/kernel/asm/util/keccak.asm b/evm/src/cpu/kernel/asm/util/keccak.asm index 7922e8ce..280a4047 100644 --- a/evm/src/cpu/kernel/asm/util/keccak.asm +++ b/evm/src/cpu/kernel/asm/util/keccak.asm @@ -1,5 +1,6 @@ global sys_keccak256: // stack: kexit_info, offset, len + // TODO: Charge gas. %stack (kexit_info, offset, len) -> (offset, len, kexit_info) PUSH @SEGMENT_MAIN_MEMORY GET_CONTEXT @@ -23,3 +24,21 @@ global sys_keccak256: %stack (offset) -> (0, @SEGMENT_KERNEL_GENERAL, 0, $num_bytes) // context, segment, offset, len KECCAK_GENERAL %endmacro + +// Computes Keccak256(a || b). Clobbers @SEGMENT_KERNEL_GENERAL. +// +// Pre stack: a, b +// Post stack: hash +%macro keccak256_u256_pair + // Since KECCAK_GENERAL takes its input from memory, we will first write + // a's bytes to @SEGMENT_KERNEL_GENERAL[0..32], then b's bytes to + // @SEGMENT_KERNEL_GENERAL[32..64]. + %stack (a) -> (0, @SEGMENT_KERNEL_GENERAL, 0, a, 32, %%after_mstore_a) + %jump(mstore_unpacking) +%%after_mstore_a: + %stack (offset, b) -> (0, @SEGMENT_KERNEL_GENERAL, 32, b, 32, %%after_mstore_b) + %jump(mstore_unpacking) +%%after_mstore_b: + %stack (offset) -> (0, @SEGMENT_KERNEL_GENERAL, 0, 64) // context, segment, offset, len + KECCAK_GENERAL +%endmacro diff --git a/evm/src/cpu/kernel/constants/context_metadata.rs b/evm/src/cpu/kernel/constants/context_metadata.rs index 4e869661..32af6e35 100644 --- a/evm/src/cpu/kernel/constants/context_metadata.rs +++ b/evm/src/cpu/kernel/constants/context_metadata.rs @@ -23,8 +23,8 @@ pub(crate) enum ContextMetadata { /// Pointer to the initial version of the state trie, at the creation of this context. Used when /// we need to revert a context. StateTrieCheckpointPointer = 9, - /// Size of the active main memory. - MSize = 10, + /// Size of the active main memory, in (32 byte) words. 
+ MemWords = 10, StackSize = 11, /// The gas limit for this call (not the entire transaction). GasLimit = 12, @@ -45,7 +45,7 @@ impl ContextMetadata { Self::CallValue, Self::Static, Self::StateTrieCheckpointPointer, - Self::MSize, + Self::MemWords, Self::StackSize, Self::GasLimit, ] @@ -64,7 +64,7 @@ impl ContextMetadata { ContextMetadata::CallValue => "CTX_METADATA_CALL_VALUE", ContextMetadata::Static => "CTX_METADATA_STATIC", ContextMetadata::StateTrieCheckpointPointer => "CTX_METADATA_STATE_TRIE_CHECKPOINT_PTR", - ContextMetadata::MSize => "CTX_METADATA_MSIZE", + ContextMetadata::MemWords => "CTX_METADATA_MEM_WORDS", ContextMetadata::StackSize => "CTX_METADATA_STACK_SIZE", ContextMetadata::GasLimit => "CTX_METADATA_GAS_LIMIT", } diff --git a/evm/src/cpu/kernel/constants/global_metadata.rs b/evm/src/cpu/kernel/constants/global_metadata.rs index cee02e86..e9d694eb 100644 --- a/evm/src/cpu/kernel/constants/global_metadata.rs +++ b/evm/src/cpu/kernel/constants/global_metadata.rs @@ -6,10 +6,13 @@ pub(crate) enum GlobalMetadata { /// give each new context a unique ID, so that its memory will be zero-initialized. LargestContext = 0, /// The size of active memory, in bytes. - MemorySize = 2, + MemorySize = 1, /// The size of the `TrieData` segment, in bytes. In other words, the next address available for /// appending additional trie data. - TrieDataSize = 3, + TrieDataSize = 2, + /// The size of the allocated RLP data, in bytes. In other words, the next address available for + /// allocating an additional block of RLP data. + RlpDataSize = 3, /// A pointer to the root of the state trie within the `TrieData` buffer. StateTrieRoot = 4, /// A pointer to the root of the transaction trie within the `TrieData` buffer. 
@@ -45,13 +48,14 @@ pub(crate) enum GlobalMetadata { } impl GlobalMetadata { - pub(crate) const COUNT: usize = 21; + pub(crate) const COUNT: usize = 22; pub(crate) fn all() -> [Self; Self::COUNT] { [ Self::LargestContext, Self::MemorySize, Self::TrieDataSize, + Self::RlpDataSize, Self::StateTrieRoot, Self::TransactionTrieRoot, Self::ReceiptTrieRoot, @@ -79,6 +83,7 @@ impl GlobalMetadata { Self::LargestContext => "GLOBAL_METADATA_LARGEST_CONTEXT", Self::MemorySize => "GLOBAL_METADATA_MEMORY_SIZE", Self::TrieDataSize => "GLOBAL_METADATA_TRIE_DATA_SIZE", + Self::RlpDataSize => "GLOBAL_METADATA_RLP_DATA_SIZE", Self::StateTrieRoot => "GLOBAL_METADATA_STATE_TRIE_ROOT", Self::TransactionTrieRoot => "GLOBAL_METADATA_TXN_TRIE_ROOT", Self::ReceiptTrieRoot => "GLOBAL_METADATA_RECEIPT_TRIE_ROOT", diff --git a/evm/src/cpu/kernel/constants/trie_type.rs b/evm/src/cpu/kernel/constants/trie_type.rs index 30f4802b..fc71c1f4 100644 --- a/evm/src/cpu/kernel/constants/trie_type.rs +++ b/evm/src/cpu/kernel/constants/trie_type.rs @@ -1,6 +1,6 @@ use eth_trie_utils::partial_trie::PartialTrie; -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) enum PartialTrieType { Empty = 0, Hash = 1, diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 52876c97..7d30d7f2 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -279,6 +279,7 @@ impl<'a> Interpreter<'a> { .byte(0); self.opcode_count[opcode as usize] += 1; self.incr(1); + match opcode { 0x00 => self.run_stop(), // "STOP", 0x01 => self.run_add(), // "ADD", @@ -356,7 +357,7 @@ impl<'a> Interpreter<'a> { 0xa2 => todo!(), // "LOG2", 0xa3 => todo!(), // "LOG3", 0xa4 => todo!(), // "LOG4", - 0xa5 => bail!("Executed PANIC"), // "PANIC", + 0xa5 => bail!("Executed PANIC, stack={:?}", self.stack()), // "PANIC", 0xf0 => todo!(), // "CREATE", 0xf1 => todo!(), // "CALL", 0xf2 => todo!(), // "CALLCODE", @@ -708,7 +709,7 @@ impl<'a> Interpreter<'a> { self.push( 
self.generation_state.memory.contexts[self.context].segments [Segment::ContextMetadata as usize] - .get(ContextMetadata::MSize as usize), + .get(ContextMetadata::MemWords as usize), ) } diff --git a/evm/src/cpu/kernel/tests/packing.rs b/evm/src/cpu/kernel/tests/packing.rs index 71f66e6d..43ca9b5f 100644 --- a/evm/src/cpu/kernel/tests/packing.rs +++ b/evm/src/cpu/kernel/tests/packing.rs @@ -7,7 +7,7 @@ use crate::memory::segments::Segment; #[test] fn test_mload_packing_1_byte() -> Result<()> { - let mstore_unpacking = KERNEL.global_labels["mload_packing"]; + let mload_packing = KERNEL.global_labels["mload_packing"]; let retdest = 0xDEADBEEFu32.into(); let len = 1.into(); @@ -16,7 +16,7 @@ fn test_mload_packing_1_byte() -> Result<()> { let context = 0.into(); let initial_stack = vec![retdest, len, offset, segment, context]; - let mut interpreter = Interpreter::new_with_kernel(mstore_unpacking, initial_stack); + let mut interpreter = Interpreter::new_with_kernel(mload_packing, initial_stack); interpreter.set_rlp_memory(vec![0, 0, 0xAB]); interpreter.run()?; @@ -27,7 +27,7 @@ fn test_mload_packing_1_byte() -> Result<()> { #[test] fn test_mload_packing_3_bytes() -> Result<()> { - let mstore_unpacking = KERNEL.global_labels["mload_packing"]; + let mload_packing = KERNEL.global_labels["mload_packing"]; let retdest = 0xDEADBEEFu32.into(); let len = 3.into(); @@ -36,7 +36,7 @@ fn test_mload_packing_3_bytes() -> Result<()> { let context = 0.into(); let initial_stack = vec![retdest, len, offset, segment, context]; - let mut interpreter = Interpreter::new_with_kernel(mstore_unpacking, initial_stack); + let mut interpreter = Interpreter::new_with_kernel(mload_packing, initial_stack); interpreter.set_rlp_memory(vec![0, 0, 0xAB, 0xCD, 0xEF]); interpreter.run()?; @@ -47,7 +47,7 @@ fn test_mload_packing_3_bytes() -> Result<()> { #[test] fn test_mload_packing_32_bytes() -> Result<()> { - let mstore_unpacking = KERNEL.global_labels["mload_packing"]; + let mload_packing = 
KERNEL.global_labels["mload_packing"]; let retdest = 0xDEADBEEFu32.into(); let len = 32.into(); @@ -56,7 +56,7 @@ fn test_mload_packing_32_bytes() -> Result<()> { let context = 0.into(); let initial_stack = vec![retdest, len, offset, segment, context]; - let mut interpreter = Interpreter::new_with_kernel(mstore_unpacking, initial_stack); + let mut interpreter = Interpreter::new_with_kernel(mload_packing, initial_stack); interpreter.set_rlp_memory(vec![0xFF; 32]); interpreter.run()?; diff --git a/evm/src/cpu/kernel/tests/rlp/encode.rs b/evm/src/cpu/kernel/tests/rlp/encode.rs index 4e04b248..2771dea0 100644 --- a/evm/src/cpu/kernel/tests/rlp/encode.rs +++ b/evm/src/cpu/kernel/tests/rlp/encode.rs @@ -86,8 +86,9 @@ fn test_prepend_rlp_list_prefix_small() -> Result<()> { let prepend_rlp_list_prefix = KERNEL.global_labels["prepend_rlp_list_prefix"]; let retdest = 0xDEADBEEFu32.into(); + let start_pos = 9.into(); let end_pos = (9 + 5).into(); - let initial_stack = vec![retdest, end_pos]; + let initial_stack = vec![retdest, start_pos, end_pos]; let mut interpreter = Interpreter::new_with_kernel(prepend_rlp_list_prefix, initial_stack); interpreter.set_rlp_memory(vec![ // Nine 0s to leave room for the longest possible RLP list prefix. 
@@ -114,8 +115,9 @@ fn test_prepend_rlp_list_prefix_large() -> Result<()> { let prepend_rlp_list_prefix = KERNEL.global_labels["prepend_rlp_list_prefix"]; let retdest = 0xDEADBEEFu32.into(); + let start_pos = 9.into(); let end_pos = (9 + 60).into(); - let initial_stack = vec![retdest, end_pos]; + let initial_stack = vec![retdest, start_pos, end_pos]; let mut interpreter = Interpreter::new_with_kernel(prepend_rlp_list_prefix, initial_stack); #[rustfmt::skip] diff --git a/evm/src/generation/mod.rs b/evm/src/generation/mod.rs index c01e8af1..2c786d8f 100644 --- a/evm/src/generation/mod.rs +++ b/evm/src/generation/mod.rs @@ -18,25 +18,24 @@ use crate::config::StarkConfig; use crate::cpu::bootstrap_kernel::generate_bootstrap_kernel; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; -use crate::cpu::kernel::constants::global_metadata::GlobalMetadata::StateTrieRoot; -use crate::generation::mpt::AccountRlp; +use crate::generation::outputs::{get_outputs, GenerationOutputs}; use crate::generation::state::GenerationState; -use crate::generation::trie_extractor::read_state_trie_value; use crate::memory::segments::Segment; use crate::proof::{BlockMetadata, PublicValues, TrieRoots}; use crate::witness::memory::{MemoryAddress, MemoryChannel}; use crate::witness::transition::transition; pub mod mpt; +pub mod outputs; pub(crate) mod prover_input; pub(crate) mod rlp; pub(crate) mod state; mod trie_extractor; -use crate::generation::trie_extractor::read_trie; + use crate::witness::util::mem_write_log; -#[derive(Clone, Debug, Deserialize, Serialize, Default)] /// Inputs needed for trace generation. 
+#[derive(Clone, Debug, Deserialize, Serialize, Default)] pub struct GenerationInputs { pub signed_txns: Vec>, @@ -47,6 +46,13 @@ pub struct GenerationInputs { pub contract_code: HashMap>, pub block_metadata: BlockMetadata, + + /// A list of known addresses in the input state trie (which itself doesn't hold addresses, + /// only state keys). This is only useful for debugging, so that we can return addresses in the + /// post-state rather than state keys. (See `GenerationOutputs`, and in particular + /// `AddressOrStateKey`.) If the caller is not interested in the post-state, this can be left + /// empty. + pub addresses: Vec
, } #[derive(Clone, Debug, Deserialize, Serialize, Default)] @@ -104,7 +110,11 @@ pub(crate) fn generate_traces, const D: usize>( inputs: GenerationInputs, config: &StarkConfig, timing: &mut TimingTree, -) -> anyhow::Result<([Vec>; NUM_TABLES], PublicValues)> { +) -> anyhow::Result<( + [Vec>; NUM_TABLES], + PublicValues, + GenerationOutputs, +)> { let mut state = GenerationState::::new(inputs.clone(), &KERNEL.code); apply_metadata_memops(&mut state, &inputs.block_metadata); @@ -113,28 +123,19 @@ pub(crate) fn generate_traces, const D: usize>( timed!(timing, "simulate CPU", simulate_cpu(&mut state)?); + assert!( + state.mpt_prover_inputs.is_empty(), + "All MPT data should have been consumed" + ); + log::info!( "Trace lengths (before padding): {:?}", state.traces.checkpoint() ); - let read_metadata = |field| { - state.memory.get(MemoryAddress::new( - 0, - Segment::GlobalMetadata, - field as usize, - )) - }; - - log::debug!( - "Updated state trie:\n{:#?}", - read_trie::( - &state.memory, - read_metadata(StateTrieRoot).as_usize(), - read_state_trie_value - ) - ); + let outputs = get_outputs(&mut state); + let read_metadata = |field| state.memory.read_global_metadata(field); let trie_roots_before = TrieRoots { state_root: H256::from_uint(&read_metadata(StateTrieRootDigestBefore)), transactions_root: H256::from_uint(&read_metadata(TransactionTrieRootDigestBefore)), @@ -157,7 +158,7 @@ pub(crate) fn generate_traces, const D: usize>( "convert trace data to tables", state.traces.into_tables(all_stark, config, timing) ); - Ok((tables, public_values)) + Ok((tables, public_values, outputs)) } fn simulate_cpu, const D: usize>( diff --git a/evm/src/generation/outputs.rs b/evm/src/generation/outputs.rs new file mode 100644 index 00000000..b6da3576 --- /dev/null +++ b/evm/src/generation/outputs.rs @@ -0,0 +1,104 @@ +use std::collections::HashMap; + +use ethereum_types::{Address, BigEndianHash, H256, U256}; +use plonky2::field::types::Field; + +use 
crate::cpu::kernel::constants::global_metadata::GlobalMetadata::StateTrieRoot; +use crate::generation::state::GenerationState; +use crate::generation::trie_extractor::{ + read_state_trie_value, read_storage_trie_value, read_trie, AccountTrieRecord, +}; + +/// The post-state after trace generation; intended for debugging. +#[derive(Clone, Debug)] +pub struct GenerationOutputs { + pub accounts: HashMap, +} + +#[derive(Clone, Eq, PartialEq, Hash, Debug)] +pub enum AddressOrStateKey { + Address(Address), + StateKey(H256), +} + +#[derive(Clone, Debug)] +pub struct AccountOutput { + pub balance: U256, + pub nonce: u64, + pub code: Vec, + pub storage: HashMap, +} + +pub(crate) fn get_outputs(state: &mut GenerationState) -> GenerationOutputs { + // First observe all addresses passed in by the caller. + for address in state.inputs.addresses.clone() { + state.observe_address(address); + } + + let account_map = read_trie::( + &state.memory, + state.memory.read_global_metadata(StateTrieRoot).as_usize(), + read_state_trie_value, + ); + + let accounts = account_map + .into_iter() + .map(|(state_key_nibbles, account)| { + assert_eq!( + state_key_nibbles.count, 64, + "Each state key should have 64 nibbles = 256 bits" + ); + let state_key_h256 = H256::from_uint(&state_key_nibbles.packed); + + let addr_or_state_key = + if let Some(address) = state.state_key_to_address.get(&state_key_h256) { + AddressOrStateKey::Address(*address) + } else { + AddressOrStateKey::StateKey(state_key_h256) + }; + + let account_output = account_trie_record_to_output(state, account); + (addr_or_state_key, account_output) + }) + .collect(); + + GenerationOutputs { accounts } +} + +fn account_trie_record_to_output( + state: &GenerationState, + account: AccountTrieRecord, +) -> AccountOutput { + let storage = get_storage(state, account.storage_ptr); + + // TODO: This won't work if the account was created during the txn. 
+ // Need to track changes to code, similar to how we track addresses + // with observe_new_address. + let code = state + .inputs + .contract_code + .get(&account.code_hash) + .unwrap_or_else(|| panic!("Code not found: {:?}", account.code_hash)) + .clone(); + + AccountOutput { + balance: account.balance, + nonce: account.nonce, + storage, + code, + } +} + +/// Get an account's storage trie, given a pointer to its root. +fn get_storage(state: &GenerationState, storage_ptr: usize) -> HashMap { + read_trie::(&state.memory, storage_ptr, read_storage_trie_value) + .into_iter() + .map(|(storage_key_nibbles, value)| { + assert_eq!( + storage_key_nibbles.count, 64, + "Each storage key should have 64 nibbles = 256 bits" + ); + (storage_key_nibbles.packed, value) + }) + .collect() +} diff --git a/evm/src/generation/state.rs b/evm/src/generation/state.rs index 88f17ade..9399e4b6 100644 --- a/evm/src/generation/state.rs +++ b/evm/src/generation/state.rs @@ -1,12 +1,17 @@ -use ethereum_types::U256; +use std::collections::HashMap; + +use ethereum_types::{Address, H160, H256, U256}; +use keccak_hash::keccak; use plonky2::field::types::Field; +use crate::cpu::kernel::aggregator::KERNEL; use crate::generation::mpt::all_mpt_prover_inputs_reversed; use crate::generation::rlp::all_rlp_prover_inputs_reversed; use crate::generation::GenerationInputs; use crate::witness::memory::MemoryState; use crate::witness::state::RegistersState; use crate::witness::traces::{TraceCheckpoint, Traces}; +use crate::witness::util::stack_peek; pub(crate) struct GenerationStateCheckpoint { pub(crate) registers: RegistersState, @@ -29,6 +34,11 @@ pub(crate) struct GenerationState { /// Prover inputs containing RLP data, in reverse order so that the next input can be obtained /// via `pop()`. pub(crate) rlp_prover_inputs: Vec, + + /// The state trie only stores state keys, which are hashes of addresses, but sometimes it is + /// useful to see the actual addresses for debugging. 
Here we store the mapping for all known + /// addresses. + pub(crate) state_key_to_address: HashMap, } impl GenerationState { @@ -53,9 +63,29 @@ impl GenerationState { next_txn_index: 0, mpt_prover_inputs, rlp_prover_inputs, + state_key_to_address: HashMap::new(), } } + /// Updates `program_counter`, and potentially adds some extra handling if we're jumping to a + /// special location. + pub fn jump_to(&mut self, dst: usize) { + self.registers.program_counter = dst; + if dst == KERNEL.global_labels["observe_new_address"] { + let address = stack_peek(self, 0).expect("Empty stack"); + let mut address_bytes = [0; 20]; + address.to_big_endian(&mut address_bytes); + self.observe_address(H160(address_bytes)); + } + } + + /// Observe the given address, so that we will be able to recognize the associated state key. + /// This is just for debugging purposes. + pub fn observe_address(&mut self, address: Address) { + let state_key = keccak(address.0); + self.state_key_to_address.insert(state_key, address); + } + pub fn checkpoint(&self) -> GenerationStateCheckpoint { GenerationStateCheckpoint { registers: self.registers, @@ -67,4 +97,11 @@ impl GenerationState { self.registers = checkpoint.registers; self.traces.rollback(checkpoint.traces); } + + pub(crate) fn stack(&self) -> Vec { + const MAX_TO_SHOW: usize = 10; + (0..self.registers.stack_len.min(MAX_TO_SHOW)) + .map(|i| stack_peek(self, i).unwrap()) + .collect() + } } diff --git a/evm/src/generation/trie_extractor.rs b/evm/src/generation/trie_extractor.rs index d35d67eb..66174419 100644 --- a/evm/src/generation/trie_extractor.rs +++ b/evm/src/generation/trie_extractor.rs @@ -1,50 +1,57 @@ +//! Code for extracting trie data after witness generation. This is intended only for debugging. 
+ use std::collections::HashMap; use eth_trie_utils::partial_trie::Nibbles; use ethereum_types::{BigEndianHash, H256, U256}; -use plonky2::field::extension::Extendable; -use plonky2::hash::hash_types::RichField; use crate::cpu::kernel::constants::trie_type::PartialTrieType; -use crate::generation::mpt::AccountRlp; use crate::memory::segments::Segment; use crate::witness::memory::{MemoryAddress, MemoryState}; -pub(crate) fn read_state_trie_value(slice: &[U256]) -> AccountRlp { - AccountRlp { - nonce: slice[0], +/// Account data as it's stored in the state trie, with a pointer to the storage trie. +#[derive(Debug)] +pub(crate) struct AccountTrieRecord { + pub(crate) nonce: u64, + pub(crate) balance: U256, + pub(crate) storage_ptr: usize, + pub(crate) code_hash: H256, +} + +pub(crate) fn read_state_trie_value(slice: &[U256]) -> AccountTrieRecord { + AccountTrieRecord { + nonce: slice[0].as_u64(), balance: slice[1], - storage_root: H256::from_uint(&slice[2]), + storage_ptr: slice[2].as_usize(), code_hash: H256::from_uint(&slice[3]), } } -pub(crate) fn read_trie( +pub(crate) fn read_storage_trie_value(slice: &[U256]) -> U256 { + slice[0] +} + +pub(crate) fn read_trie( memory: &MemoryState, ptr: usize, read_value: fn(&[U256]) -> V, -) -> HashMap -where - F: RichField + Extendable, -{ +) -> HashMap { let mut res = HashMap::new(); let empty_nibbles = Nibbles { count: 0, packed: U256::zero(), }; - read_trie_helper::(memory, ptr, read_value, empty_nibbles, &mut res); + read_trie_helper::(memory, ptr, read_value, empty_nibbles, &mut res); res } -pub(crate) fn read_trie_helper( +pub(crate) fn read_trie_helper( memory: &MemoryState, ptr: usize, read_value: fn(&[U256]) -> V, prefix: Nibbles, res: &mut HashMap, -) where - F: RichField + Extendable, -{ +) { let load = |offset| memory.get(MemoryAddress::new(0, Segment::TrieData, offset)); let load_slice_from = |init_offset| { &memory.contexts[0].segments[Segment::TrieData as usize].content[init_offset..] 
@@ -58,13 +65,7 @@ pub(crate) fn read_trie_helper( let ptr_payload = ptr + 1; for i in 0u8..16 { let child_ptr = load(ptr_payload + i as usize).as_usize(); - read_trie_helper::( - memory, - child_ptr, - read_value, - prefix.merge_nibble(i), - res, - ); + read_trie_helper::(memory, child_ptr, read_value, prefix.merge_nibble(i), res); } let value_ptr = load(ptr_payload + 16).as_usize(); if value_ptr != 0 { @@ -76,7 +77,7 @@ pub(crate) fn read_trie_helper( let packed = load(ptr + 2); let nibbles = Nibbles { count, packed }; let child_ptr = load(ptr + 3).as_usize(); - read_trie_helper::( + read_trie_helper::( memory, child_ptr, read_value, diff --git a/evm/src/prover.rs b/evm/src/prover.rs index 9e26218a..97b28a4b 100644 --- a/evm/src/prover.rs +++ b/evm/src/prover.rs @@ -25,6 +25,7 @@ use crate::constraint_consumer::ConstraintConsumer; use crate::cpu::cpu_stark::CpuStark; use crate::cpu::kernel::aggregator::KERNEL; use crate::cross_table_lookup::{cross_table_lookup_data, CtlCheckVars, CtlData}; +use crate::generation::outputs::GenerationOutputs; use crate::generation::{generate_traces, GenerationInputs}; use crate::keccak::keccak_stark::KeccakStark; use crate::keccak_sponge::keccak_sponge_stark::KeccakSpongeStark; @@ -46,6 +47,28 @@ pub fn prove( inputs: GenerationInputs, timing: &mut TimingTree, ) -> Result> +where + F: RichField + Extendable, + C: GenericConfig, + [(); C::Hasher::HASH_SIZE]:, + [(); CpuStark::::COLUMNS]:, + [(); KeccakStark::::COLUMNS]:, + [(); KeccakSpongeStark::::COLUMNS]:, + [(); LogicStark::::COLUMNS]:, + [(); MemoryStark::::COLUMNS]:, +{ + let (proof, _outputs) = prove_with_outputs(all_stark, config, inputs, timing)?; + Ok(proof) +} + +/// Generate traces, then create all STARK proofs. Returns information about the post-state, +/// intended for debugging, in addition to the proof. 
+pub fn prove_with_outputs( + all_stark: &AllStark, + config: &StarkConfig, + inputs: GenerationInputs, + timing: &mut TimingTree, +) -> Result<(AllProof, GenerationOutputs)> where F: RichField + Extendable, C: GenericConfig, @@ -57,12 +80,13 @@ where [(); MemoryStark::::COLUMNS]:, { timed!(timing, "build kernel", Lazy::force(&KERNEL)); - let (traces, public_values) = timed!( + let (traces, public_values, outputs) = timed!( timing, "generate all traces", generate_traces(all_stark, inputs, config, timing)? ); - prove_with_traces(all_stark, config, traces, public_values, timing) + let proof = prove_with_traces(all_stark, config, traces, public_values, timing)?; + Ok((proof, outputs)) } /// Compute all STARK proofs. diff --git a/evm/src/witness/memory.rs b/evm/src/witness/memory.rs index f42dcaac..a2885796 100644 --- a/evm/src/witness/memory.rs +++ b/evm/src/witness/memory.rs @@ -10,6 +10,7 @@ pub enum MemoryChannel { use MemoryChannel::{Code, GeneralPurpose}; +use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; use crate::memory::segments::Segment; impl MemoryChannel { @@ -173,6 +174,14 @@ impl MemoryState { ); self.contexts[address.context].segments[address.segment].set(address.virt, val); } + + pub(crate) fn read_global_metadata(&self, field: GlobalMetadata) -> U256 { + self.get(MemoryAddress::new( + 0, + Segment::GlobalMetadata, + field as usize, + )) + } } impl Default for MemoryState { diff --git a/evm/src/witness/operation.rs b/evm/src/witness/operation.rs index 3d1dddde..6d42eb48 100644 --- a/evm/src/witness/operation.rs +++ b/evm/src/witness/operation.rs @@ -200,7 +200,7 @@ pub(crate) fn generate_jump( state.traces.push_memory(log_in0); state.traces.push_cpu(row); - state.registers.program_counter = dst as usize; + state.jump_to(dst as usize); Ok(()) } @@ -224,7 +224,7 @@ pub(crate) fn generate_jumpi( let dst: u32 = dst .try_into() .map_err(|_| ProgramError::InvalidJumpiDestination)?; - state.registers.program_counter = dst as usize; + 
state.jump_to(dst as usize); } else { row.general.jumps_mut().should_jump = F::ZERO; row.general.jumps_mut().cond_sum_pinv = F::ZERO; @@ -589,7 +589,7 @@ pub(crate) fn generate_exit_kernel( state.registers.gas_used = gas_used_val; log::debug!( "Exiting to {}, is_kernel={}", - KERNEL.offset_name(program_counter), + program_counter, is_kernel_mode ); diff --git a/evm/src/witness/transition.rs b/evm/src/witness/transition.rs index ff10b08b..d48561db 100644 --- a/evm/src/witness/transition.rs +++ b/evm/src/witness/transition.rs @@ -1,5 +1,4 @@ use anyhow::bail; -use itertools::Itertools; use log::log_enabled; use plonky2::field::types::Field; @@ -12,7 +11,7 @@ use crate::witness::gas::gas_to_charge; use crate::witness::memory::MemoryAddress; use crate::witness::operation::*; use crate::witness::state::RegistersState; -use crate::witness::util::{mem_read_code_with_log_and_fill, stack_peek}; +use crate::witness::util::mem_read_code_with_log_and_fill; use crate::{arithmetic, logic}; fn read_code_memory(state: &mut GenerationState, row: &mut CpuColumnsView) -> u8 { @@ -121,7 +120,7 @@ fn decode(registers: RegistersState, opcode: u8) -> Result { log::warn!( "Kernel panic at {}", - KERNEL.offset_name(registers.program_counter) + KERNEL.offset_name(registers.program_counter), ); Err(ProgramError::KernelPanic) } @@ -284,9 +283,7 @@ fn log_kernel_instruction(state: &mut GenerationState, op: Operatio state.registers.context, KERNEL.offset_name(pc), op, - (0..state.registers.stack_len) - .map(|i| stack_peek(state, i).unwrap()) - .collect_vec() + state.stack() ); assert!(pc < KERNEL.code.len(), "Kernel PC is out of range: {}", pc); @@ -310,7 +307,12 @@ pub(crate) fn transition(state: &mut GenerationState) -> anyhow::Re Err(e) => { if state.registers.is_kernel { let offset_name = KERNEL.offset_name(state.registers.program_counter); - bail!("exception in kernel mode at {}: {:?}", offset_name, e); + bail!( + "{:?} in kernel at pc={}, stack={:?}", + e, + offset_name, + state.stack() + 
); } state.rollback(checkpoint); handle_error(state) diff --git a/evm/tests/add11_yml.rs b/evm/tests/add11_yml.rs new file mode 100644 index 00000000..4b1eba54 --- /dev/null +++ b/evm/tests/add11_yml.rs @@ -0,0 +1,139 @@ +use std::collections::HashMap; +use std::time::Duration; + +use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV}; +use eth_trie_utils::partial_trie::{Nibbles, PartialTrie}; +use ethereum_types::Address; +use hex_literal::hex; +use keccak_hash::keccak; +use plonky2::field::goldilocks_field::GoldilocksField; +use plonky2::plonk::config::PoseidonGoldilocksConfig; +use plonky2::util::timing::TimingTree; +use plonky2_evm::all_stark::AllStark; +use plonky2_evm::config::StarkConfig; +use plonky2_evm::generation::mpt::AccountRlp; +use plonky2_evm::generation::{GenerationInputs, TrieInputs}; +use plonky2_evm::proof::BlockMetadata; +use plonky2_evm::prover::prove; +use plonky2_evm::verifier::verify_proof; + +type F = GoldilocksField; +const D: usize = 2; +type C = PoseidonGoldilocksConfig; + +/// Test a transaction that calls an existing contract which computes 1 + 1 and stores the result in storage slot 0. 
+#[test] +fn add11_yml() -> anyhow::Result<()> { + init_logger(); + + let all_stark = AllStark::::default(); + let config = StarkConfig::standard_fast_config(); + + let beneficiary = hex!("2adc25665018aa1fe0e6bc666dac8fc2697ff9ba"); + let sender = hex!("a94f5374fce5edbc8e2a8697c15331677e6ebf0b"); + let to = hex!("095e7baea6a6c7c4c2dfeb977efac326af552d87"); + + let beneficiary_state_key = keccak(beneficiary); + let sender_state_key = keccak(sender); + let to_state_key = keccak(to); + + let beneficiary_nibbles = Nibbles::from_bytes_be(beneficiary_state_key.as_bytes()).unwrap(); + let sender_nibbles = Nibbles::from_bytes_be(sender_state_key.as_bytes()).unwrap(); + let to_nibbles = Nibbles::from_bytes_be(to_state_key.as_bytes()).unwrap(); + + let code = [0x60, 0x01, 0x60, 0x01, 0x01, 0x60, 0x00, 0x55, 0x00]; + let code_hash = keccak(code); + + let beneficiary_account_before = AccountRlp { + nonce: 1.into(), + ..AccountRlp::default() + }; + let sender_account_before = AccountRlp { + balance: 0x0de0b6b3a7640000u64.into(), + ..AccountRlp::default() + }; + let to_account_before = AccountRlp { + balance: 0x0de0b6b3a7640000u64.into(), + code_hash, + ..AccountRlp::default() + }; + + let mut state_trie_before = PartialTrie::Empty; + state_trie_before.insert( + beneficiary_nibbles, + rlp::encode(&beneficiary_account_before).to_vec(), + ); + state_trie_before.insert(sender_nibbles, rlp::encode(&sender_account_before).to_vec()); + state_trie_before.insert(to_nibbles, rlp::encode(&to_account_before).to_vec()); + + let tries_before = TrieInputs { + state_trie: state_trie_before, + transactions_trie: PartialTrie::Empty, + receipts_trie: PartialTrie::Empty, + storage_tries: vec![(Address::from_slice(&to), PartialTrie::Empty)], + }; + + let txn = hex!("f863800a83061a8094095e7baea6a6c7c4c2dfeb977efac326af552d87830186a0801ba0ffb600e63115a7362e7811894a91d8ba4330e526f22121c994c4692035dfdfd5a06198379fcac8de3dbfac48b165df4bf88e2088f294b61efb9a65fe2281c76e16"); + + let block_metadata = 
BlockMetadata { + block_beneficiary: Address::from(beneficiary), + block_base_fee: 0xa.into(), + ..BlockMetadata::default() + }; + + let mut contract_code = HashMap::new(); + contract_code.insert(keccak(vec![]), vec![]); + contract_code.insert(code_hash, code.to_vec()); + + let inputs = GenerationInputs { + signed_txns: vec![txn.to_vec()], + tries: tries_before, + contract_code, + block_metadata, + addresses: vec![], + }; + + let mut timing = TimingTree::new("prove", log::Level::Debug); + let proof = prove::(&all_stark, &config, inputs, &mut timing)?; + timing.filter(Duration::from_millis(100)).print(); + + let beneficiary_account_after = AccountRlp { + nonce: 1.into(), + ..AccountRlp::default() + }; + let sender_account_after = AccountRlp { + balance: 0xde0b6b3a75be550u64.into(), + nonce: 1.into(), + ..AccountRlp::default() + }; + let to_account_after = AccountRlp { + balance: 0xde0b6b3a76586a0u64.into(), + code_hash, + // Storage map: { 0 => 2 } + storage_root: PartialTrie::Leaf { + nibbles: Nibbles::from_h256_be(keccak([0u8; 32])), + value: vec![2], + } + .calc_hash(), + ..AccountRlp::default() + }; + + let mut expected_state_trie_after = PartialTrie::Empty; + expected_state_trie_after.insert( + beneficiary_nibbles, + rlp::encode(&beneficiary_account_after).to_vec(), + ); + expected_state_trie_after.insert(sender_nibbles, rlp::encode(&sender_account_after).to_vec()); + expected_state_trie_after.insert(to_nibbles, rlp::encode(&to_account_after).to_vec()); + + assert_eq!( + proof.public_values.trie_roots_after.state_root, + expected_state_trie_after.calc_hash() + ); + + verify_proof(&all_stark, proof, &config) +} + +fn init_logger() { + let _ = try_init_from_env(Env::default().filter_or(DEFAULT_FILTER_ENV, "info")); +} diff --git a/evm/tests/basic_smart_contract.rs b/evm/tests/basic_smart_contract.rs index 71d11933..4809ea93 100644 --- a/evm/tests/basic_smart_contract.rs +++ b/evm/tests/basic_smart_contract.rs @@ -95,12 +95,15 @@ fn test_basic_smart_contract() -> 
anyhow::Result<()> { }; let mut contract_code = HashMap::new(); + contract_code.insert(keccak(vec![]), vec![]); contract_code.insert(code_hash, code.to_vec()); + let inputs = GenerationInputs { signed_txns: vec![txn.to_vec()], tries: tries_before, contract_code, block_metadata, + addresses: vec![], }; let mut timing = TimingTree::new("prove", log::Level::Debug); diff --git a/evm/tests/empty_txn_list.rs b/evm/tests/empty_txn_list.rs index 2bd9a116..a0148e74 100644 --- a/evm/tests/empty_txn_list.rs +++ b/evm/tests/empty_txn_list.rs @@ -3,6 +3,7 @@ use std::time::Duration; use env_logger::{try_init_from_env, Env, DEFAULT_FILTER_ENV}; use eth_trie_utils::partial_trie::PartialTrie; +use keccak_hash::keccak; use plonky2::field::goldilocks_field::GoldilocksField; use plonky2::plonk::config::PoseidonGoldilocksConfig; use plonky2::util::timing::TimingTree; @@ -38,6 +39,9 @@ fn test_empty_txn_list() -> anyhow::Result<()> { let txns_trie_root = transactions_trie.calc_hash(); let receipts_trie_root = receipts_trie.calc_hash(); + let mut contract_code = HashMap::new(); + contract_code.insert(keccak(vec![]), vec![]); + let inputs = GenerationInputs { signed_txns: vec![], tries: TrieInputs { @@ -46,8 +50,9 @@ fn test_empty_txn_list() -> anyhow::Result<()> { receipts_trie, storage_tries, }, - contract_code: HashMap::new(), + contract_code, block_metadata, + addresses: vec![], }; let mut timing = TimingTree::new("prove", log::Level::Debug); diff --git a/evm/tests/simple_transfer.rs b/evm/tests/simple_transfer.rs index 094bc070..ff2bed31 100644 --- a/evm/tests/simple_transfer.rs +++ b/evm/tests/simple_transfer.rs @@ -70,11 +70,15 @@ fn test_simple_transfer() -> anyhow::Result<()> { ..BlockMetadata::default() }; + let mut contract_code = HashMap::new(); + contract_code.insert(keccak(vec![]), vec![]); + let inputs = GenerationInputs { signed_txns: vec![txn.to_vec()], tries: tries_before, - contract_code: HashMap::new(), + contract_code, block_metadata, + addresses: vec![], }; let mut 
timing = TimingTree::new("prove", log::Level::Debug); diff --git a/field/src/field_testing.rs b/field/src/field_testing.rs index 4c53c234..16dd87f4 100644 --- a/field/src/field_testing.rs +++ b/field/src/field_testing.rs @@ -10,9 +10,22 @@ macro_rules! test_field_arithmetic { use num::bigint::BigUint; use rand::rngs::OsRng; - use rand::Rng; + use rand::{Rng, RngCore}; use $crate::types::{Field, Sample}; + #[test] + fn modular_reduction() { + let mut rng = OsRng; + for _ in 0..10 { + let x_lo = rng.next_u64(); + let x_hi = rng.next_u32(); + let x = (x_lo as u128) + ((x_hi as u128) << 64); + let a = <$field>::from_noncanonical_u128(x); + let b = <$field>::from_noncanonical_u96((x_lo, x_hi)); + assert_eq!(a, b); + } + } + #[test] fn batch_inversion() { for n in 0..20 { diff --git a/field/src/goldilocks_field.rs b/field/src/goldilocks_field.rs index 9f0b0519..8c8d8bc3 100644 --- a/field/src/goldilocks_field.rs +++ b/field/src/goldilocks_field.rs @@ -110,6 +110,10 @@ impl Field for GoldilocksField { Self(n) } + fn from_noncanonical_u96((n_lo, n_hi): (u64, u32)) -> Self { + reduce96((n_lo, n_hi)) + } + fn from_noncanonical_u128(n: u128) -> Self { reduce128(n) } @@ -337,6 +341,15 @@ unsafe fn add_no_canonicalize_trashing_input(x: u64, y: u64) -> u64 { res_wrapped + EPSILON * (carry as u64) } +/// Reduces to a 64-bit value. The result might not be in canonical form; it could be in between the +/// field order and `2^64`. +#[inline] +fn reduce96((x_lo, x_hi): (u64, u32)) -> GoldilocksField { + let t1 = x_hi as u64 * EPSILON; + let t2 = unsafe { add_no_canonicalize_trashing_input(x_lo, t1) }; + GoldilocksField(t2) +} + /// Reduces to a 64-bit value. The result might not be in canonical form; it could be in between the /// field order and `2^64`. 
#[inline] diff --git a/plonky2/src/hash/poseidon_goldilocks.rs b/plonky2/src/hash/poseidon_goldilocks.rs index b6e9bc74..0d03b3e9 100644 --- a/plonky2/src/hash/poseidon_goldilocks.rs +++ b/plonky2/src/hash/poseidon_goldilocks.rs @@ -4,6 +4,9 @@ //! `poseidon_constants.sage` script in the `mir-protocol/hash-constants` //! repository. +use plonky2_field::types::Field; +use unroll::unroll_for_loops; + use crate::field::goldilocks_field::GoldilocksField; use crate::hash::poseidon::{Poseidon, N_PARTIAL_ROUNDS}; @@ -211,6 +214,39 @@ impl Poseidon for GoldilocksField { 0xdcedab70f40718ba, 0xe796d293a47a64cb, 0x80772dc2645b280b, ], ]; + #[cfg(target_arch="x86_64")] + #[inline(always)] + #[unroll_for_loops] + fn mds_layer(state: &[Self; 12]) -> [Self; 12] { + let mut result = [GoldilocksField::ZERO; 12]; + + // Using the linearity of the operations we can split the state into a low||high decomposition + // and operate on each with no overflow and then combine/reduce the result to a field element. + let mut state_l = [0u64; 12]; + let mut state_h = [0u64; 12]; + + for r in 0..12 { + let s = state[r].0; + state_h[r] = s >> 32; + state_l[r] = (s as u32) as u64; + } + + let state_h = mds_multiply_freq(state_h); + let state_l = mds_multiply_freq(state_l); + + for r in 0..12 { + let s = state_l[r] as u128 + ((state_h[r] as u128) << 32); + + result[r] = GoldilocksField::from_noncanonical_u96((s as u64, (s >> 64) as u32)); + } + + // Add first element with the only non-zero diagonal matrix coefficient. 
+ let s = Self::MDS_MATRIX_DIAG[0] as u128 * (state[0].0 as u128); + result[0] += GoldilocksField::from_noncanonical_u96((s as u64, (s >> 64) as u32)); + + result + } + // #[cfg(all(target_arch="x86_64", target_feature="avx2", target_feature="bmi2"))] // #[inline] // fn poseidon(input: [Self; 12]) -> [Self; 12] { @@ -268,6 +304,142 @@ impl Poseidon for GoldilocksField { } } +// MDS layer helper methods +// The following code has been adapted from winterfell/crypto/src/hash/mds/mds_f64_12x12.rs +// located at https://github.com/facebook/winterfell. + +const MDS_FREQ_BLOCK_ONE: [i64; 3] = [16, 32, 16]; +const MDS_FREQ_BLOCK_TWO: [(i64, i64); 3] = [(2, -1), (-4, 1), (16, 1)]; +const MDS_FREQ_BLOCK_THREE: [i64; 3] = [-1, -8, 2]; + +/// Split 3 x 4 FFT-based MDS vector-multiplication with the Poseidon circulant MDS matrix. +#[inline(always)] +fn mds_multiply_freq(state: [u64; 12]) -> [u64; 12] { + let [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] = state; + + let (u0, u1, u2) = fft4_real([s0, s3, s6, s9]); + let (u4, u5, u6) = fft4_real([s1, s4, s7, s10]); + let (u8, u9, u10) = fft4_real([s2, s5, s8, s11]); + + // This is where the multiplication in the frequency domain is done. More precisely, and with + // the appropriate permutations in between, the sequence of + // 3-point FFTs --> multiplication by twiddle factors --> Hadamard multiplication --> + // 3-point iFFTs --> multiplication by (inverse) twiddle factors + // is "squashed" into one step composed of the functions "block1", "block2" and "block3". + // The expressions in the aforementioned functions are the result of explicit computations + // combined with the Karatsuba trick for the multiplication of complex numbers. + + let [v0, v4, v8] = block1([u0, u4, u8], MDS_FREQ_BLOCK_ONE); + let [v1, v5, v9] = block2([u1, u5, u9], MDS_FREQ_BLOCK_TWO); + let [v2, v6, v10] = block3([u2, u6, u10], MDS_FREQ_BLOCK_THREE); + // The 4th block is not computed as it is similar to the 2nd one, up to complex conjugation. 
+ + let [s0, s3, s6, s9] = ifft4_real_unreduced((v0, v1, v2)); + let [s1, s4, s7, s10] = ifft4_real_unreduced((v4, v5, v6)); + let [s2, s5, s8, s11] = ifft4_real_unreduced((v8, v9, v10)); + + [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] +} + +/// Real 2-FFT over u64 integers. +#[inline(always)] +fn fft2_real(x: [u64; 2]) -> [i64; 2] { + [(x[0] as i64 + x[1] as i64), (x[0] as i64 - x[1] as i64)] +} + +/// Real 2-iFFT over u64 integers. +/// Division by two to complete the inverse FFT is not performed here. +#[inline(always)] +fn ifft2_real_unreduced(y: [i64; 2]) -> [u64; 2] { + [(y[0] + y[1]) as u64, (y[0] - y[1]) as u64] +} + +/// Real 4-FFT over u64 integers. +#[inline(always)] +fn fft4_real(x: [u64; 4]) -> (i64, (i64, i64), i64) { + let [z0, z2] = fft2_real([x[0], x[2]]); + let [z1, z3] = fft2_real([x[1], x[3]]); + let y0 = z0 + z1; + let y1 = (z2, -z3); + let y2 = z0 - z1; + (y0, y1, y2) +} + +/// Real 4-iFFT over u64 integers. +/// Division by four to complete the inverse FFT is not performed here. 
+#[inline(always)] +fn ifft4_real_unreduced(y: (i64, (i64, i64), i64)) -> [u64; 4] { + let z0 = y.0 + y.2; + let z1 = y.0 - y.2; + let z2 = y.1 .0; + let z3 = -y.1 .1; + + let [x0, x2] = ifft2_real_unreduced([z0, z2]); + let [x1, x3] = ifft2_real_unreduced([z1, z3]); + + [x0, x1, x2, x3] +} + +#[inline(always)] +fn block1(x: [i64; 3], y: [i64; 3]) -> [i64; 3] { + let [x0, x1, x2] = x; + let [y0, y1, y2] = y; + let z0 = x0 * y0 + x1 * y2 + x2 * y1; + let z1 = x0 * y1 + x1 * y0 + x2 * y2; + let z2 = x0 * y2 + x1 * y1 + x2 * y0; + + [z0, z1, z2] +} + +#[inline(always)] +fn block2(x: [(i64, i64); 3], y: [(i64, i64); 3]) -> [(i64, i64); 3] { + let [(x0r, x0i), (x1r, x1i), (x2r, x2i)] = x; + let [(y0r, y0i), (y1r, y1i), (y2r, y2i)] = y; + let x0s = x0r + x0i; + let x1s = x1r + x1i; + let x2s = x2r + x2i; + let y0s = y0r + y0i; + let y1s = y1r + y1i; + let y2s = y2r + y2i; + + // Compute x0*y0 - i*x1*y2 - i*x2*y1 using Karatsuba for complex number multiplication + let m0 = (x0r * y0r, x0i * y0i); + let m1 = (x1r * y2r, x1i * y2i); + let m2 = (x2r * y1r, x2i * y1i); + let z0r = (m0.0 - m0.1) + (x1s * y2s - m1.0 - m1.1) + (x2s * y1s - m2.0 - m2.1); + let z0i = (x0s * y0s - m0.0 - m0.1) + (-m1.0 + m1.1) + (-m2.0 + m2.1); + let z0 = (z0r, z0i); + + // Compute x0*y1 + x1*y0 - i*x2*y2 using Karatsuba for complex number multiplication + let m0 = (x0r * y1r, x0i * y1i); + let m1 = (x1r * y0r, x1i * y0i); + let m2 = (x2r * y2r, x2i * y2i); + let z1r = (m0.0 - m0.1) + (m1.0 - m1.1) + (x2s * y2s - m2.0 - m2.1); + let z1i = (x0s * y1s - m0.0 - m0.1) + (x1s * y0s - m1.0 - m1.1) + (-m2.0 + m2.1); + let z1 = (z1r, z1i); + + // Compute x0*y2 + x1*y1 + x2*y0 using Karatsuba for complex number multiplication + let m0 = (x0r * y2r, x0i * y2i); + let m1 = (x1r * y1r, x1i * y1i); + let m2 = (x2r * y0r, x2i * y0i); + let z2r = (m0.0 - m0.1) + (m1.0 - m1.1) + (m2.0 - m2.1); + let z2i = (x0s * y2s - m0.0 - m0.1) + (x1s * y1s - m1.0 - m1.1) + (x2s * y0s - m2.0 - m2.1); + let z2 = (z2r, 
z2i); + + [z0, z1, z2] +} + +#[inline(always)] +fn block3(x: [i64; 3], y: [i64; 3]) -> [i64; 3] { + let [x0, x1, x2] = x; + let [y0, y1, y2] = y; + let z0 = x0 * y0 - x1 * y2 - x2 * y1; + let z1 = x0 * y1 + x1 * y0 - x2 * y2; + let z2 = x0 * y2 + x1 * y1 + x2 * y0; + + [z0, z1, z2] +} + #[cfg(test)] mod tests { use crate::field::goldilocks_field::GoldilocksField as F;