diff --git a/evm/src/cpu/kernel/aggregator.rs b/evm/src/cpu/kernel/aggregator.rs
index e89c9916..49398288 100644
--- a/evm/src/cpu/kernel/aggregator.rs
+++ b/evm/src/cpu/kernel/aggregator.rs
@@ -82,6 +82,7 @@ pub(crate) fn combined_kernel() -> Kernel {
         include_str!("asm/util/assertions.asm"),
         include_str!("asm/util/basic_macros.asm"),
         include_str!("asm/util/keccak.asm"),
+        include_str!("asm/account_code.asm"),
         include_str!("asm/balance.asm"),
     ];
 
diff --git a/evm/src/cpu/kernel/asm/account_code.asm b/evm/src/cpu/kernel/asm/account_code.asm
new file mode 100644
index 00000000..14ea4037
--- /dev/null
+++ b/evm/src/cpu/kernel/asm/account_code.asm
@@ -0,0 +1,134 @@
+retzero:
+    %stack (account_ptr, retdest) -> (retdest, 0)
+    JUMP
+
+global extcodehash:
+    // stack: address, retdest
+    %mpt_read_state_trie
+    // stack: account_ptr, retdest (a zero `account_ptr` is answered with 0 via `retzero`)
+    DUP1 ISZERO %jumpi(retzero)
+    %add_const(3)
+    // stack: codehash_ptr, retdest
+    %mload_trie_data
+    // stack: codehash, retdest
+    SWAP1 JUMP
+
+
+%macro codesize
+    // stack: (empty)
+    %address
+    %extcodesize
+%endmacro
+
+%macro extcodesize
+    %stack (address) -> (address, %%after)
+    %jump(load_code)
+%%after:
+%endmacro
+
+global extcodesize:
+    // stack: address, retdest
+    %extcodesize
+    // stack: extcodesize(address), retdest
+    SWAP1 JUMP
+
+
+%macro codecopy
+    // stack: dest_offset, offset, size, retdest
+    %address
+    // stack: address, dest_offset, offset, size, retdest
+    %jump(extcodecopy)
+%endmacro
+
+// Pre stack: address, dest_offset, offset, size, retdest
+// Post stack: (empty)
+global extcodecopy:
+    // stack: address, dest_offset, offset, size, retdest
+    %stack (address, dest_offset, offset, size, retdest) -> (address, extcodecopy_contd, size, offset, dest_offset, retdest)
+    %jump(load_code)
+
+extcodecopy_contd:
+    // stack: code_length, size, offset, dest_offset, retdest
+    SWAP1
+    // stack: size, code_length, offset, dest_offset, retdest
+    PUSH 0
+
+// Loop copying `code[offset]` to `memory[dest_offset]` until `i == size`.
+// Each iteration increments `offset`, `dest_offset` and `i`.
+extcodecopy_loop:
+    // stack: i, size, code_length, offset, dest_offset, retdest
+    DUP2 DUP2 EQ
+    // stack: i == size, i, size, code_length, offset, dest_offset, retdest
+    %jumpi(extcodecopy_end)
+    %stack (i, size, code_length, offset, dest_offset, retdest) -> (offset, code_length, offset, code_length, dest_offset, i, size, retdest)
+    LT
+    // stack: offset < code_length, offset, code_length, dest_offset, i, size, retdest
+    DUP2
+    // stack: offset, offset < code_length, offset, code_length, dest_offset, i, size, retdest
+    %mload_current(@SEGMENT_KERNEL_ACCOUNT_CODE)
+    // stack: opcode, offset < code_length, offset, code_length, dest_offset, i, size, retdest
+    %stack (opcode, offset_lt_code_length, offset, code_length, dest_offset, i, size, retdest) -> (offset_lt_code_length, 0, opcode, offset, code_length, dest_offset, i, size, retdest)
+    // If `offset >= code_length`, use `opcode = 0` instead of the loaded value. This is necessary because `SEGMENT_KERNEL_ACCOUNT_CODE` might be clobbered from previous calls.
+    %select_bool
+    // stack: opcode, offset, code_length, dest_offset, i, size, retdest
+    DUP4
+    // stack: dest_offset, opcode, offset, code_length, dest_offset, i, size, retdest
+    %mstore_main
+    // stack: offset, code_length, dest_offset, i, size, retdest
+    %increment
+    // stack: offset+1, code_length, dest_offset, i, size, retdest
+    SWAP2
+    // stack: dest_offset, code_length, offset+1, i, size, retdest
+    %increment
+    // stack: dest_offset+1, code_length, offset+1, i, size, retdest
+    SWAP3
+    // stack: i, code_length, offset+1, dest_offset+1, size, retdest
+    %increment
+    // stack: i+1, code_length, offset+1, dest_offset+1, size, retdest
+    %stack (i, code_length, offset, dest_offset, size, retdest) -> (i, size, code_length, offset, dest_offset, retdest)
+    %jump(extcodecopy_loop)
+
+extcodecopy_end:
+    %stack (i, size, code_length, offset, dest_offset, retdest) -> (retdest)
+    JUMP
+
+
+// Loads the code at `address` into `SEGMENT_KERNEL_ACCOUNT_CODE` of the current context, starting at offset 0.
+// Checks that the hash of the loaded code corresponds to the `codehash` in the state trie.
+// Pre stack: address, retdest
+// Post stack: extcodesize(address)
+load_code:
+    %stack (address, retdest) -> (extcodehash, address, load_code_ctd, retdest)
+    JUMP
+load_code_ctd:
+    // stack: codehash, retdest
+    PROVER_INPUT(account_code::length)
+    // stack: code_length, codehash, retdest
+    PUSH 0
+
+// Loop non-deterministically querying `code[i]` and storing it in `SEGMENT_KERNEL_ACCOUNT_CODE` at offset `i`, until `i == code_length`.
+load_code_loop:
+    // stack: i, code_length, codehash, retdest
+    DUP2 DUP2 EQ
+    // stack: i == code_length, i, code_length, codehash, retdest
+    %jumpi(load_code_check)
+    PROVER_INPUT(account_code::get)
+    // stack: opcode, i, code_length, codehash, retdest
+    DUP2
+    // stack: i, opcode, i, code_length, codehash, retdest
+    %mstore_current(@SEGMENT_KERNEL_ACCOUNT_CODE)
+    // stack: i, code_length, codehash, retdest
+    %increment
+    // stack: i+1, code_length, codehash, retdest
+    %jump(load_code_loop)
+
+// Check that the hash of the loaded code equals `codehash`. NOTE(review): the leading `0` pushed below looks like the context operand of KECCAK_GENERAL, while the code was stored with `%mstore_current` -- confirm this holds outside context 0.
+load_code_check:
+    // stack: i, code_length, codehash, retdest
+    POP
+    // stack: code_length, codehash, retdest
+    %stack (code_length, codehash, retdest) -> (0, @SEGMENT_KERNEL_ACCOUNT_CODE, 0, code_length, codehash, retdest, code_length)
+    KECCAK_GENERAL
+    // stack: shouldbecodehash, codehash, retdest, code_length
+    %assert_eq
+    JUMP
diff --git a/evm/src/cpu/kernel/asm/util/basic_macros.asm b/evm/src/cpu/kernel/asm/util/basic_macros.asm
index 02a2c807..6ec13835 100644
--- a/evm/src/cpu/kernel/asm/util/basic_macros.asm
+++ b/evm/src/cpu/kernel/asm/util/basic_macros.asm
@@ -186,24 +186,24 @@
     // stack: (pred != 0) * nz + (pred == 0) * z
 %endmacro
 
-// If pred, yields z; otherwise, yields nz
+// If pred, yields x; otherwise, yields y
 // Assumes pred is boolean (either 0 or 1).
 %macro select_bool
-    // stack: pred, nz, z
+    // stack: pred, y, x
     DUP1
-    // stack: pred, pred, nz, z
+    // stack: pred, pred, y, x
     ISZERO
-    // stack: notpred, pred, nz, z
+    // stack: notpred, pred, y, x
     SWAP3
-    // stack: z, pred, nz, notpred
+    // stack: x, pred, y, notpred
     MUL
-    // stack: pred * z, nz, notpred
+    // stack: pred * x, y, notpred
     SWAP2
-    // stack: notpred, nz, pred * z
+    // stack: notpred, y, pred * x
     MUL
-    // stack: notpred * nz, pred * z
+    // stack: notpred * y, pred * x
     ADD
-    // stack: notpred * nz + pred * z
+    // stack: notpred * y + pred * x
 %endmacro
 
 %macro square
diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs
index fe95d04c..82bd382b 100644
--- a/evm/src/cpu/kernel/interpreter.rs
+++ b/evm/src/cpu/kernel/interpreter.rs
@@ -71,7 +71,7 @@ pub struct Interpreter<'a> {
     kernel_mode: bool,
     jumpdests: Vec,
     pub(crate) offset: usize,
-    context: usize,
+    pub(crate) context: usize,
     pub(crate) memory: InterpreterMemory,
     pub(crate) generation_state: GenerationState,
     prover_inputs_map: &'a HashMap,
diff --git a/evm/src/cpu/kernel/tests/account_code.rs b/evm/src/cpu/kernel/tests/account_code.rs
new file mode 100644
index 00000000..ae9fc30d
--- /dev/null
+++ b/evm/src/cpu/kernel/tests/account_code.rs
@@ -0,0 +1,186 @@
+use std::collections::HashMap;
+
+use anyhow::Result;
+use eth_trie_utils::partial_trie::PartialTrie;
+use ethereum_types::{Address, BigEndianHash, H256, U256};
+use keccak_hash::keccak;
+use rand::{thread_rng, Rng};
+
+use crate::cpu::kernel::aggregator::KERNEL;
+use crate::cpu::kernel::constants::global_metadata::GlobalMetadata;
+use crate::cpu::kernel::interpreter::Interpreter;
+use crate::cpu::kernel::tests::mpt::nibbles_64;
+use crate::generation::mpt::{all_mpt_prover_inputs_reversed, AccountRlp};
+use crate::memory::segments::Segment;
+
+// Test account whose code hash is `keccak(code)` for the given `code`.
+fn test_account(code: &[u8]) -> AccountRlp {
+    AccountRlp {
+        nonce: U256::from(1111),
+        balance: U256::from(2222),
+        storage_root: PartialTrie::Empty.calc_hash(),
+        code_hash: keccak(code),
+    }
+}
+
+fn random_code() -> Vec<u8> {
+    let mut rng = thread_rng();
+    let num_bytes = rng.gen_range(0..1000);
+    (0..num_bytes).map(|_| rng.gen()).collect()
+}
+
+// Adapted from `tests/mpt/insert.rs`.
+// Prepare the interpreter by inserting the account in the state trie. On success, the state trie hash is left on the stack.
+fn prepare_interpreter(
+    interpreter: &mut Interpreter,
+    address: Address,
+    account: &AccountRlp,
+) -> Result<()> {
+    let load_all_mpts = KERNEL.global_labels["load_all_mpts"];
+    let mpt_insert_state_trie = KERNEL.global_labels["mpt_insert_state_trie"];
+    let mpt_hash_state_trie = KERNEL.global_labels["mpt_hash_state_trie"];
+    let state_trie: PartialTrie = Default::default();
+    let trie_inputs = Default::default();
+
+    interpreter.offset = load_all_mpts;
+    interpreter.push(0xDEADBEEFu32.into());
+
+    interpreter.generation_state.mpt_prover_inputs = all_mpt_prover_inputs_reversed(&trie_inputs);
+    interpreter.run()?;
+    assert_eq!(interpreter.stack(), vec![]);
+
+    let k = nibbles_64(U256::from_big_endian(
+        keccak(address.to_fixed_bytes()).as_bytes(),
+    ));
+    // Next, execute mpt_insert_state_trie.
+    interpreter.offset = mpt_insert_state_trie;
+    let trie_data = interpreter.get_trie_data_mut();
+    if trie_data.is_empty() {
+        // In the assembly we skip over 0, knowing trie_data[0] = 0 by default.
+        // Since we don't explicitly set it to 0, we need to do so here.
+        trie_data.push(0.into());
+    }
+    let value_ptr = trie_data.len();
+    trie_data.push(account.nonce);
+    trie_data.push(account.balance);
+    // In memory, storage_root gets interpreted as a pointer to a storage trie,
+    // so we have to ensure the pointer is valid. It's easiest to set it to 0,
+    // which works as an empty node, since trie_data[0] = 0 = MPT_TYPE_EMPTY.
+    trie_data.push(H256::zero().into_uint());
+    trie_data.push(account.code_hash.into_uint());
+    let trie_data_len = trie_data.len().into();
+    interpreter.set_global_metadata_field(GlobalMetadata::TrieDataSize, trie_data_len);
+    interpreter.push(0xDEADBEEFu32.into());
+    interpreter.push(value_ptr.into()); // value_ptr
+    interpreter.push(k.packed); // key
+
+    interpreter.run()?;
+    assert_eq!(
+        interpreter.stack().len(),
+        0,
+        "Expected empty stack after insert, found {:?}",
+        interpreter.stack()
+    );
+
+    // Now, execute mpt_hash_state_trie.
+    interpreter.offset = mpt_hash_state_trie;
+    interpreter.push(0xDEADBEEFu32.into());
+    interpreter.run()?;
+
+    assert_eq!(
+        interpreter.stack().len(),
+        1,
+        "Expected 1 item on stack after hashing, found {:?}",
+        interpreter.stack()
+    );
+    let hash = H256::from_uint(&interpreter.stack()[0]);
+
+    let updated_trie = state_trie.insert(k, rlp::encode(account).to_vec());
+    let expected_state_trie_hash = updated_trie.calc_hash();
+    assert_eq!(hash, expected_state_trie_hash);
+
+    Ok(())
+}
+
+#[test]
+fn test_extcodesize() -> Result<()> {
+    let code = random_code();
+    let account = test_account(&code);
+
+    let mut interpreter = Interpreter::new_with_kernel(0, vec![]);
+    let address: Address = thread_rng().gen();
+    // Prepare the interpreter by inserting the account in the state trie.
+    prepare_interpreter(&mut interpreter, address, &account)?;
+
+    let extcodesize = KERNEL.global_labels["extcodesize"];
+
+    // Test `extcodesize`
+    interpreter.offset = extcodesize;
+    interpreter.pop(); // Discard the state trie hash left on the stack by `prepare_interpreter`.
+    assert!(interpreter.stack().is_empty());
+    interpreter.push(0xDEADBEEFu32.into());
+    interpreter.push(U256::from_big_endian(address.as_bytes()));
+    interpreter.generation_state.inputs.contract_code =
+        HashMap::from([(keccak(&code), code.clone())]);
+    interpreter.run()?;
+
+    assert_eq!(interpreter.stack(), vec![code.len().into()]);
+
+    Ok(())
+}
+
+#[test]
+fn test_extcodecopy() -> Result<()> {
+    let code = random_code();
+    let account = test_account(&code);
+
+    let mut interpreter = Interpreter::new_with_kernel(0, vec![]);
+    let address: Address = thread_rng().gen();
+    // Prepare the interpreter by inserting the account in the state trie.
+    prepare_interpreter(&mut interpreter, address, &account)?;
+
+    let extcodecopy = KERNEL.global_labels["extcodecopy"];
+
+    // Put random data in main memory and the `KernelAccountCode` segment for realism.
+    let mut rng = thread_rng();
+    for i in 0..2000 {
+        interpreter.memory.context_memory[interpreter.context].segments
+            [Segment::MainMemory as usize]
+            .set(i, U256::from(rng.gen::<u8>()));
+        interpreter.memory.context_memory[interpreter.context].segments
+            [Segment::KernelAccountCode as usize]
+            .set(i, U256::from(rng.gen::<u8>()));
+    }
+
+    // Random inputs
+    let dest_offset = rng.gen_range(0..3000);
+    let offset = rng.gen_range(0..1500);
+    let size = rng.gen_range(0..1500);
+
+    // Test `extcodecopy`
+    interpreter.offset = extcodecopy;
+    interpreter.pop(); // Discard the state trie hash left on the stack by `prepare_interpreter`.
+    assert!(interpreter.stack().is_empty());
+    interpreter.push(0xDEADBEEFu32.into());
+    interpreter.push(size.into());
+    interpreter.push(offset.into());
+    interpreter.push(dest_offset.into());
+    interpreter.push(U256::from_big_endian(address.as_bytes()));
+    interpreter.generation_state.inputs.contract_code =
+        HashMap::from([(keccak(&code), code.clone())]);
+    interpreter.run()?;
+
+    assert!(interpreter.stack().is_empty());
+    // Check that the code was correctly copied to memory; reads past the end of `code` must yield 0.
+    for i in 0..size {
+        let memory = interpreter.memory.context_memory[interpreter.context].segments
+            [Segment::MainMemory as usize]
+            .get(dest_offset + i);
+        assert_eq!(
+            memory,
+            code.get(offset + i).copied().unwrap_or_default().into()
+        );
+    }
+
+    Ok(())
+}
diff --git a/evm/src/cpu/kernel/tests/mod.rs b/evm/src/cpu/kernel/tests/mod.rs
index f9c6377a..2ae9d2b0 100644
--- a/evm/src/cpu/kernel/tests/mod.rs
+++ b/evm/src/cpu/kernel/tests/mod.rs
@@ -1,3 +1,4 @@
+mod account_code;
 mod balance;
 mod core;
 mod curve_ops;
diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs
index d5d7df7c..ad1cfce0 100644
--- a/evm/src/generation/prover_input.rs
+++ b/evm/src/generation/prover_input.rs
@@ -1,6 +1,6 @@
 use std::str::FromStr;
 
-use ethereum_types::U256;
+use ethereum_types::{BigEndianHash, H256, U256};
 use plonky2::field::types::Field;
 
 use crate::generation::prover_input::EvmField::{
@@ -28,6 +28,7 @@ impl GenerationState {
             "ff" => self.run_ff(stack, input_fn),
             "mpt" => self.run_mpt(),
             "rlp" => self.run_rlp(),
+            "account_code" => self.run_account_code(stack, input_fn),
             _ => panic!("Unrecognized prover input function."),
         }
     }
@@ -63,6 +64,29 @@ impl GenerationState {
             .pop()
             .unwrap_or_else(|| panic!("Out of RLP data"))
     }
+
+    /// Account code: answers the `account_code::length` and `account_code::get` prover inputs from `self.inputs.contract_code`.
+    fn run_account_code(&mut self, stack: &[U256], input_fn: &ProverInputFn) -> U256 {
+        match input_fn.0[1].as_str() {
+            "length" => {
+                // Return length of code. Panics if the stack is empty or `contract_code` has no entry for `codehash`.
+                // stack: codehash
+                let codehash = stack.last().expect("Empty stack");
+                self.inputs.contract_code[&H256::from_uint(codehash)]
+                    .len()
+                    .into()
+            }
+            "get" => {
+                // Return `code[i]`. Panics if fewer than 3 stack items are present, the code hash is unknown, or `i` is out of bounds.
+                // stack: i, code_length, codehash
+                let stacklen = stack.len();
+                let i = stack[stacklen - 1].as_usize();
+                let codehash = stack[stacklen - 3];
+                self.inputs.contract_code[&H256::from_uint(&codehash)][i].into()
+            }
+            _ => panic!("Invalid prover input function."),
+        }
+    }
 }
 
 enum EvmField {
diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs
index b8ba904f..f8d536e9 100644
--- a/evm/src/memory/segments.rs
+++ b/evm/src/memory/segments.rs
@@ -20,23 +20,25 @@ pub(crate) enum Segment {
     KernelGeneral = 7,
     /// Another segment for general purpose kernel use.
     KernelGeneral2 = 8,
+    /// Segment to hold account code for opcodes like `CODESIZE`, `CODECOPY`, etc.
+    KernelAccountCode = 9,
     /// Contains normalized transaction fields; see `NormalizedTxnField`.
-    TxnFields = 9,
+    TxnFields = 10,
     /// Contains the data field of a transaction.
-    TxnData = 10,
+    TxnData = 11,
     /// A buffer used to hold raw RLP data.
-    RlpRaw = 11,
+    RlpRaw = 12,
     /// Contains all trie data. Tries are stored as immutable, copy-on-write trees, so this is an
     /// append-only buffer. It is owned by the kernel, so it only lives on context 0.
-    TrieData = 12,
+    TrieData = 13,
     /// A buffer used to store the encodings of a branch node's children.
-    TrieEncodedChild = 13,
+    TrieEncodedChild = 14,
     /// A buffer used to store the lengths of the encodings of a branch node's children.
-    TrieEncodedChildLen = 14,
+    TrieEncodedChildLen = 15,
 }
 
 impl Segment {
-    pub(crate) const COUNT: usize = 15;
+    pub(crate) const COUNT: usize = 16;
 
     pub(crate) fn all() -> [Self; Self::COUNT] {
         [
@@ -49,6 +51,7 @@ impl Segment {
             Self::ContextMetadata,
             Self::KernelGeneral,
             Self::KernelGeneral2,
+            Self::KernelAccountCode,
             Self::TxnFields,
             Self::TxnData,
             Self::RlpRaw,
@@ -70,6 +73,7 @@ impl Segment {
             Segment::ContextMetadata => "SEGMENT_CONTEXT_METADATA",
             Segment::KernelGeneral => "SEGMENT_KERNEL_GENERAL",
             Segment::KernelGeneral2 => "SEGMENT_KERNEL_GENERAL_2",
+            Segment::KernelAccountCode => "SEGMENT_KERNEL_ACCOUNT_CODE",
             Segment::TxnFields => "SEGMENT_NORMALIZED_TXN",
             Segment::TxnData => "SEGMENT_TXN_DATA",
             Segment::RlpRaw => "SEGMENT_RLP_RAW",
@@ -91,6 +95,7 @@ impl Segment {
             Segment::ContextMetadata => 256,
             Segment::KernelGeneral => 256,
             Segment::KernelGeneral2 => 256,
+            Segment::KernelAccountCode => 8,
             Segment::TxnFields => 256,
             Segment::TxnData => 256,
             Segment::RlpRaw => 8,