diff --git a/evm/spec/tables/memory.tex b/evm/spec/tables/memory.tex index e3a19cab..883134d6 100644 --- a/evm/spec/tables/memory.tex +++ b/evm/spec/tables/memory.tex @@ -76,5 +76,9 @@ By default, all memory is zero-initialized. However, to save numerous writes, we \begin{itemize} \item The read-only kernel code (in segment 0, context 0) is initialized with its correct values. It's checked by hashing the segment and verifying that the hash value matches a verifier-provided one. + \item The code segment (segment 0) in other contexts is initialized with externally-provided account code, then checked against the account code hash. +If the code is meant to be executed, there is a soundness concern: if the code is malformed and ends with an incomplete PUSH, then the missing bytes must +be 0 according to the Ethereum specs. To prevent the issue, we manually write 33 zeros (at most 32 bytes for the PUSH argument, and an extra one for +the post-PUSH PC value). \item The ``TrieData'' segment is initialized with the input tries. The stored tries are hashed and checked against the provided initial hash. Note that the length of the segment and the pointers -- within the ``TrieData'' segment -- for the three tries are provided as prover inputs. The length is then checked against a value computed when hashing the tries. 
\end{itemize} diff --git a/evm/spec/zkevm.pdf b/evm/spec/zkevm.pdf index 0a409abd..1f72b69d 100644 Binary files a/evm/spec/zkevm.pdf and b/evm/spec/zkevm.pdf differ diff --git a/evm/src/cpu/kernel/asm/account_code.asm b/evm/src/cpu/kernel/asm/account_code.asm index 35f3deba..1a9262ee 100644 --- a/evm/src/cpu/kernel/asm/account_code.asm +++ b/evm/src/cpu/kernel/asm/account_code.asm @@ -48,8 +48,8 @@ retzero: %endmacro %macro extcodesize - %stack (address) -> (address, 0, @SEGMENT_KERNEL_ACCOUNT_CODE, %%after) - %jump(load_code) + %stack (address) -> (address, %%after) + %jump(extcodesize) %%after: %endmacro @@ -76,53 +76,54 @@ global sys_extcodesize: global extcodesize: // stack: address, retdest - %extcodesize - // stack: extcodesize(address), retdest - SWAP1 JUMP + %next_context_id + // stack: codesize_ctx, address, retdest + SWAP1 + // stack: address, codesize_ctx, retdest + %jump(load_code) -// Loads the code at `address` into memory, at the given context and segment, starting at offset 0. +// Loads the code at `address` into memory, in the code segment of the given context, starting at offset 0. // Checks that the hash of the loaded code corresponds to the `codehash` in the state trie. -// Pre stack: address, ctx, segment, retdest +// Pre stack: address, ctx, retdest // Post stack: code_size global load_code: - %stack (address, ctx, segment, retdest) -> (extcodehash, address, load_code_ctd, ctx, segment, retdest) + %stack (address, ctx, retdest) -> (extcodehash, address, load_code_ctd, ctx, retdest) JUMP load_code_ctd: - // stack: codehash, ctx, segment, retdest + // stack: codehash, ctx, retdest DUP1 ISZERO %jumpi(load_code_non_existent_account) - PROVER_INPUT(account_code::length) - // stack: code_size, codehash, ctx, segment, retdest - PUSH 0 - -// Loop non-deterministically querying `code[i]` and storing it in `SEGMENT_KERNEL_ACCOUNT_CODE` -// at offset `i`, until `i==code_size`. 
-load_code_loop: - // stack: i, code_size, codehash, ctx, segment, retdest - DUP2 DUP2 EQ - // stack: i == code_size, i, code_size, codehash, ctx, segment, retdest - %jumpi(load_code_check) - DUP1 - // stack: i, i, code_size, codehash, ctx, segment, retdest - DUP6 // segment - DUP6 // context - PROVER_INPUT(account_code::get) - // stack: opcode, context, segment, i, i, code_size, codehash, ctx, segment, retdest - MSTORE_GENERAL - // stack: i, code_size, codehash, ctx, segment, retdest - %increment - // stack: i+1, code_size, codehash, ctx, segment, retdest - %jump(load_code_loop) - -// Check that the hash of the loaded code equals `codehash`. -load_code_check: - // stack: i, code_size, codehash, ctx, segment, retdest - %stack (i, code_size, codehash, ctx, segment, retdest) - -> (ctx, segment, 0, code_size, codehash, retdest, code_size) + // Load the code non-deterministically in memory and return the length. + PROVER_INPUT(account_code) + %stack (code_size, codehash, ctx, retdest) -> (ctx, @SEGMENT_CODE, 0, code_size, codehash, retdest, code_size) + // Check that the hash of the loaded code equals `codehash`. KECCAK_GENERAL // stack: shouldbecodehash, codehash, retdest, code_size %assert_eq + // stack: retdest, code_size JUMP load_code_non_existent_account: - %stack (codehash, ctx, segment, retdest) -> (retdest, 0) + // Write 0 at address 0 for soundness. + // stack: codehash, ctx, retdest + %stack (codehash, ctx, retdest) -> (0, ctx, @SEGMENT_CODE, 0, retdest, 0) + MSTORE_GENERAL + // stack: retdest, 0 + JUMP + +// Identical to load_code, but adds 33 zeros after code_size for soundness reasons. +// If the code ends with an incomplete PUSH, we must make sure that every subsequent read is 0, +// according to the Ethereum specs. 
+// Pre stack: address, ctx, retdest +// Post stack: code_size +global load_code_padded: + %stack (address, ctx, retdest) -> (address, ctx, load_code_padded_ctd, ctx, retdest) + %jump(load_code) + +load_code_padded_ctd: + %stack (code_size, ctx, retdest) -> (ctx, @SEGMENT_CODE, code_size, 0, ctx, retdest, code_size) + MSTORE_32BYTES_32 + // stack: last_offset, ctx, retdest, code_size + %stack (last_offset, ctx) -> (0, ctx, @SEGMENT_CODE, last_offset) + MSTORE_GENERAL + // stack: retdest, code_size JUMP diff --git a/evm/src/cpu/kernel/asm/core/call.asm b/evm/src/cpu/kernel/asm/core/call.asm index ba6c775e..2e7d1d73 100644 --- a/evm/src/cpu/kernel/asm/core/call.asm +++ b/evm/src/cpu/kernel/asm/core/call.asm @@ -352,8 +352,8 @@ call_too_deep: %endmacro %macro set_new_ctx_code - %stack (address, new_ctx) -> (address, new_ctx, @SEGMENT_CODE, %%after, new_ctx) - %jump(load_code) + %stack (address, new_ctx) -> (address, new_ctx, %%after, new_ctx) + %jump(load_code_padded) %%after: %set_new_ctx_code_size // stack: new_ctx diff --git a/evm/src/cpu/kernel/asm/core/process_txn.asm b/evm/src/cpu/kernel/asm/core/process_txn.asm index bb794b65..df39b9d8 100644 --- a/evm/src/cpu/kernel/asm/core/process_txn.asm +++ b/evm/src/cpu/kernel/asm/core/process_txn.asm @@ -248,11 +248,10 @@ global process_message_txn: %create_context // stack: new_ctx, retdest PUSH process_message_txn_code_loaded - PUSH @SEGMENT_CODE - DUP3 // new_ctx + DUP2 // new_ctx %mload_txn_field(@TXN_FIELD_TO) - // stack: address, new_ctx, segment, process_message_txn_code_loaded, new_ctx, retdest - %jump(load_code) + // stack: address, new_ctx, process_message_txn_code_loaded, new_ctx, retdest + %jump(load_code_padded) global process_message_txn_insufficient_balance: // stack: retdest diff --git a/evm/src/cpu/kernel/asm/memory/syscalls.asm b/evm/src/cpu/kernel/asm/memory/syscalls.asm index 638cd1e3..9798f424 100644 --- a/evm/src/cpu/kernel/asm/memory/syscalls.asm +++ b/evm/src/cpu/kernel/asm/memory/syscalls.asm @@ 
-197,8 +197,10 @@ global sys_extcodecopy: DUP1 %ensure_reasonable_offset %update_mem_bytes - %stack (kexit_info, address, dest_offset, offset, size) -> - (address, 0, @SEGMENT_KERNEL_ACCOUNT_CODE, extcodecopy_contd, 0, kexit_info, dest_offset, offset, size) + %next_context_id + + %stack (ctx, kexit_info, address, dest_offset, offset, size) -> + (address, ctx, extcodecopy_contd, ctx, kexit_info, dest_offset, offset, size) %jump(load_code) sys_extcodecopy_empty: @@ -207,8 +209,8 @@ sys_extcodecopy_empty: EXIT_KERNEL extcodecopy_contd: - // stack: code_size, src_ctx, kexit_info, dest_offset, offset, size - %codecopy_after_checks(@SEGMENT_KERNEL_ACCOUNT_CODE) + // stack: code_size, ctx, kexit_info, dest_offset, offset, size + %codecopy_after_checks(@SEGMENT_CODE) // The internal logic is similar to wcopy, but handles range overflow differently. diff --git a/evm/src/generation/prover_input.rs b/evm/src/generation/prover_input.rs index 6d40193f..b2a8f0ce 100644 --- a/evm/src/generation/prover_input.rs +++ b/evm/src/generation/prover_input.rs @@ -3,7 +3,7 @@ use std::str::FromStr; use anyhow::{bail, Error}; use ethereum_types::{BigEndianHash, H256, U256, U512}; -use itertools::Itertools; +use itertools::{enumerate, Itertools}; use num_bigint::BigUint; use plonky2::field::types::Field; use serde::{Deserialize, Serialize}; @@ -19,6 +19,7 @@ use crate::memory::segments::Segment::BnPairing; use crate::util::{biguint_to_mem_vec, mem_vec_to_biguint, u256_to_usize}; use crate::witness::errors::ProgramError; use crate::witness::errors::ProverInputError::*; +use crate::witness::memory::MemoryAddress; use crate::witness::util::{current_context_peek, stack_peek}; /// Prover input function represented as a scoped function name. 
@@ -42,7 +43,7 @@ impl GenerationState { "ffe" => self.run_ffe(input_fn), "rlp" => self.run_rlp(), "current_hash" => self.run_current_hash(), - "account_code" => self.run_account_code(input_fn), + "account_code" => self.run_account_code(), "bignum_modmul" => self.run_bignum_modmul(), "withdrawal" => self.run_withdrawal(), "num_bits" => self.run_num_bits(), @@ -130,35 +131,26 @@ impl GenerationState { Ok(U256::from_big_endian(&self.inputs.block_hashes.cur_hash.0)) } - /// Account code. - fn run_account_code(&mut self, input_fn: &ProverInputFn) -> Result { - match input_fn.0[1].as_str() { - "length" => { - // Return length of code. - // stack: codehash, ... - let codehash = stack_peek(self, 0)?; - Ok(self - .inputs - .contract_code - .get(&H256::from_uint(&codehash)) - .ok_or(ProgramError::ProverInputError(CodeHashNotFound))? - .len() - .into()) - } - "get" => { - // Return `code[i]`. - // stack: context, segment, i, i, code_size, codehash, ... - let i = stack_peek(self, 2).map(u256_to_usize)??; - let codehash = stack_peek(self, 5)?; - Ok(self - .inputs - .contract_code - .get(&H256::from_uint(&codehash)) - .ok_or(ProgramError::ProverInputError(CodeHashNotFound))?[i] - .into()) - } - _ => Err(ProgramError::ProverInputError(InvalidInput)), + /// Account code loading. + /// Initializes the code segment of the given context with the code corresponding + /// to the provided hash. + /// Returns the length of the code. + fn run_account_code(&mut self) -> Result { + // stack: codehash, ctx, ... + let codehash = stack_peek(self, 0)?; + let context = stack_peek(self, 1)?; + let context = u256_to_usize(context)?; + let mut address = MemoryAddress::new(context, Segment::Code, 0); + let code = self + .inputs + .contract_code + .get(&H256::from_uint(&codehash)) + .ok_or(ProgramError::ProverInputError(CodeHashNotFound))?; + for &byte in code { + self.memory.set(address, byte.into()); + address.increment(); } + Ok(code.len().into()) } // Bignum modular multiplication.