From 299aabf860241be96e4eaba4f1d7761289afbfb1 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Tue, 11 Oct 2022 08:46:40 -0700 Subject: [PATCH] Fix branch hashing bug --- evm/src/cpu/kernel/asm/memory/core.asm | 13 +++++ evm/src/cpu/kernel/asm/mpt/hash.asm | 56 ++++++++++++------- .../cpu/kernel/constants/global_metadata.rs | 8 ++- evm/src/cpu/kernel/interpreter.rs | 10 +++- evm/src/cpu/kernel/tests/mpt/hash.rs | 6 +- evm/src/cpu/kernel/tests/mpt/insert.rs | 34 ++++++++++- evm/src/memory/segments.rs | 12 +++- 7 files changed, 113 insertions(+), 26 deletions(-) diff --git a/evm/src/cpu/kernel/asm/memory/core.asm b/evm/src/cpu/kernel/asm/memory/core.asm index f4bcf1f1..2b4d2b68 100644 --- a/evm/src/cpu/kernel/asm/memory/core.asm +++ b/evm/src/cpu/kernel/asm/memory/core.asm @@ -55,6 +55,19 @@ // stack: (empty) %endmacro +// Store a single value from the given segment of kernel (context 0) memory. +%macro mstore_kernel(segment, offset) + // stack: value + PUSH $offset + // stack: offset, value + PUSH $segment + // stack: segment, offset, value + PUSH 0 // kernel has context 0 + // stack: context, segment, offset, value + MSTORE_GENERAL + // stack: (empty) +%endmacro + // Load from the kernel a big-endian u32, consisting of 4 bytes (c_3, c_2, c_1, c_0) %macro mload_kernel_u32(segment) // stack: offset diff --git a/evm/src/cpu/kernel/asm/mpt/hash.asm b/evm/src/cpu/kernel/asm/mpt/hash.asm index 511e9d18..8342d650 100644 --- a/evm/src/cpu/kernel/asm/mpt/hash.asm +++ b/evm/src/cpu/kernel/asm/mpt/hash.asm @@ -118,14 +118,22 @@ encode_node_branch: POP // stack: node_payload_ptr, encode_value, retdest + // Get the next unused offset within the encoded child buffers. + // Then immediately increment the next unused offset by 16, so any + // recursive calls will use nonoverlapping offsets. + %mload_global_metadata(@TRIE_ENCODED_CHILD_SIZE) + DUP1 %add_const(16) + %mstore_global_metadata(@TRIE_ENCODED_CHILD_SIZE) + // stack: base_offset, node_payload_ptr, encode_value, retdest + // We will call encode_or_hash_node on each child. For the i'th child, we - // will store the result in SEGMENT_KERNEL_GENERAL[i], and its length in - // SEGMENT_KERNEL_GENERAL_2[i]. + // will store the result in SEGMENT_TRIE_ENCODED_CHILD[base + i], and its length in + // SEGMENT_TRIE_ENCODED_CHILD_LEN[base + i]. %encode_child(0) %encode_child(1) %encode_child(2) %encode_child(3) %encode_child(4) %encode_child(5) %encode_child(6) %encode_child(7) %encode_child(8) %encode_child(9) %encode_child(10) %encode_child(11) %encode_child(12) %encode_child(13) %encode_child(14) %encode_child(15) - // stack: node_payload_ptr, encode_value, retdest + // stack: base_offset, node_payload_ptr, encode_value, retdest // Now, append each child to our RLP tape. PUSH 9 // rlp_pos; we start at 9 to leave room to prepend a list prefix @@ -133,6 +141,11 @@ encode_node_branch: %append_child(4) %append_child(5) %append_child(6) %append_child(7) %append_child(8) %append_child(9) %append_child(10) %append_child(11) %append_child(12) %append_child(13) %append_child(14) %append_child(15) + // stack: rlp_pos', base_offset, node_payload_ptr, encode_value, retdest + + // We no longer need base_offset. + SWAP1 + POP // stack: rlp_pos', node_payload_ptr, encode_value, retdest SWAP1 @@ -165,43 +178,44 @@ encode_node_branch_prepend_prefix: JUMP // Part of the encode_node_branch function. Encodes the i'th child. -// Stores the result in SEGMENT_KERNEL_GENERAL[i], and its length in -// SEGMENT_KERNEL_GENERAL_2[i]. +// Stores the result in SEGMENT_TRIE_ENCODED_CHILD[base + i], and its length in +// SEGMENT_TRIE_ENCODED_CHILD_LEN[base + i]. %macro encode_child(i) - // stack: node_payload_ptr, encode_value, retdest + // stack: base_offset, node_payload_ptr, encode_value, retdest PUSH %%after_encode - DUP3 DUP3 - // stack: node_payload_ptr, encode_value, %%after_encode, node_payload_ptr, encode_value, retdest + DUP4 DUP4 + // stack: node_payload_ptr, encode_value, %%after_encode, base_offset, node_payload_ptr, encode_value, retdest %add_const($i) %mload_trie_data - // stack: child_i_ptr, encode_value, %%after_encode, node_payload_ptr, encode_value, retdest + // stack: child_i_ptr, encode_value, %%after_encode, base_offset, node_payload_ptr, encode_value, retdest %jump(encode_or_hash_node) %%after_encode: - // stack: result, result_len, node_payload_ptr, encode_value, retdest - %mstore_kernel_general($i) - %mstore_kernel_general_2($i) - // stack: node_payload_ptr, encode_value, retdest + // stack: result, result_len, base_offset, node_payload_ptr, encode_value, retdest + DUP3 %add_const($i) %mstore_kernel(@SEGMENT_TRIE_ENCODED_CHILD) + // stack: result_len, base_offset, node_payload_ptr, encode_value, retdest + DUP2 %add_const($i) %mstore_kernel(@SEGMENT_TRIE_ENCODED_CHILD_LEN) + // stack: base_offset, node_payload_ptr, encode_value, retdest %endmacro // Part of the encode_node_branch function. Appends the i'th child's RLP. %macro append_child(i) - // stack: rlp_pos, node_payload_ptr, encode_value, retdest - %mload_kernel_general($i) // load result - %mload_kernel_general_2($i) // load result_len - // stack: result_len, result, rlp_pos, node_payload_ptr, encode_value, retdest + // stack: rlp_pos, base_offset, node_payload_ptr, encode_value, retdest + DUP2 %add_const($i) %mload_kernel(@SEGMENT_TRIE_ENCODED_CHILD) // load result + DUP3 %add_const($i) %mload_kernel(@SEGMENT_TRIE_ENCODED_CHILD_LEN) // load result_len + // stack: result_len, result, rlp_pos, base_offset, node_payload_ptr, encode_value, retdest // If result_len != 32, result is raw RLP, with an appropriate RLP prefix already. DUP1 %sub_const(32) %jumpi(%%unpack) // Otherwise, result is a hash, and we need to add the prefix 0x80 + 32 = 160. - // stack: result_len, result, rlp_pos, node_payload_ptr, encode_value, retdest + // stack: result_len, result, rlp_pos, base_offset, node_payload_ptr, encode_value, retdest PUSH 160 DUP4 // rlp_pos %mstore_rlp SWAP2 %increment SWAP2 // rlp_pos += 1 %%unpack: - %stack (result_len, result, rlp_pos, node_payload_ptr, encode_value, retdest) - -> (rlp_pos, result, result_len, %%after_unpacking, node_payload_ptr, encode_value, retdest) + %stack (result_len, result, rlp_pos, base_offset, node_payload_ptr, encode_value, retdest) + -> (rlp_pos, result, result_len, %%after_unpacking, base_offset, node_payload_ptr, encode_value, retdest) %jump(mstore_unpacking_rlp) %%after_unpacking: - // stack: rlp_pos', node_payload_ptr, encode_value, retdest + // stack: rlp_pos', base_offset, node_payload_ptr, encode_value, retdest %endmacro encode_node_extension: diff --git a/evm/src/cpu/kernel/constants/global_metadata.rs b/evm/src/cpu/kernel/constants/global_metadata.rs index f3f34e7a..295cdfd5 100644 --- a/evm/src/cpu/kernel/constants/global_metadata.rs +++ b/evm/src/cpu/kernel/constants/global_metadata.rs @@ -31,10 +31,14 @@ pub(crate) enum GlobalMetadata { StateTrieRootDigestAfter = 11, TransactionTrieRootDigestAfter = 12, ReceiptTrieRootDigestAfter = 13, + + /// The sizes of the `TrieEncodedChild` and `TrieEncodedChildLen` buffers. In other words, the + /// next available offset in these buffers. + TrieEncodedChildSize = 14, } impl GlobalMetadata { - pub(crate) const COUNT: usize = 14; + pub(crate) const COUNT: usize = 15; pub(crate) fn all() -> [Self; Self::COUNT] { [ @@ -52,6 +56,7 @@ impl GlobalMetadata { Self::StateTrieRootDigestAfter, Self::TransactionTrieRootDigestAfter, Self::ReceiptTrieRootDigestAfter, + Self::TrieEncodedChildSize, ] } @@ -80,6 +85,7 @@ impl GlobalMetadata { GlobalMetadata::ReceiptTrieRootDigestAfter => { "GLOBAL_METADATA_RECEIPT_TRIE_DIGEST_AFTER" } + GlobalMetadata::TrieEncodedChildSize => "TRIE_ENCODED_CHILD_SIZE", } } } diff --git a/evm/src/cpu/kernel/interpreter.rs b/evm/src/cpu/kernel/interpreter.rs index 2eb9dcb9..5f3c7dcc 100644 --- a/evm/src/cpu/kernel/interpreter.rs +++ b/evm/src/cpu/kernel/interpreter.rs @@ -1,3 +1,5 @@ +//! An EVM interpreter for testing and debugging purposes. + use std::collections::HashMap; use anyhow::{anyhow, bail, ensure}; @@ -609,7 +611,13 @@ impl<'a> Interpreter<'a> { let segment = Segment::all()[self.pop().as_usize()]; let offset = self.pop().as_usize(); let value = self.pop(); - assert!(value.bits() <= segment.bit_range()); + assert!( + value.bits() <= segment.bit_range(), + "Value {} exceeds {:?} range of {} bits", + value, + segment, + segment.bit_range() + ); self.memory.mstore_general(context, segment, offset, value); } } diff --git a/evm/src/cpu/kernel/tests/mpt/hash.rs b/evm/src/cpu/kernel/tests/mpt/hash.rs index 6b31a523..dd09f350 100644 --- a/evm/src/cpu/kernel/tests/mpt/hash.rs +++ b/evm/src/cpu/kernel/tests/mpt/hash.rs @@ -90,7 +90,11 @@ fn mpt_hash_branch_to_leaf() -> Result<()> { value: account_rlp.to_vec(), }; let mut children = std::array::from_fn(|_| Box::new(PartialTrie::Empty)); - children[0] = Box::new(leaf); + children[5] = Box::new(PartialTrie::Branch { + children: children.clone(), + value: vec![], + }); + children[3] = Box::new(leaf); let state_trie = PartialTrie::Branch { children, value: vec![], diff --git a/evm/src/cpu/kernel/tests/mpt/insert.rs b/evm/src/cpu/kernel/tests/mpt/insert.rs index 103bdd2e..218e1681 100644 --- a/evm/src/cpu/kernel/tests/mpt/insert.rs +++ b/evm/src/cpu/kernel/tests/mpt/insert.rs @@ -135,7 +135,8 @@ fn mpt_insert_branch_replacing_empty_child() -> Result<()> { } #[test] -fn mpt_insert_extension_to_leaf_same_key() -> Result<()> { +fn mpt_insert_extension_nonoverlapping_keys() -> Result<()> { + // Existing keys are 0xABC, 0xABCDEF; inserted key is 0x12345. let mut children = std::array::from_fn(|_| Box::new(PartialTrie::Empty)); children[0xD] = Box::new(PartialTrie::Leaf { nibbles: Nibbles { @@ -154,6 +155,37 @@ fn mpt_insert_extension_to_leaf_same_key() -> Result<()> { value: test_account_1_rlp(), }), }; + let insert = InsertEntry { + nibbles: Nibbles { + count: 5, + packed: 0x12345.into(), + }, + v: test_account_2_rlp(), + }; + test_state_trie(state_trie, insert) +} + +#[test] +fn mpt_insert_extension_insert_key_extends_node_key() -> Result<()> { + // Existing keys are 0xA, 0xABCD; inserted key is 0xABCDEF. + let mut children = std::array::from_fn(|_| Box::new(PartialTrie::Empty)); + children[0xB] = Box::new(PartialTrie::Leaf { + nibbles: Nibbles { + count: 2, + packed: 0xCD.into(), + }, + value: test_account_1_rlp(), + }); + let state_trie = PartialTrie::Extension { + nibbles: Nibbles { + count: 1, + packed: 0xA.into(), + }, + child: Box::new(PartialTrie::Branch { + children, + value: test_account_1_rlp(), + }), + }; let insert = InsertEntry { nibbles: Nibbles { count: 6, diff --git a/evm/src/memory/segments.rs b/evm/src/memory/segments.rs index 44390a9b..b6254900 100644 --- a/evm/src/memory/segments.rs +++ b/evm/src/memory/segments.rs @@ -38,10 +38,14 @@ pub(crate) enum Segment { /// `StorageTriePointers` with `StorageTrieCheckpointPointers`. /// See also `StateTrieCheckpointPointer`. StorageTrieCheckpointPointers = 15, + /// A buffer used to store the encodings of a branch node's children. + TrieEncodedChild = 16, + /// A buffer used to store the lengths of the encodings of a branch node's children. + TrieEncodedChildLen = 17, } impl Segment { - pub(crate) const COUNT: usize = 16; + pub(crate) const COUNT: usize = 18; pub(crate) fn all() -> [Self; Self::COUNT] { [ @@ -61,6 +65,8 @@ impl Segment { Self::StorageTrieAddresses, Self::StorageTriePointers, Self::StorageTrieCheckpointPointers, + Self::TrieEncodedChild, + Self::TrieEncodedChildLen, ] } @@ -83,6 +89,8 @@ impl Segment { Segment::StorageTrieAddresses => "SEGMENT_STORAGE_TRIE_ADDRS", Segment::StorageTriePointers => "SEGMENT_STORAGE_TRIE_PTRS", Segment::StorageTrieCheckpointPointers => "SEGMENT_STORAGE_TRIE_CHECKPOINT_PTRS", + Segment::TrieEncodedChild => "SEGMENT_TRIE_ENCODED_CHILD", + Segment::TrieEncodedChildLen => "SEGMENT_TRIE_ENCODED_CHILD_LEN", } } @@ -105,6 +113,8 @@ impl Segment { Segment::StorageTrieAddresses => 160, Segment::StorageTriePointers => 32, Segment::StorageTrieCheckpointPointers => 32, + Segment::TrieEncodedChild => 256, + Segment::TrieEncodedChildLen => 6, } } }