diff --git a/evm/spec/tries.tex b/evm/spec/mpts.tex similarity index 53% rename from evm/spec/tries.tex rename to evm/spec/mpts.tex index 7ec0fcce..49d1d328 100644 --- a/evm/spec/tries.tex +++ b/evm/spec/mpts.tex @@ -6,21 +6,21 @@ Withour our zkEVM's kernel memory, \begin{enumerate} \item An empty node is encoded as $(\texttt{MPT\_NODE\_EMPTY})$. - \item A branch node is encoded as $(\texttt{MPT\_NODE\_BRANCH}, c_1, \dots, c_{16}, \abs{v}, v)$, where each $c_i$ is a pointer to a child node, and $v$ is a value of length $\abs{v}$.\footnote{If a branch node has no associated value, then $\abs{v} = 0$ and $v = ()$.} + \item A branch node is encoded as $(\texttt{MPT\_NODE\_BRANCH}, c_1, \dots, c_{16}, v)$, where each $c_i$ is a pointer to a child node, and $v$ is a pointer to a value. If a branch node has no associated value, then $v = 0$, i.e. the null pointer. \item An extension node is encoded as $(\texttt{MPT\_NODE\_EXTENSION}, k, c)$, $k$ represents the part of the key associated with this extension, and is encoded as a 2-tuple $(\texttt{packed\_nibbles}, \texttt{num\_nibbles})$. $c$ is a pointer to a child node. - \item A leaf node is encoded as $(\texttt{MPT\_NODE\_LEAF}, k, \abs{v}, v)$, where $k$ is a 2-tuple as above, and $v$ is a leaf payload. - \item A digest node is encoded as $(\texttt{MPT\_NODE\_DIGEST}, d)$, where $d$ is a Keccak256 digest. + \item A leaf node is encoded as $(\texttt{MPT\_NODE\_LEAF}, k, v)$, where $k$ is a 2-tuple as above, and $v$ is a pointer to a value. + \item A digest node is encoded as $(\texttt{MPT\_NODE\_HASH}, d)$, where $d$ is a Keccak256 digest. \end{enumerate} \subsection{Prover input format} -The initial state of each trie is given by the prover as a nondeterministic input tape. This tape has a similar format: +The initial state of each trie is given by the prover as a nondeterministic input tape. This tape has a slightly different format: \begin{enumerate} \item An empty node is encoded as $(\texttt{MPT\_NODE\_EMPTY})$. 
- \item A branch node is encoded as $(\texttt{MPT\_NODE\_BRANCH}, \abs{v}, v, c_1, \dots, c_{16})$, where $\abs{v}$ is the length of the value, and $v$ is the value itself. Each $c_i$ is the encoding of a child node. + \item A branch node is encoded as $(\texttt{MPT\_NODE\_BRANCH}, v_?, c_1, \dots, c_{16})$. Here $v_?$ consists of a flag indicating whether a value is present,\todo{In the current implementation, we use a length prefix rather than an is-present prefix, but we plan to change that.} followed by the actual value payload if one is present. Each $c_i$ is the encoding of a child node. \item An extension node is encoded as $(\texttt{MPT\_NODE\_EXTENSION}, k, c)$, $k$ represents the part of the key associated with this extension, and is encoded as a 2-tuple $(\texttt{packed\_nibbles}, \texttt{num\_nibbles})$. $c$ is a pointer to a child node. - \item A leaf node is encoded as $(\texttt{MPT\_NODE\_LEAF}, k, \abs{v}, v)$, where $k$ is a 2-tuple as above, and $v$ is a leaf payload. - \item A digest node is encoded as $(\texttt{MPT\_NODE\_DIGEST}, d)$, where $d$ is a Keccak256 digest. + \item A leaf node is encoded as $(\texttt{MPT\_NODE\_LEAF}, k, v)$, where $k$ is a 2-tuple as above, and $v$ is a value payload. + \item A digest node is encoded as $(\texttt{MPT\_NODE\_HASH}, d)$, where $d$ is a Keccak256 digest. \end{enumerate} -Nodes are thus given in depth-first order, leading to natural recursive methods for encoding and decoding this format. +Nodes are thus given in depth-first order, enabling natural recursive methods for encoding and decoding this format. 
diff --git a/evm/spec/zkevm.pdf b/evm/spec/zkevm.pdf index 184ba36b..f181eba6 100644 Binary files a/evm/spec/zkevm.pdf and b/evm/spec/zkevm.pdf differ diff --git a/evm/spec/zkevm.tex b/evm/spec/zkevm.tex index 65766986..2927e7a5 100644 --- a/evm/spec/zkevm.tex +++ b/evm/spec/zkevm.tex @@ -51,7 +51,7 @@ \input{introduction} \input{framework} \input{tables} -\input{tries} +\input{mpts} \input{instructions} \bibliography{bibliography}{} diff --git a/evm/src/cpu/kernel/asm/mpt/hash.asm b/evm/src/cpu/kernel/asm/mpt/hash.asm index 8342d650..ee8b5ca3 100644 --- a/evm/src/cpu/kernel/asm/mpt/hash.asm +++ b/evm/src/cpu/kernel/asm/mpt/hash.asm @@ -152,21 +152,17 @@ encode_node_branch: %add_const(16) // stack: value_ptr_ptr, rlp_pos', encode_value, retdest %mload_trie_data - // stack: value_len_ptr, rlp_pos', encode_value, retdest - DUP1 %mload_trie_data - // stack: value_len, value_len_ptr, rlp_pos', encode_value, retdest - %jumpi(encode_node_branch_with_value) + // stack: value_ptr, rlp_pos', encode_value, retdest + DUP1 %jumpi(encode_node_branch_with_value) // No value; append the empty string (0x80). - // stack: value_len_ptr, rlp_pos', encode_value, retdest - %stack (value_len_ptr, rlp_pos, encode_value) -> (rlp_pos, 0x80, rlp_pos) + // stack: value_ptr, rlp_pos', encode_value, retdest + %stack (value_ptr, rlp_pos, encode_value) -> (rlp_pos, 0x80, rlp_pos) %mstore_rlp // stack: rlp_pos', retdest %increment // stack: rlp_pos'', retdest %jump(encode_node_branch_prepend_prefix) encode_node_branch_with_value: - // stack: value_len_ptr, rlp_pos', encode_value, retdest - %increment // stack: value_ptr, rlp_pos', encode_value, retdest %stack (value_ptr, rlp_pos, encode_value) -> (encode_value, rlp_pos, value_ptr, encode_node_branch_prepend_prefix) @@ -276,7 +272,6 @@ encode_node_leaf_after_hex_prefix: %add_const(2) // The value pointer starts at index 3, after num_nibbles and packed_nibbles. 
// stack: value_ptr_ptr, rlp_pos, encode_value, retdest %mload_trie_data - %increment // skip over length prefix // stack: value_ptr, rlp_pos, encode_value, retdest %stack (value_ptr, rlp_pos, encode_value, retdest) -> (encode_value, rlp_pos, value_ptr, encode_node_leaf_after_encode_value, retdest) diff --git a/evm/src/cpu/kernel/asm/mpt/load.asm b/evm/src/cpu/kernel/asm/mpt/load.asm index 73f58b95..49258a31 100644 --- a/evm/src/cpu/kernel/asm/mpt/load.asm +++ b/evm/src/cpu/kernel/asm/mpt/load.asm @@ -70,7 +70,7 @@ load_mpt_branch: SWAP1 %append_to_trie_data // stack: node_ptr, retdest // Save the offset of our 16 child pointers so we can write them later. - // Then advance out current trie pointer beyond them, so we can load the + // Then advance our current trie pointer beyond them, so we can load the // value and have it placed after our child pointers. %get_trie_data_size // stack: children_ptr, node_ptr, retdest @@ -79,8 +79,8 @@ load_mpt_branch: %set_trie_data_size // stack: children_ptr, node_ptr, retdest %load_value - // stack: children_ptr, value_ptr, node_ptr, retdest SWAP1 + // stack: children_ptr, value_ptr, node_ptr, retdest // Load the 16 children. %rep 16 @@ -170,26 +170,28 @@ load_mpt_digest: %%after: %endmacro -// Load a leaf from prover input, append it to trie data, and return a pointer to it. +// Load a value from prover input, append it to trie data, and return a pointer to it. +// Return null if the value is empty. 
%macro load_value // stack: (empty) PROVER_INPUT(mpt) // stack: value_len - DUP1 ISZERO - %jumpi(%%return_null) + DUP1 %jumpi(%%has_value) + %stack (value_len) -> (0) + %jump(%%end) +%%has_value: // stack: value_len %get_trie_data_size + // stack: value_ptr, value_len SWAP1 // stack: value_len, value_ptr - DUP1 %append_to_trie_data - // stack: value_len, value_ptr %%loop: DUP1 ISZERO // stack: value_len == 0, value_len, value_ptr %jumpi(%%finish_loop) // stack: value_len, value_ptr PROVER_INPUT(mpt) - // stack: leaf_part, value_len, value_ptr + // stack: value_part, value_len, value_ptr %append_to_trie_data // stack: value_len, value_ptr %decrement @@ -199,8 +201,5 @@ load_mpt_digest: // stack: value_len, value_ptr POP // stack: value_ptr - %jump(%%end) -%%return_null: - %stack (value_len) -> (0) %%end: %endmacro diff --git a/evm/src/cpu/kernel/asm/mpt/read.asm b/evm/src/cpu/kernel/asm/mpt/read.asm index dae97336..d375bedc 100644 --- a/evm/src/cpu/kernel/asm/mpt/read.asm +++ b/evm/src/cpu/kernel/asm/mpt/read.asm @@ -1,6 +1,6 @@ -// Given an address, return a pointer to the associated (length-prefixed) -// account data, which consists of four words (nonce, balance, storage_root, -// code_hash), in the state trie. Returns 0 if the address is not found. +// Given an address, return a pointer to the associated account data, which +// consists of four words (nonce, balance, storage_root, code_hash), in the +// state trie. Returns null if the address is not found. global mpt_read_state_trie: // stack: addr, retdest // The key is the hash of the address. Since KECCAK_GENERAL takes input from @@ -24,7 +24,7 @@ mpt_read_state_trie_after_mstore: // - the key, as a U256 // - the number of nibbles in the key (should start at 64) // -// This function returns a pointer to the length-prefixed leaf, or 0 if the key is not found. +// This function returns a pointer to the value, or 0 if the key is not found. 
global mpt_read: // stack: node_ptr, num_nibbles, key, retdest DUP1 @@ -77,15 +77,6 @@ mpt_read_branch_end_of_key: %add_const(16) // skip over the 16 child nodes // stack: value_ptr_ptr, retdest %mload_trie_data - // stack: value_len_ptr, retdest - DUP1 %mload_trie_data - // stack: value_len, value_len_ptr, retdest - %jumpi(mpt_read_branch_found_value) - // This branch node contains no value, so return null. - %stack (value_len_ptr, retdest) -> (retdest, 0) -mpt_read_branch_found_value: - // stack: value_len_ptr, retdest - %increment // stack: value_ptr, retdest SWAP1 JUMP diff --git a/evm/src/cpu/kernel/tests/mpt/insert.rs b/evm/src/cpu/kernel/tests/mpt/insert.rs index 469ad1e4..5310d431 100644 --- a/evm/src/cpu/kernel/tests/mpt/insert.rs +++ b/evm/src/cpu/kernel/tests/mpt/insert.rs @@ -157,7 +157,6 @@ fn test_state_trie(state_trie: PartialTrie, k: Nibbles, v: Vec) -> Result<() let value_ptr = trie_data.len(); let account: AccountRlp = rlp::decode(&v).expect("Decoding failed"); let account_data = account.to_vec(); - trie_data.push(account_data.len().into()); trie_data.extend(account_data); let trie_data_len = trie_data.len().into(); interpreter.set_global_metadata_field(GlobalMetadata::TrieDataSize, trie_data_len); diff --git a/evm/src/cpu/kernel/tests/mpt/load.rs b/evm/src/cpu/kernel/tests/mpt/load.rs index 0572458d..b7b3b108 100644 --- a/evm/src/cpu/kernel/tests/mpt/load.rs +++ b/evm/src/cpu/kernel/tests/mpt/load.rs @@ -79,7 +79,6 @@ fn load_all_mpts_leaf() -> Result<()> { 3.into(), 0xABC.into(), 5.into(), // value ptr - 4.into(), // value length test_account_1().nonce, test_account_1().balance, test_account_1().storage_root.into_uint(), @@ -200,7 +199,6 @@ fn load_all_mpts_ext_to_leaf() -> Result<()> { 3.into(), // 3 nibbles 0xDEF.into(), // key part 9.into(), // value pointer - 4.into(), // value length test_account_1().nonce, test_account_1().balance, test_account_1().storage_root.into_uint(), diff --git a/evm/src/cpu/kernel/tests/mpt/read.rs 
b/evm/src/cpu/kernel/tests/mpt/read.rs index 06d89ff6..c45a6b60 100644 --- a/evm/src/cpu/kernel/tests/mpt/read.rs +++ b/evm/src/cpu/kernel/tests/mpt/read.rs @@ -44,12 +44,11 @@ fn mpt_read() -> Result<()> { assert_eq!(interpreter.stack().len(), 1); let result_ptr = interpreter.stack()[0].as_usize(); - let result = &interpreter.get_trie_data()[result_ptr..][..5]; - assert_eq!(result[0], 4.into()); - assert_eq!(result[1], account.nonce); - assert_eq!(result[2], account.balance); - assert_eq!(result[3], account.storage_root.into_uint()); - assert_eq!(result[4], account.code_hash.into_uint()); + let result = &interpreter.get_trie_data()[result_ptr..][..4]; + assert_eq!(result[0], account.nonce); + assert_eq!(result[1], account.balance); + assert_eq!(result[2], account.storage_root.into_uint()); + assert_eq!(result[3], account.code_hash.into_uint()); Ok(()) }