From 0c9847abf3283c59bd83975367b5fc5511976585 Mon Sep 17 00:00:00 2001 From: Daniel Lubarov Date: Wed, 5 Oct 2022 15:23:06 -0700 Subject: [PATCH] Rework MPT hashing to support inlining <32 byte children --- evm/src/cpu/kernel/asm/memory/packing.asm | 2 +- evm/src/cpu/kernel/asm/mpt/hash.asm | 203 ++++++++++++------ .../cpu/kernel/asm/mpt/hash_trie_specific.asm | 15 +- evm/src/cpu/kernel/asm/mpt/load.asm | 1 + 4 files changed, 149 insertions(+), 72 deletions(-) diff --git a/evm/src/cpu/kernel/asm/memory/packing.asm b/evm/src/cpu/kernel/asm/memory/packing.asm index 4150567f..3021c640 100644 --- a/evm/src/cpu/kernel/asm/memory/packing.asm +++ b/evm/src/cpu/kernel/asm/memory/packing.asm @@ -3,7 +3,7 @@ global mload_packing: // stack: context, segment, offset, len, retdest - // TODO + PANIC // TODO // stack: value // Pre stack: context, segment, offset, value, len, retdest diff --git a/evm/src/cpu/kernel/asm/mpt/hash.asm b/evm/src/cpu/kernel/asm/mpt/hash.asm index 053f357c..75ce379f 100644 --- a/evm/src/cpu/kernel/asm/mpt/hash.asm +++ b/evm/src/cpu/kernel/asm/mpt/hash.asm @@ -1,82 +1,155 @@ // Computes the Merkle root of the given trie node. // -// The encode_value function should take as input +// encode_value is a function which should take as input // - the position withing @SEGMENT_RLP_RAW to write to, // - the offset of a value within @SEGMENT_TRIE_DATA, and // - a return address. // It should serialize the value, write it to @SEGMENT_RLP_RAW starting at the // given position, and return an updated position (the next unused offset). -%macro mpt_hash(encode_value) - // stack: node_ptr, retdest - DUP1 - %mload_trie_data - // stack: node_type, node_ptr, retdest - // Increment node_ptr, so it points to the node payload instead of its type. 
- SWAP1 %add_const(1) SWAP1 - // stack: node_type, node_payload_ptr, retdest - - DUP1 %eq_const(@MPT_NODE_EMPTY) %jumpi(mpt_hash_empty) - DUP1 %eq_const(@MPT_NODE_HASH) %jumpi(mpt_hash_hash) - DUP1 %eq_const(@MPT_NODE_BRANCH) %jumpi(%%mpt_hash_branch) - DUP1 %eq_const(@MPT_NODE_EXTENSION) %jumpi(%%mpt_hash_extension) - DUP1 %eq_const(@MPT_NODE_LEAF) %jumpi(%%mpt_hash_leaf) - PANIC // Invalid node type? Shouldn't get here. - -%%mpt_hash_branch: - // stack: node_type, node_payload_ptr, retdest - POP - // stack: node_payload_ptr, retdest - PANIC // TODO - -%%mpt_hash_extension: - // stack: node_type, node_payload_ptr, retdest - POP - // stack: node_payload_ptr, retdest - PANIC // TODO - -%%mpt_hash_leaf: - // stack: node_type, node_payload_ptr, retdest - POP - // stack: node_payload_ptr, retdest - PUSH %%mpt_hash_leaf_after_hex_prefix // retdest - PUSH 1 // terminated - // stack: terminated, %%mpt_hash_leaf_after_hex_prefix, node_payload_ptr, retdest - DUP3 %add_const(1) %mload_trie_data // Load the packed_nibbles field, which is at index 1. - // stack: packed_nibbles, terminated, %%mpt_hash_leaf_after_hex_prefix, node_payload_ptr, retdest - DUP4 %mload_trie_data // Load the num_nibbles field, which is at index 0. - // stack: num_nibbles, packed_nibbles, terminated, %%mpt_hash_leaf_after_hex_prefix, node_payload_ptr, retdest - PUSH 9 // We start at 9 to leave room to prepend the largest possible RLP list header. - // stack: rlp_start, num_nibbles, packed_nibbles, terminated, %%mpt_hash_leaf_after_hex_prefix, node_payload_ptr, retdest - %jump(hex_prefix_rlp) -%%mpt_hash_leaf_after_hex_prefix: - // stack: rlp_pos, node_payload_ptr, retdest - SWAP1 - %add_const(2) // The value starts at index 2. 
- %stack (value_ptr, rlp_pos, retdest) - -> (rlp_pos, value_ptr, %%mpt_hash_leaf_after_encode_value, retdest) - %jump($encode_value) -%%mpt_hash_leaf_after_encode_value: - // stack: rlp_end_pos, retdest - %prepend_rlp_list_prefix - // stack: rlp_start_pos, rlp_len, retdest - PUSH @SEGMENT_RLP_RAW - PUSH 0 // kernel context - // stack: rlp_start_addr: 3, rlp_len, retdest +// +// Pre stack: node_ptr, encode_value, retdest +// Post stack: hash +global mpt_hash: + // stack: node_ptr, encode_value, retdest + %stack (node_ptr, encode_value) -> (node_ptr, encode_value, mpt_hash_hash_if_rlp) + %jump(encode_or_hash_node) +mpt_hash_hash_if_rlp: + // stack: result, result_len, retdest + // If result_len < 32, then we have an RLP blob, and we need to hash it. + DUP2 %lt_const(32) %jumpi(mpt_hash_hash_rlp) + // Otherwise, we already have a hash, so just return it. + // stack: result, result_len, retdest + %stack (result, result_len, retdest) -> (retdest, result) + JUMP +mpt_hash_hash_rlp: + // stack: result, result_len, retdest + %stack (result, result_len) + // context, segment, offset, value, len, retdest + -> (0, @SEGMENT_RLP_RAW, 0, result, result_len, mpt_hash_hash_rlp_after_unpacking) + %jump(mstore_unpacking) +mpt_hash_hash_rlp_after_unpacking: + // stack: result_len, retdest + PUSH 0 // offset + PUSH @SEGMENT_RLP_RAW // segment + PUSH 0 // context + // stack: result_addr: 3, result_len, retdest KECCAK_GENERAL // stack: hash, retdest SWAP1 JUMP -%endmacro -global mpt_hash_empty: - %stack (node_type, node_payload_ptr, retdest) -> (retdest, @EMPTY_NODE_HASH) +// Given a trie node, return its RLP encoding if it is less than 32 bytes, +// otherwise return the Keccak256 hash of its RLP encoding. +// +// The result is given as a (value, length) pair, where the length is given +// in bytes. 
+// +// Pre stack: node_ptr, encode_value, retdest +// Post stack: result, result_len +global encode_or_hash_node: + %stack (node_ptr, encode_value) -> (node_ptr, encode_value, maybe_hash_node) + %jump(encode_node) +maybe_hash_node: + // stack: result_ptr, result_len, retdest + DUP2 %lt_const(32) + %jumpi(pack_small_rlp) + + // result_len >= 32, so we hash the result. + // stack: result_ptr, result_len, retdest + PUSH @SEGMENT_RLP_RAW // segment + PUSH 0 // context + // stack: result_addr: 3, result_len, retdest + KECCAK_GENERAL + %stack (hash, retdest) -> (retdest, hash, 32) + JUMP +pack_small_rlp: + // stack: result_ptr, result_len, retdest + PANIC // TODO: Return packed RLP + +// RLP encode the given trie node, and return a (pointer, length) pair +// indicating where the data lives within @SEGMENT_RLP_RAW. +// +// Pre stack: node_ptr, encode_value, retdest +// Post stack: result_ptr, result_len +global encode_node: + // stack: node_ptr, encode_value, retdest + DUP1 %mload_trie_data + // stack: node_type, node_ptr, encode_value, retdest + // Increment node_ptr, so it points to the node payload instead of its type. + SWAP1 %add_const(1) SWAP1 + // stack: node_type, node_payload_ptr, encode_value, retdest + + DUP1 %eq_const(@MPT_NODE_EMPTY) %jumpi(encode_node_empty) + DUP1 %eq_const(@MPT_NODE_HASH) %jumpi(encode_node_hash) + DUP1 %eq_const(@MPT_NODE_BRANCH) %jumpi(encode_node_branch) + DUP1 %eq_const(@MPT_NODE_EXTENSION) %jumpi(encode_node_extension) + DUP1 %eq_const(@MPT_NODE_LEAF) %jumpi(encode_node_leaf) + PANIC // Invalid node type? Shouldn't get here. + +global encode_node_empty: + // stack: node_type, node_payload_ptr, encode_value, retdest + %pop3 + // stack: retdest + // An empty node is encoded as a single byte, 0x80, which is the RLP + // encoding of the empty string. Write this byte to RLP[0] and return + // (0, 1). 
+ PUSH 0x80 + PUSH 0 + %mstore_rlp + %stack (retdest) -> (retdest, 0, 1) JUMP -global mpt_hash_hash: - // stack: node_type, node_payload_ptr, retdest +global encode_node_hash: + // stack: node_type, node_payload_ptr, encode_value, retdest POP - // stack: node_payload_ptr, retdest + // stack: node_payload_ptr, encode_value, retdest %mload_trie_data - // stack: hash, retdest - SWAP1 + %stack (hash, encode_value, retdest) -> (retdest, hash, 32) + JUMP + +encode_node_branch: + // stack: node_type, node_payload_ptr, encode_value, retdest + POP + // stack: node_payload_ptr, encode_value, retdest + PUSH 9 // rlp_pos; we start at 9 to leave room to prepend a list prefix + %rep 16 + // stack: rlp_pos, node_child_ptr, encode_value, retdest + // TODO: Append encode_or_hash_node(child) to our RLP. Do all encode_or_hash_node calls first to avoid clobbering. + SWAP1 %increment SWAP1 // node_child_ptr += 1 + %endrep + // stack: node_value_ptr, encode_value, retdest + PANIC // TODO + +encode_node_extension: + // stack: node_type, node_payload_ptr, encode_value, retdest + POP + // stack: node_payload_ptr, encode_value, retdest + PANIC // TODO + +encode_node_leaf: + // stack: node_type, node_payload_ptr, encode_value, retdest + POP + // stack: node_payload_ptr, encode_value, retdest + PUSH encode_node_leaf_after_hex_prefix // retdest + PUSH 1 // terminated + // stack: terminated, encode_node_leaf_after_hex_prefix, node_payload_ptr, encode_value, retdest + DUP3 %add_const(1) %mload_trie_data // Load the packed_nibbles field, which is at index 1. + // stack: packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, node_payload_ptr, encode_value, retdest + DUP4 %mload_trie_data // Load the num_nibbles field, which is at index 0. + // stack: num_nibbles, packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, node_payload_ptr, encode_value, retdest + PUSH 9 // We start at 9 to leave room to prepend the largest possible RLP list header. 
+ // stack: rlp_start, num_nibbles, packed_nibbles, terminated, encode_node_leaf_after_hex_prefix, node_payload_ptr, encode_value, retdest + %jump(hex_prefix_rlp) +encode_node_leaf_after_hex_prefix: + // stack: rlp_pos, node_payload_ptr, encode_value, retdest + SWAP1 + %add_const(2) // The value starts at index 2. + // stack: value_ptr, rlp_pos, encode_value, retdest + %stack (value_ptr, rlp_pos, encode_value, retdest) + -> (encode_value, rlp_pos, value_ptr, encode_node_leaf_after_encode_value, retdest) + JUMP +encode_node_leaf_after_encode_value: + // stack: rlp_end_pos, retdest + %prepend_rlp_list_prefix + %stack (rlp_start_pos, rlp_len, retdest) -> (retdest, rlp_start_pos, rlp_len) JUMP diff --git a/evm/src/cpu/kernel/asm/mpt/hash_trie_specific.asm b/evm/src/cpu/kernel/asm/mpt/hash_trie_specific.asm index 17596547..80763deb 100644 --- a/evm/src/cpu/kernel/asm/mpt/hash_trie_specific.asm +++ b/evm/src/cpu/kernel/asm/mpt/hash_trie_specific.asm @@ -2,9 +2,10 @@ global mpt_hash_state_trie: // stack: retdest + PUSH encode_account %mload_global_metadata(@GLOBAL_METADATA_STATE_TRIE_ROOT) - // stack: node_ptr, retdest - %mpt_hash(encode_account) + // stack: node_ptr, encode_account, retdest + %jump(mpt_hash) %macro mpt_hash_state_trie PUSH %%after @@ -14,9 +15,10 @@ global mpt_hash_state_trie: global mpt_hash_txn_trie: // stack: retdest + PUSH encode_txn %mload_global_metadata(@GLOBAL_METADATA_TXN_TRIE_ROOT) - // stack: node_ptr, retdest - %mpt_hash(encode_txn) + // stack: node_ptr, encode_txn, retdest + %jump(mpt_hash) %macro mpt_hash_txn_trie PUSH %%after @@ -26,9 +28,10 @@ global mpt_hash_txn_trie: global mpt_hash_receipt_trie: // stack: retdest + PUSH encode_receipt %mload_global_metadata(@GLOBAL_METADATA_RECEIPT_TRIE_ROOT) - // stack: node_ptr, retdest - %mpt_hash(encode_receipt) + // stack: node_ptr, encode_receipt, retdest + %jump(mpt_hash) %macro mpt_hash_receipt_trie PUSH %%after diff --git a/evm/src/cpu/kernel/asm/mpt/load.asm 
b/evm/src/cpu/kernel/asm/mpt/load.asm index 2f1bd624..58197594 100644 --- a/evm/src/cpu/kernel/asm/mpt/load.asm +++ b/evm/src/cpu/kernel/asm/mpt/load.asm @@ -159,6 +159,7 @@ load_mpt_digest: // Load a leaf from prover input, and append it to trie data. %macro load_leaf_value + // TODO: Need to store leaf len, or at least a has_leaf flag for branch nodes. // stack: (empty) PROVER_INPUT(mpt) // stack: leaf_len