mirror of https://github.com/status-im/nim-eth.git
262 lines
8.3 KiB
Nim
262 lines
8.3 KiB
Nim
|
import ../common/hashes, ../rlp, stew/arraybuf
|
||
|
|
||
|
export hashes
|
||
|
|
||
|
type
  ShortHash = ArrayBuf[32, byte]
    ## Buffer of at most 32 bytes holding either a short node encoding
    ## verbatim or the keccak256 digest of a longer one (see `toShortHash`).

  OrderedTrieRootBuilder* = object
    ## Specialised hexary trie root computation for the situation where the
    ## keys are sorted integers and the number of entries is known up front.
    ##
    ## Call `init` with the value count before adding any items.
    ##
    ## In the ethereum MPT, leaf nodes are computed by prefixing the value
    ## with its trie path slice. When the keys are ordered, that path slice
    ## can be pre-computed, which avoids unnecessary storage of leaf values.
    ##
    ## Similar implementations with various tradeoffs exist that cover the
    ## general case:
    ##
    ## * https://github.com/alloy-rs/trie
    ## * https://github.com/rust-ethereum/ethereum/blob/b160820620aa9fd30050d5fcb306be4e12d58c8c/src/util.rs#L152
    ## * https://github.com/ethereum/go-ethereum/blob/master/trie/stacktrie.go
    ##
    ## TODO We don't need to store all leaves - instead, we could for each
    ##      level of the trie store only a hashing state that collects the
    ##      trie built "so far", similar to the StackTrie implementation -
    ##      this works for items 0x80 and up where the rlp-encoded order
    ##      matches insertion order.
    leaves: seq[ShortHash]
      ## One slot per expected item, indexed by trie (rlp-sorted) position.

    items: int
      ## Number of items added so far (and therefore also the key of the
      ## next item)
|
||
|
|
||
|
func init*(T: type OrderedTrieRootBuilder, expected: int): T =
  ## Create a builder whose leaf storage has exactly `expected` slots.
  result.leaves = newSeq[ShortHash](expected)
|
||
|
|
||
|
func toShortHash(v: openArray[byte]): ShortHash =
  ## Inputs of 32 bytes or more are replaced by their keccak256 digest;
  ## shorter inputs are copied as they are.
  if v.len >= 32:
    ShortHash.initCopyFrom(keccak256(v).data)
  else:
    ShortHash.initCopyFrom(v)
|
||
|
|
||
|
func append(w: var RlpWriter, key: ShortHash) =
  ## Write `key` to `w`: contents of 2..31 bytes are emitted as raw bytes,
  ## anything else goes through the regular `append` of the byte data.
  # NOTE(review): presumably 2..31 byte entries are already-encoded nodes
  # that get embedded inline - confirm against the MPT node rules.
  let embedRaw = key.len in 2 .. 31
  if embedRaw:
    w.appendRawBytes key.data
  else:
    w.append key.data
|
||
|
|
||
|
func keyAtIndex(b: var OrderedTrieRootBuilder, i: int): RlpIntBuf =
  ## Map a leaf position `i` back to the rlp-encoded integer key stored
  ## there (the inverse of `keyToIndex`).
  # Position that key 0 occupies: 0x7f, or the last slot when there are
  # fewer leaves than that.
  let zeroSlot = min(0x7f, b.leaves.len - 1)
  let key =
    if i > 0x7f:
      uint64 i
    elif i == zeroSlot:
      0'u64
    else:
      uint64(i + 1)
  rlp.encodeInt(key)
|
||
|
|
||
|
func nibble(v: RlpIntBuf, i: int): byte =
  ## Return the `i`:th 4-bit group of `v`; even indices select the high
  ## half of a byte, odd indices the low half.
  let
    octet = v.data[i shr 1]
    wantsLowHalf = (i and 1) == 1
  if wantsLowHalf:
    octet and 0xf
  else:
    octet shr 4
|
||
|
|
||
|
func nibbles(v: RlpIntBuf): int =
  ## Total number of nibbles in `v` (two per byte).
  2 * v.len
|
||
|
|
||
|
func sharedPrefixLen(a, b: RlpIntBuf): int =
  ## Number of leading nibbles that `a` and `b` have in common.
  let commonBytes = min(a.len, b.len)
  for i in 0 ..< commonBytes:
    if a[i] != b[i]:
      # The bytes differ, but their high nibbles may still agree, in which
      # case one more nibble is shared.
      return
        if a.nibble(i * 2) == b.nibble(i * 2):
          i * 2 + 1
        else:
          i * 2
  # One buffer is a byte-wise prefix of the other: every nibble of the
  # shorter one is shared. The result is a nibble count, hence two per byte.
  commonBytes * 2
|
||
|
|
||
|
func hexPrefixEncode(
    r: RlpIntBuf, ibegin, iend: int, isLeaf = false
): ArrayBuf[10, byte] =
  ## Pack the nibbles `ibegin ..< iend` of `r` into bytes, preceded by a flag
  ## nibble: bit 1 is set when `isLeaf` is true, bit 0 when the nibble count
  ## is odd. With an odd count the first nibble shares the flag byte.
  let
    count = iend - ibegin
    isOdd = (count and 1) != 0
    flags = (if isLeaf: 2 else: 0) + (if isOdd: 1 else: 0)

  result.setLen((count div 2) + 1)
  result[0] = byte(flags shl 4)

  var
    at = 0
    # True while the low half of `result[at]` is still free to fill
    lowHalfFree = isOdd
  for i in ibegin ..< iend:
    let nb = r.nibble(i)
    if lowHalfFree:
      result[at] = result[at] or nb
    else:
      # Current byte is full: start the next one with the high half
      inc at
      result[at] = nb shl 4
    lowHalfFree = not lowHalfFree
|
||
|
|
||
|
proc keyToIndex(b: var OrderedTrieRootBuilder, key: uint64): int =
  ## Compute the position of `key` in rlp-encoded integer order, ie the
  ## order obtained by encoding each key with RLP ("shortest big endian
  ## encoding") and sorting lexicographically - this lexicographical order
  ## determines the location of the key in the trie
  # Key 0 goes into position 0x7f or last, depending on how many there are
  let zeroSlot = min(0x7f, b.leaves.len - 1)
  if key == 0:
    zeroSlot
  elif key <= uint64 zeroSlot:
    # Keys sorting before 0 move down one step to make room for it
    int(key - 1)
  else:
    int key
|
||
|
|
||
|
proc updateHash(
    b: var OrderedTrieRootBuilder, key: uint64, v: auto, w: var RlpWriter
) =
  ## Encode the leaf node for `key` / `v` using `w` as scratch space and
  ## store its short hash in the slot that `key` maps to.
  let
    pos = b.keyToIndex(key)
    cur = rlp.encodeInt(key)
  b.leaves[pos] =
    try:
      w.clear()
      w.startList(2)

      # The longest nibble prefix shared between the key and its sorted
      # neighbours decides how much of the key remains in the leaf itself
      # during encoding
      let spl =
        if b.leaves.len == 1:
          -1 # If there's only one leaf, the whole key is used as leaf path
        elif pos + 1 >= b.leaves.len:
          # Last position: only a predecessor exists
          b.keyAtIndex(pos - 1).sharedPrefixLen(cur)
        elif pos <= 0:
          # First position: only a successor exists
          b.keyAtIndex(pos + 1).sharedPrefixLen(cur)
        else:
          let
            next = b.keyAtIndex(pos + 1)
            prev = b.keyAtIndex(pos - 1)
          max(prev.sharedPrefixLen(cur), next.sharedPrefixLen(cur))

      let leafPath = cur.hexPrefixEncode(spl + 1, cur.nibbles, isLeaf = true)
      w.append(leafPath.data())
      w.append(rlp.encode(v))

      toShortHash(w.finish)
    except RlpError:
      raiseAssert "RLP failures not expected"
|
||
|
|
||
|
proc add*[T](b: var OrderedTrieRootBuilder, v: openArray[T]) =
  ## Feed items to the trie root builder; the value of each item is obtained
  ## with `rlp.encode(item)` and its key is the running item count. By the
  ## time `rootHash` is called, the total number of items added must equal
  ## the count passed to `init`.
  ##
  ## TODO instead of RLP-encoding the items to bytes, we should be hashing
  ##      them directly:
  ##      * https://github.com/status-im/nim-eth/issues/724
  ##      * https://github.com/status-im/nim-eth/issues/698
  var scratch = initRlpWriter() # shared by all items added in this call
  for entry in v:
    b.updateHash(uint64 b.items, entry, scratch)
    inc b.items
|
||
|
|
||
|
proc computeKey(
    b: var OrderedTrieRootBuilder, rng: Slice[int], depth: int
): ShortHash =
  ## Compute the short hash of the node covering the leaf positions in
  ## `rng`, where `depth` is the number of key nibbles already consumed.
  case rng.len
  of 0:
    ShortHash.initCopyFrom([byte 128]) # RLP of empty list
  of 1: # Leaf
    b.leaves[rng.a]
  else: # Branch (or extension)
    let first = b.keyAtIndex(rng.a)

    # Find the shortest shared prefix among the given keys - if this is
    # longer than `depth`, an extension node must be introduced among the
    # nodes in the given range. The top level always has a 0 shared length
    # prefix because the encodings for 0 and 1 start with different nibbles.
    let p =
      if depth == 0:
        0
      else:
        var shortest = int.high
        for idx in rng.a + 1 .. rng.b:
          # TODO We can get rid of this loop by observing what the nibbles
          #      in the RLP integer encoding have in common and adjust
          #      accordingly
          shortest = min(shortest, sharedPrefixLen(first, b.keyAtIndex(idx)))
          if shortest == depth:
            break
        shortest

    var w = initRlpWriter()

    if p != depth:
      # Shared prefix beyond `depth`: extension node followed by the
      # node that starts where the prefix ends
      w.startList(2)
      w.append(first.hexPrefixEncode(depth, p, isLeaf = false).data())
      w.append(b.computeKey(rng, p))
    else:
      # No shared prefix - this is a branch
      w.startList(17)
      # Sub-divide the keys by nibble and recurse
      var start = rng.a
      for n in 0'u8 .. 15'u8:
        # Pick out the keys that have the asked-for nibble at the given depth
        var stop = start
        while stop <= rng.b and b.keyAtIndex(stop).nibble(depth) == n:
          inc stop

        if stop > start:
          w.append b.computeKey(start .. stop - 1, depth + 1)
        else:
          w.append(openArray[byte]([]))
        start = stop

      w.append(openArray[byte]([])) # No data in branch nodes

    toShortHash(w.finish())
|
||
|
|
||
|
proc rootHash*(b: var OrderedTrieRootBuilder): Root =
  ## Compute the trie root over all added items. Asserts that as many items
  ## were added as were announced in `init`.
  doAssert b.items == b.leaves.len, "Items added does not match initial length"
  let top = b.computeKey(0 ..< b.leaves.len, 0)
  if top.len != 32:
    # The top node was short enough to be kept verbatim - hash it now
    keccak256(top.data)
  else:
    Root(top.buf)
|
||
|
|
||
|
proc orderedTrieRoot*[T](items: openArray[T]): Root =
  ## Compute the MPT root of `items`, keyed by the rlp-encoded index of each
  ## item in the list.
  ##
  ## Typical examples include the transaction and withdrawal roots that
  ## appear in blocks.
  ##
  ## The given values will be rlp-encoded using `rlp.encode`.
  var builder = OrderedTrieRootBuilder.init(items.len)
  builder.add(items)
  builder.rootHash()
|
||
|
|
||
|
when isMainModule: # A small benchmark
  # Compares the ordered builder against the general hexary trie on the same
  # values, checking that both produce the same root and printing timings.
  import std/[monotimes, times], eth/trie/[hexary, db]

  let n = 1000000
  echo "Testing ", n
  let values = block:
    # Exactly `n` values, matching the count reported above
    var tmp = newSeqOfCap[uint64](n)
    for i in 0 ..< n:
      tmp.add i.uint64
    tmp

  let x0 = getMonoTime()
  let b1 = block:
    var db = OrderedTrieRootBuilder.init(values.len)

    db.add(values)
    db.rootHash()
  echo b1
  let x1 = getMonoTime()
  let b2 = block:
    var db2 = initHexaryTrie(newMemoryDB())
    for v in values:
      db2.put(rlp.encode(v), rlp.encode(v))

    db2.rootHash()
  let x2 = getMonoTime()
  # `doAssert` so the cross-check also runs in -d:danger benchmark builds
  doAssert b1 == b2

  # (ordered builder time, hexary trie time, ratio of the two)
  echo (
    (x1 - x0), (x2 - x1), (x1 - x0).inNanoseconds.float / (x2 - x1).inNanoseconds.float
  )
|