Merge pull request #519 from status-im/block_witness_short_rlp

explicit block witness short rlp support
This commit is contained in:
andri lim 2020-07-09 21:57:25 +07:00 committed by GitHub
commit bfd7113035
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 126 additions and 46 deletions

View File

@ -28,8 +28,14 @@ type
mask*: uint
groups*: array[16, Group]
AccountKey* = tuple[address: EthAddress, codeTouched: bool, storageKeys: MultikeysRef]
MatchGroup* = tuple[match: bool, group: Group]
AccountKey* = object
address*: EthAddress
codeTouched*: bool
storageKeys*: MultikeysRef
MatchGroup* = object
match*: bool
group*: Group
func cmpHash(a, b: KeyHash): int =
var i = 0
@ -128,12 +134,12 @@ func groups*(m: MultikeysRef, depth: int, n: NibblesSeq, parentGroup: Group): Ma
if not compareNibbles(m.keys[i].hash, depth, n):
g.last = i - 1
# case 1: match and no match
return (true, g)
return MatchGroup(match: true, group: g)
inc i
# case 2: all is a match group
g.last = parentGroup.last
return (true, g)
return MatchGroup(match: true, group: g)
# no match came first, skip no match
# we only interested in a match group
@ -149,15 +155,15 @@ func groups*(m: MultikeysRef, depth: int, n: NibblesSeq, parentGroup: Group): Ma
if not compareNibbles(m.keys[i].hash, depth, n):
# case 3: no match, match, and no match
g.last = i - 1
return (true, g)
return MatchGroup(match: true, group: g)
inc i
# case 4: no match and match
g.last = parentGroup.last
return (true, g)
return MatchGroup(match: true, group: g)
# case 5: no match at all
result = (false, g)
result = MatchGroup(match: false, group: g)
func isValidMatch(mg: MatchGroup): bool {.inline.} =
  ## Returns true when `mg` reports a match and the matched group
  ## covers exactly one key, i.e. its `first` and `last` indices coincide.
  mg.match and mg.group.first == mg.group.last

View File

@ -31,8 +31,9 @@ Every time you request a node using a hash key, you'll get one of the 3 types of
### Deviation from yellow paper
* In the Yellow Paper, the `hash to next node` may be replaced by the next node directly if the RLP encoded node bytes count
less than 32. But in a real Ethereum State trie, this never happened. An empty RLP encoded `Account` will have length of 70.
Combined with the Hex Prefix encoding of nibbles, it will be more than 70 bytes.
is less than 32. But in a real Ethereum state trie, this never happens in the account trie. An empty RLP-encoded `Account` has a length of 70 bytes.
Combined with the Hex Prefix encoding of nibbles, it will be more than 70 bytes. Short RLP nodes only exist in the storage trie
at depth >= 9.
* In the Yellow Paper, the 17th element of the `Branch Node` can contain a value, but it is always empty in a real Ethereum state trie.
The block witness spec also ignores this 17th element when encoding or decoding a `Branch Node`.
This is possible because in the Ethereum `Secure Hexary Trie`, every key has a uniform length of 32 bytes, or 64 nibbles.

View File

@ -69,7 +69,7 @@ proc setupStateDB(tester: var Tester, wantedState: JsonNode, stateDB: var Accoun
let sKeys = if storageKeys.len != 0: newMultiKeys(storageKeys) else: MultikeysRef(nil)
let codeTouched = code.len > 0
keys.add((account, codeTouched, sKeys))
keys.add(AccountKey(address: account, codeTouched: codeTouched, storageKeys: sKeys))
tester.keys = newMultiKeys(keys)
stateDB.persist()

View File

@ -75,18 +75,18 @@ proc runTest(numPairs: int, testStatusIMPL: var TestStatus,
for i in 0..<numPairs:
let acc = randAccount(memDB)
addrs[i] = (randAddress(), acc.codeTouched, acc.storageKeys)
addrs[i] = AccountKey(address: randAddress(), codeTouched: acc.codeTouched, storageKeys: acc.storageKeys)
accs[i] = acc.account
trie.put(addrs[i].address, rlp.encode(accs[i]))
when addInvalidKeys:
# invalidAddress should not end up in block witness
let invalidAddress = randAddress()
addrs.add((invalidAddress, false, MultikeysRef(nil)))
addrs.add(AccountKey(address: invalidAddress))
if addIdenticalKeys:
let invalidAddress = addrs[0].address
addrs.add((invalidAddress, false, MultikeysRef(nil)))
addrs.add(AccountKey(address: invalidAddress))
var mkeys = newMultiKeys(addrs)
let rootHash = trie.rootHash
@ -122,13 +122,20 @@ proc runTest(numPairs: int, testStatusIMPL: var TestStatus,
for kd in mkeys.keys:
check kd.visited == true
proc initMultiKeys(keys: openArray[string]): MultikeysRef =
proc initMultiKeys(keys: openArray[string], storageMode: bool = false): MultikeysRef =
result.new
for x in keys:
result.keys.add KeyData(
storageMode: false,
hash: hexToByteArray[32](x)
)
if storageMode:
for i, x in keys:
result.keys.add KeyData(
storageMode: true,
hash: hexToByteArray[32](x)
)
else:
for x in keys:
result.keys.add KeyData(
storageMode: false,
hash: hexToByteArray[32](x)
)
proc parseInvalidInput(payload: openArray[byte]): bool =
var db = newMemoryDB()
@ -246,5 +253,34 @@ proc witnessKeysMain*() =
let z = readFile(x)
check parseInvalidInput(z.toOpenArrayByte(0, z.len-1))
test "short rlp test":
let keys = [
"01234567abce7762869be690036144c12c256bdb06ee9073ad5ecca18a47c325",
"01234567b491732f964182ce4bde5e2468318692ed446e008f621b26f8ff5660",
"01234567c140158288775c8912aed274fb9d6a3a260e9e95e03e70ba8df30f6b",
]
let m = initMultiKeys(keys, true)
var memDB = newMemoryDB()
var trie = initSecureHexaryTrie(memDB)
var acc = randAccount(memDB)
var tt = initHexaryTrie(memDB)
for x in m.keys:
tt.put(x.hash, rlp.encode(1.u256))
acc.account.storageRoot = tt.rootHash
let addrs = @[AccountKey(address: randAddress(), codeTouched: acc.codeTouched, storageKeys: m)]
trie.put(addrs[0].address, rlp.encode(acc.account))
var mkeys = newMultiKeys(addrs)
let rootHash = trie.rootHash
var wb = initWitnessBuilder(memDB, rootHash, {wfEIP170})
var witness = wb.buildWitness(mkeys)
var db = newMemoryDB()
var tb = initTreeBuilder(witness, db, {wfEIP170})
let root = tb.buildTree()
check root.data == rootHash.data
when isMainModule:
witnessKeysMain()

View File

@ -29,6 +29,8 @@ type
# this TreeBuilder supports short node parsing,
# but a block witness should not contain short nodes
# for the account trie. Short RLP nodes only appear in
# the storage trie at depth >= 9
# the InputStream still unstable
# when using large dataset for testing
@ -147,6 +149,11 @@ proc toKeccak(r: var NodeKey, x: openArray[byte]) {.inline.} =
r.data[0..31] = x[0..31]
r.usedBytes = 32
proc toKeccak(r: var NodeKey, z: byte, x: openArray[byte]) {.inline.} =
  ## Fills `r` with a full 32-byte key assembled from a leading byte `z`
  ## followed by the first 31 bytes of `x`.
  ##
  ## `x` must hold at least 31 bytes; the slice below fails otherwise.
  r.data[0] = z
  # Take the tail first so a too-short `x` fails before `r.data[1..31]`
  # is touched, exactly as a direct slice assignment would.
  let tail = x[0 .. 30]
  r.data[1 .. 31] = tail
  r.usedBytes = 32
proc append(r: var RlpWriter, n: NodeKey) =
if n.usedBytes < 32:
r.append rlpFromBytes(n.data.toOpenArray(0, n.usedBytes-1))
@ -162,6 +169,12 @@ proc toNodeKey(t: var TreeBuilder, z: openArray[byte]): NodeKey =
result.usedBytes = 32
t.db.put(result.data, z)
proc toNodeKey(z: openArray[byte]): NodeKey =
  ## Wraps a short (fewer than 32 bytes) RLP payload in a `NodeKey`
  ## without hashing it: the bytes are copied to the front of `data`
  ## and `usedBytes` records how many of them are meaningful.
  ##
  ## Raises `ParsingError` when `z` is 32 bytes or longer.
  let n = z.len
  if n >= 32:
    raise newException(ParsingError, "Failed when try to convert short rlp to NodeKey")

  result.usedBytes = n
  for i in 0 ..< n:
    result.data[i] = z[i]
proc forceSmallNodeKeyToHash(t: var TreeBuilder, r: NodeKey): NodeKey =
let hash = keccak(r.data.toOpenArray(0, r.usedBytes-1))
t.db.put(hash.data, r.data.toOpenArray(0, r.usedBytes-1))
@ -176,7 +189,7 @@ proc branchNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey
proc extensionNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey
proc accountNode(t: var TreeBuilder, depth: int): NodeKey
proc accountStorageLeafNode(t: var TreeBuilder, depth: int): NodeKey
proc hashNode(t: var TreeBuilder): NodeKey
proc hashNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey
proc treeNode(t: var TreeBuilder, depth: int = 0, storageMode = false): NodeKey
proc buildTree*(t: var TreeBuilder): KeccakHash
@ -218,7 +231,7 @@ proc buildForest*(t: var TreeBuilder): seq[KeccakHash]
result.add KeccakHash(data: res.data)
proc treeNode(t: var TreeBuilder, depth: int, storageMode = false): NodeKey =
if depth >= 64:
if depth > 64:
raise newException(ParsingError, "invalid trie structure")
let nodeType = safeReadEnum(t, TrieNodeType)
@ -231,7 +244,7 @@ proc treeNode(t: var TreeBuilder, depth: int, storageMode = false): NodeKey =
result = t.accountStorageLeafNode(depth)
else:
result = t.accountNode(depth)
of HashNodeType: result = t.hashNode()
of HashNodeType: result = t.hashNode(depth, storageMode)
if depth == 0 and result.usedBytes < 32:
result = t.forceSmallNodeKeyToHash(result)
@ -335,7 +348,7 @@ proc extensionNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey =
let nodeType = safeReadEnum(t, TrieNodeType)
case nodeType
of BranchNodeType: r.append t.branchNode(depth + nibblesLen, storageMode)
of HashNodeType: r.append t.hashNode()
of HashNodeType: r.append t.hashNode(depth, storageMode)
else: raise newException(ParsingError, "wrong type during parsing child of extension node")
result = t.toNodeKey(r.finish)
@ -361,13 +374,13 @@ proc readCodeLen(t: var TreeBuilder): int =
t.keys[^1].codeLen = codeLen.int
result = codeLen.int
proc readHashNode(t: var TreeBuilder): NodeKey =
proc readHashNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey =
let nodeType = safeReadEnum(t, TrieNodeType)
if nodeType != HashNodeType:
raise newException(ParsingError, "hash node expected but got " & $nodeType)
result = t.hashNode()
result = t.hashNode(depth, storageMode)
proc readByteCode(t: var TreeBuilder, acc: var Account) =
proc readByteCode(t: var TreeBuilder, acc: var Account, depth: int) =
let bytecodeType = safeReadEnum(t, BytecodeType)
case bytecodeType
of CodeTouched:
@ -380,7 +393,7 @@ proc readByteCode(t: var TreeBuilder, acc: var Account) =
# we could discard it here
discard t.readCodeLen()
let codeHash = t.readHashNode()
let codeHash = t.readHashNode(depth, false)
doAssert(codeHash.usedBytes == 32)
acc.codeHash.data = codeHash.data
@ -413,7 +426,7 @@ proc accountNode(t: var TreeBuilder, depth: int): NodeKey =
acc.codeHash = blankStringHash
acc.storageRoot = emptyRlpHash
of ExtendedAccountType:
t.readByteCode(acc)
t.readByteCode(acc, depth)
# switch to account storage parsing mode
# and reset the depth
@ -478,6 +491,20 @@ proc accountStorageLeafNode(t: var TreeBuilder, depth: int): NodeKey =
when defined(debugHash):
doAssert(result == nodeKey, "account storage leaf node parsing error")
proc hashNode(t: var TreeBuilder): NodeKey =
safeReadBytes(t, 32):
result.toKeccak(t.read(32))
proc hashNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey =
  ## Reads the payload of a hash node from the witness input.
  ##
  ## Outside storage mode, or at a depth below 9, the payload is a plain
  ## 32-byte hash. In storage mode at depth >= 9 the first byte is
  ## inspected:
  ## * any value other than `ShortRlpPrefix` is the first byte of a
  ##   32-byte hash, and 31 more bytes follow;
  ## * `ShortRlpPrefix` is followed by a length byte. A length of zero
  ##   means 31 more bytes follow and complete a hash whose first byte
  ##   is 0; a non-zero length means that many bytes of short RLP follow
  ##   and are stored unhashed via `toNodeKey`.
  let shortRlpPossible = storageMode and depth >= 9
  if not shortRlpPossible:
    safeReadBytes(t, 32):
      result.toKeccak(t.read(32))
    return

  let firstByte = t.safeReadByte()
  if firstByte != ShortRlpPrefix:
    # ordinary hash: the byte already consumed is its first byte
    safeReadBytes(t, 31):
      result.toKeccak(firstByte, t.read(31))
    return

  let rlpLen = t.safeReadByte().int
  if rlpLen == 0:
    # escaped hash whose first byte is 0
    safeReadBytes(t, 31):
      result.toKeccak(0, t.read(31))
  else:
    # embedded short RLP node of `rlpLen` bytes
    safeReadBytes(t, rlpLen):
      result = toNodeKey(t.read(rlpLen))

View File

@ -136,17 +136,26 @@ proc writeBranchNode(wb: var WitnessBuilder, mask: uint, depth: int, node: openA
when defined(debugHash):
wb.write(keccak(node).data)
proc writeHashNode(wb: var WitnessBuilder, node: openArray[byte]) =
proc writeHashNode(wb: var WitnessBuilder, node: openArray[byte], depth: int, storageMode: bool) =
  ## Emits a hash node: the `HashNodeType` tag followed by the bytes of
  ## `node`.
  ##
  ## In storage mode at depth >= 9, a `node` whose first byte is 0 gets
  ## an extra `ShortRlpPrefix` byte written in front of it.
  # Usually a hash node means the recursion will not go deeper and the
  # information can be represented by the hash alone. For a chunked
  # witness, a hash node can be the root of another sub-trie in one of
  # the chunks.
  wb.writeByte(HashNodeType)

  # `node[0]` is only inspected once the depth and mode checks pass.
  let needsPrefix = depth >= 9 and storageMode and node[0] == 0.byte
  if needsPrefix:
    wb.writeByte(ShortRlpPrefix)

  wb.write(node)
proc writeShortRlp(wb: var WitnessBuilder, node: openArray[byte], depth: int, storageMode: bool) =
  ## Emits a short RLP node (fewer than 32 bytes) in place of a hash.
  ##
  ## Output layout: `HashNodeType` tag, `ShortRlpPrefix`, one length
  ## byte, then the raw bytes of `node`.
  ##
  ## Asserts that the node really is short and that the writer is in
  ## storage mode at depth >= 9.
  doAssert(node.len < 32 and depth >= 9 and storageMode)
  writeByte(wb, HashNodeType)
  writeByte(wb, ShortRlpPrefix)
  writeByte(wb, node.len)
  write(wb, node)
proc getBranchRecurse(wb: var WitnessBuilder, z: var StackElem) {.raises: [ContractCodeError, IOError, Defect, CatchableError, Exception].}
proc writeByteCode(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
proc writeByteCode(wb: var WitnessBuilder, kd: KeyData, acc: Account, depth: int) =
if not kd.codeTouched:
# the account have code but not touched by the EVM
# in current block execution
@ -155,7 +164,7 @@ proc writeByteCode(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
if wfEIP170 in wb.flags and code.len > EIP170_CODE_SIZE_LIMIT:
raise newException(ContractCodeError, "code len exceed EIP170 code size limit")
wb.writeUVarint32(code.len)
wb.writeHashNode(acc.codeHash.data)
wb.writeHashNode(acc.codeHash.data, depth, false)
# no need to write 'code' here
return
@ -172,10 +181,10 @@ proc writeByteCode(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
wb.writeUVarint32(code.len)
wb.write(code)
proc writeStorage(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
proc writeStorage(wb: var WitnessBuilder, kd: KeyData, acc: Account, depth: int) =
if kd.storageKeys.isNil:
# the account have storage but not touched by EVM
wb.writeHashNode(acc.storageRoot.data)
wb.writeHashNode(acc.storageRoot.data, depth, true)
elif acc.storageRoot != emptyRlpHash:
# the account have storage and the EVM use it
var zz = StackElem(
@ -188,7 +197,7 @@ proc writeStorage(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
getBranchRecurse(wb, zz)
else:
# no storage at all
wb.writeHashNode(emptyRlpHash.data)
wb.writeHashNode(emptyRlpHash.data, depth, true)
proc writeAccountNode(wb: var WitnessBuilder, kd: KeyData, acc: Account,
node: openArray[byte], depth: int) {.raises: [ContractCodeError, IOError, Defect, CatchableError, Exception].} =
@ -212,8 +221,8 @@ proc writeAccountNode(wb: var WitnessBuilder, kd: KeyData, acc: Account,
wb.writeUVarint(acc.nonce)
if accountType != SimpleAccountType:
wb.writeByteCode(kd, acc)
wb.writeStorage(kd, acc)
wb.writeByteCode(kd, acc, depth)
wb.writeStorage(kd, acc, depth)
#0x00 address:<Address> balance:<Bytes32> nonce:<Bytes32>
#0x01 address:<Address> balance:<Bytes32> nonce:<Bytes32> bytecode:<Bytecode> storage:<Tree_Node(0,1)>
@ -235,6 +244,10 @@ proc writeAccountStorageLeafNode(wb: var WitnessBuilder, key: openArray[byte], v
proc getBranchRecurse(wb: var WitnessBuilder, z: var StackElem) =
if z.node.len == 0: return
if z.node.len < 32:
writeShortRlp(wb, z.node, z.depth, z.storageMode)
return
var nodeRlp = rlpFromBytes z.node
case nodeRlp.listLen
@ -244,7 +257,7 @@ proc getBranchRecurse(wb: var WitnessBuilder, z: var StackElem) =
if not mg.match:
# return immediately if there is no match
writeHashNode(wb, keccak(z.node).data)
writeHashNode(wb, keccak(z.node).data, z.depth, z.storageMode)
return
let value = nodeRlp.listElem(1)
@ -295,14 +308,10 @@ proc getBranchRecurse(wb: var WitnessBuilder, z: var StackElem) =
continue
if branch.isList:
# short node appear in yellow paper
# but never in the actual ethereum state trie
# an rlp encoded ethereum account will have length > 32 bytes
# block witness spec silent about this
doAssert(false, "Short node should not exist in block witness")
writeShortRlp(wb, branch.rawData, z.depth, z.storageMode)
else:
# if branch elem not empty and not a match, emit hash
writeHashNode(wb, branch.expectHash)
writeHashNode(wb, branch.expectHash, z.depth, z.storageMode)
# 17th elem should always empty
# 17th elem appear in yellow paper but never in

View File

@ -33,6 +33,7 @@ type
const
StorageLeafNodeType* = AccountNodeType
BlockWitnessVersion* = 0x01
ShortRlpPrefix* = 0.byte
proc setBranchMaskBit*(x: var uint, i: int) {.inline.} =
assert(i >= 0 and i < 17)