Merge pull request #519 from status-im/block_witness_short_rlp

explicit block witness short rlp support
2025-01-27 04:26:07 +00:00 · 2020-07-09 21:57:25 +07:00 · 2020-07-09 21:57:25 +07:00 · bfd7113035
commit bfd7113035
parent f81b3d5b96 c8871bd00c
7 changed files with 126 additions and 46 deletions
--- a/stateless/multi_keys.nim
+++ b/stateless/multi_keys.nim
@ -28,8 +28,14 @@ type
    mask*: uint
    groups*: array[16, Group]

-  AccountKey* = tuple[address: EthAddress, codeTouched: bool, storageKeys: MultikeysRef]
-  MatchGroup* = tuple[match: bool, group: Group]
+  AccountKey* = object
+    address*: EthAddress
+    codeTouched*: bool
+    storageKeys*: MultikeysRef
+
+  MatchGroup* = object
+    match*: bool
+    group*: Group

 func cmpHash(a, b: KeyHash): int =
  var i = 0
@ -128,12 +134,12 @@ func groups*(m: MultikeysRef, depth: int, n: NibblesSeq, parentGroup: Group): Ma
      if not compareNibbles(m.keys[i].hash, depth, n):
        g.last = i - 1
        # case 1: match and no match
-        return (true, g)
+        return MatchGroup(match: true, group: g)
      inc i

    # case 2: all is a match group
    g.last = parentGroup.last
-    return (true, g)
+    return MatchGroup(match: true, group: g)

  # no match came first, skip no match
  # we only interested in a match group
@ -149,15 +155,15 @@ func groups*(m: MultikeysRef, depth: int, n: NibblesSeq, parentGroup: Group): Ma
      if not compareNibbles(m.keys[i].hash, depth, n):
        # case 3: no match, match, and no match
        g.last = i - 1
-        return (true, g)
+        return MatchGroup(match: true,  group: g)
      inc i

    # case 4: no match and match
    g.last = parentGroup.last
-    return (true, g)
+    return MatchGroup(match: true, group: g)

  # case 5: no match at all
-  result = (false, g)
+  result = MatchGroup(match: false, group: g)

 func isValidMatch(mg: MatchGroup): bool {.inline.} =
  result = mg.match and mg.group.first == mg.group.last
--- a/stateless/readme.md
+++ b/stateless/readme.md
@ -31,8 +31,9 @@ Every time you request a node using a hash key, you'll get one of the 3 types of
 ### Deviation from yellow paper

 * In the Yellow Paper, the `hash to next node` may be replaced by the next node directly if the RLP encoded node bytes count
-  less than 32. But in a real Ethereum State trie, this never happened. An empty RLP encoded `Account` will have length of 70.
-  Combined with the Hex Prefix encoding of nibbles, it will be more than 70 bytes.
+  less than 32. But in a real Ethereum State trie, this never happens in the account trie: an empty RLP encoded `Account` has a length of 70 bytes.
+  Combined with the Hex Prefix encoding of nibbles, it will be more than 70 bytes. Short RLP nodes only exist in the storage trie,
+  at depth >= 9.
 * In Yellow Paper, the 17th elem of the `Branch Node` can contains a value. But it always empty in a real Ethereum State trie.
  The block witness spec also ignore this 17th elem when encoding or decoding `Branch Node`.
  This can happen because in Ethereum `Secure Hexary Trie`, every keys have uniform length of 32 bytes or 64 nibbles.
--- a/stateless/test_block_witness.nim
+++ b/stateless/test_block_witness.nim
@ -69,7 +69,7 @@ proc setupStateDB(tester: var Tester, wantedState: JsonNode, stateDB: var Accoun

    let sKeys = if storageKeys.len != 0: newMultiKeys(storageKeys) else: MultikeysRef(nil)
    let codeTouched = code.len > 0
-    keys.add((account, codeTouched, sKeys))
+    keys.add(AccountKey(address: account, codeTouched: codeTouched, storageKeys: sKeys))

  tester.keys = newMultiKeys(keys)
  stateDB.persist()
--- a/stateless/test_witness_keys.nim
+++ b/stateless/test_witness_keys.nim
@ -75,18 +75,18 @@ proc runTest(numPairs: int, testStatusIMPL: var TestStatus,

  for i in 0..<numPairs:
    let acc  = randAccount(memDB)
-    addrs[i] = (randAddress(), acc.codeTouched, acc.storageKeys)
+    addrs[i] = AccountKey(address: randAddress(), codeTouched: acc.codeTouched, storageKeys: acc.storageKeys)
    accs[i]  = acc.account
    trie.put(addrs[i].address, rlp.encode(accs[i]))

  when addInvalidKeys:
    # invalidAddress should not end up in block witness
    let invalidAddress = randAddress()
-    addrs.add((invalidAddress, false, MultikeysRef(nil)))
+    addrs.add(AccountKey(address: invalidAddress))

  if addIdenticalKeys:
    let invalidAddress = addrs[0].address
-    addrs.add((invalidAddress, false, MultikeysRef(nil)))
+    addrs.add(AccountKey(address: invalidAddress))

  var mkeys = newMultiKeys(addrs)
  let rootHash = trie.rootHash
@ -122,13 +122,20 @@ proc runTest(numPairs: int, testStatusIMPL: var TestStatus,
    for kd in mkeys.keys:
      check kd.visited == true

-proc initMultiKeys(keys: openArray[string]): MultikeysRef =
+proc initMultiKeys(keys: openArray[string], storageMode: bool = false): MultikeysRef =
  result.new
-  for x in keys:
-    result.keys.add KeyData(
-      storageMode: false,
-      hash: hexToByteArray[32](x)
-    )
+  if storageMode:
+    for i, x in keys:
+      result.keys.add KeyData(
+        storageMode: true,
+        hash: hexToByteArray[32](x)
+      )
+  else:
+    for x in keys:
+      result.keys.add KeyData(
+        storageMode: false,
+        hash: hexToByteArray[32](x)
+      )

 proc parseInvalidInput(payload: openArray[byte]): bool =
  var db = newMemoryDB()
@ -246,5 +253,34 @@ proc witnessKeysMain*() =
        let z = readFile(x)
        check parseInvalidInput(z.toOpenArrayByte(0, z.len-1))

+    test "short rlp test":
+      let keys = [
+        "01234567abce7762869be690036144c12c256bdb06ee9073ad5ecca18a47c325",
+        "01234567b491732f964182ce4bde5e2468318692ed446e008f621b26f8ff5660",
+        "01234567c140158288775c8912aed274fb9d6a3a260e9e95e03e70ba8df30f6b",
+      ]
+      let m  = initMultiKeys(keys, true)
+      var memDB = newMemoryDB()
+      var trie = initSecureHexaryTrie(memDB)
+      var acc  = randAccount(memDB)
+
+      var tt = initHexaryTrie(memDB)
+      for x in m.keys:
+        tt.put(x.hash, rlp.encode(1.u256))
+      acc.account.storageRoot = tt.rootHash
+
+      let addrs = @[AccountKey(address: randAddress(), codeTouched: acc.codeTouched, storageKeys: m)]
+
+      trie.put(addrs[0].address, rlp.encode(acc.account))
+      var mkeys = newMultiKeys(addrs)
+      let rootHash = trie.rootHash
+
+      var wb = initWitnessBuilder(memDB, rootHash, {wfEIP170})
+      var witness = wb.buildWitness(mkeys)
+      var db = newMemoryDB()
+      var tb = initTreeBuilder(witness, db, {wfEIP170})
+      let root = tb.buildTree()
+      check root.data == rootHash.data
+
 when isMainModule:
  witnessKeysMain()
--- a/stateless/tree_from_witness.nim
+++ b/stateless/tree_from_witness.nim
@ -29,6 +29,8 @@ type

 # this TreeBuilder support short node parsing
 # but a block witness should not contains short node
+# for the account trie. Short rlp nodes only appear in
+# the storage trie, at depth >= 9

 # the InputStream still unstable
 # when using large dataset for testing
@ -147,6 +149,11 @@ proc toKeccak(r: var NodeKey, x: openArray[byte]) {.inline.} =
  r.data[0..31] = x[0..31]
  r.usedBytes = 32

+proc toKeccak(r: var NodeKey, z: byte, x: openArray[byte]) {.inline.} =
+  r.data[0] = z
+  r.data[1..31] = x[0..30]
+  r.usedBytes = 32
+
 proc append(r: var RlpWriter, n: NodeKey) =
  if n.usedBytes < 32:
    r.append rlpFromBytes(n.data.toOpenArray(0, n.usedBytes-1))
@ -162,6 +169,12 @@ proc toNodeKey(t: var TreeBuilder, z: openArray[byte]): NodeKey =
    result.usedBytes = 32
    t.db.put(result.data, z)

+proc toNodeKey(z: openArray[byte]): NodeKey =
+  if z.len >= 32:
+    raise newException(ParsingError, "Failed when try to convert short rlp to NodeKey")
+  result.usedBytes = z.len
+  result.data[0..z.len-1] = z[0..z.len-1]
+
 proc forceSmallNodeKeyToHash(t: var TreeBuilder, r: NodeKey): NodeKey =
  let hash = keccak(r.data.toOpenArray(0, r.usedBytes-1))
  t.db.put(hash.data, r.data.toOpenArray(0, r.usedBytes-1))
@ -176,7 +189,7 @@ proc branchNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey
 proc extensionNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey
 proc accountNode(t: var TreeBuilder, depth: int): NodeKey
 proc accountStorageLeafNode(t: var TreeBuilder, depth: int): NodeKey
-proc hashNode(t: var TreeBuilder): NodeKey
+proc hashNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey
 proc treeNode(t: var TreeBuilder, depth: int = 0, storageMode = false): NodeKey

 proc buildTree*(t: var TreeBuilder): KeccakHash
@ -218,7 +231,7 @@ proc buildForest*(t: var TreeBuilder): seq[KeccakHash]
    result.add KeccakHash(data: res.data)

 proc treeNode(t: var TreeBuilder, depth: int, storageMode = false): NodeKey =
-  if depth >= 64:
+  if depth > 64:
    raise newException(ParsingError, "invalid trie structure")

  let nodeType = safeReadEnum(t, TrieNodeType)
@ -231,7 +244,7 @@ proc treeNode(t: var TreeBuilder, depth: int, storageMode = false): NodeKey =
      result = t.accountStorageLeafNode(depth)
    else:
      result = t.accountNode(depth)
-  of HashNodeType: result = t.hashNode()
+  of HashNodeType: result = t.hashNode(depth, storageMode)

  if depth == 0 and result.usedBytes < 32:
    result = t.forceSmallNodeKeyToHash(result)
@ -335,7 +348,7 @@ proc extensionNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey =
  let nodeType = safeReadEnum(t, TrieNodeType)
  case nodeType
  of BranchNodeType: r.append t.branchNode(depth + nibblesLen, storageMode)
-  of HashNodeType: r.append t.hashNode()
+  of HashNodeType: r.append t.hashNode(depth, storageMode)
  else: raise newException(ParsingError, "wrong type during parsing child of extension node")

  result = t.toNodeKey(r.finish)
@ -361,13 +374,13 @@ proc readCodeLen(t: var TreeBuilder): int =
  t.keys[^1].codeLen = codeLen.int
  result = codeLen.int

-proc readHashNode(t: var TreeBuilder): NodeKey =
+proc readHashNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey =
  let nodeType = safeReadEnum(t, TrieNodeType)
  if nodeType != HashNodeType:
    raise newException(ParsingError, "hash node expected but got " & $nodeType)
-  result = t.hashNode()
+  result = t.hashNode(depth, storageMode)

-proc readByteCode(t: var TreeBuilder, acc: var Account) =
+proc readByteCode(t: var TreeBuilder, acc: var Account, depth: int) =
  let bytecodeType = safeReadEnum(t, BytecodeType)
  case bytecodeType
  of CodeTouched:
@ -380,7 +393,7 @@ proc readByteCode(t: var TreeBuilder, acc: var Account) =
    # we could discard it here
    discard t.readCodeLen()

-    let codeHash = t.readHashNode()
+    let codeHash = t.readHashNode(depth, false)
    doAssert(codeHash.usedBytes == 32)
    acc.codeHash.data = codeHash.data

@ -413,7 +426,7 @@ proc accountNode(t: var TreeBuilder, depth: int): NodeKey =
    acc.codeHash = blankStringHash
    acc.storageRoot = emptyRlpHash
  of ExtendedAccountType:
-    t.readByteCode(acc)
+    t.readByteCode(acc, depth)

    # switch to account storage parsing mode
    # and reset the depth
@ -478,6 +491,20 @@ proc accountStorageLeafNode(t: var TreeBuilder, depth: int): NodeKey =
  when defined(debugHash):
    doAssert(result == nodeKey, "account storage leaf node parsing error")

-proc hashNode(t: var TreeBuilder): NodeKey =
-  safeReadBytes(t, 32):
-    result.toKeccak(t.read(32))
+proc hashNode(t: var TreeBuilder, depth: int, storageMode: bool): NodeKey =
+  if storageMode and depth >= 9:
+    let z = t.safeReadByte()
+    if z == ShortRlpPrefix:
+      let rlpLen = t.safeReadByte().int
+      if rlpLen == 0:
+        safeReadBytes(t, 31):
+          result.toKeccak(0, t.read(31))
+      else:
+        safeReadBytes(t, rlpLen):
+          result = toNodeKey(t.read(rlpLen))
+    else:
+      safeReadBytes(t, 31):
+        result.toKeccak(z, t.read(31))
+  else:
+    safeReadBytes(t, 32):
+      result.toKeccak(t.read(32))
--- a/stateless/witness_from_tree.nim
+++ b/stateless/witness_from_tree.nim
@ -136,17 +136,26 @@ proc writeBranchNode(wb: var WitnessBuilder, mask: uint, depth: int, node: openA
  when defined(debugHash):
    wb.write(keccak(node).data)

-proc writeHashNode(wb: var WitnessBuilder, node: openArray[byte]) =
+proc writeHashNode(wb: var WitnessBuilder, node: openArray[byte], depth: int, storageMode: bool) =
  # usually a hash node means the recursion will not go deeper
  # and the information can be represented by the hash
  # for chunked witness, a hash node can be a root to another
  # sub-trie in one of the chunks
  wb.writeByte(HashNodeType)
+  if depth >= 9 and storageMode and node[0] == 0.byte:
+    wb.writeByte(ShortRlpPrefix)
+  wb.write(node)
+
+proc writeShortRlp(wb: var WitnessBuilder, node: openArray[byte], depth: int, storageMode: bool) =
+  doAssert(node.len < 32 and depth >= 9 and storageMode)
+  wb.writeByte(HashNodeType)
+  wb.writeByte(ShortRlpPrefix)
+  wb.writeByte(node.len)
  wb.write(node)

 proc getBranchRecurse(wb: var WitnessBuilder, z: var StackElem) {.raises: [ContractCodeError, IOError, Defect, CatchableError, Exception].}

-proc writeByteCode(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
+proc writeByteCode(wb: var WitnessBuilder, kd: KeyData, acc: Account, depth: int) =
  if not kd.codeTouched:
    # the account have code but not touched by the EVM
    # in current block execution
@ -155,7 +164,7 @@ proc writeByteCode(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
    if wfEIP170 in wb.flags and code.len > EIP170_CODE_SIZE_LIMIT:
      raise newException(ContractCodeError, "code len exceed EIP170 code size limit")
    wb.writeUVarint32(code.len)
-    wb.writeHashNode(acc.codeHash.data)
+    wb.writeHashNode(acc.codeHash.data, depth, false)
    # no need to write 'code' here
    return

@ -172,10 +181,10 @@ proc writeByteCode(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
  wb.writeUVarint32(code.len)
  wb.write(code)

-proc writeStorage(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
+proc writeStorage(wb: var WitnessBuilder, kd: KeyData, acc: Account, depth: int) =
  if kd.storageKeys.isNil:
    # the account have storage but not touched by EVM
-    wb.writeHashNode(acc.storageRoot.data)
+    wb.writeHashNode(acc.storageRoot.data, depth, true)
  elif acc.storageRoot != emptyRlpHash:
    # the account have storage and the EVM use it
    var zz = StackElem(
@ -188,7 +197,7 @@ proc writeStorage(wb: var WitnessBuilder, kd: KeyData, acc: Account) =
    getBranchRecurse(wb, zz)
  else:
    # no storage at all
-    wb.writeHashNode(emptyRlpHash.data)
+    wb.writeHashNode(emptyRlpHash.data, depth, true)

 proc writeAccountNode(wb: var WitnessBuilder, kd: KeyData, acc: Account,
  node: openArray[byte], depth: int) {.raises: [ContractCodeError, IOError, Defect, CatchableError, Exception].} =
@ -212,8 +221,8 @@ proc writeAccountNode(wb: var WitnessBuilder, kd: KeyData, acc: Account,
  wb.writeUVarint(acc.nonce)

  if accountType != SimpleAccountType:
-    wb.writeByteCode(kd, acc)
-    wb.writeStorage(kd, acc)
+    wb.writeByteCode(kd, acc, depth)
+    wb.writeStorage(kd, acc, depth)

  #0x00 address:<Address> balance:<Bytes32> nonce:<Bytes32>
  #0x01 address:<Address> balance:<Bytes32> nonce:<Bytes32> bytecode:<Bytecode> storage:<Tree_Node(0,1)>
@ -235,6 +244,10 @@ proc writeAccountStorageLeafNode(wb: var WitnessBuilder, key: openArray[byte], v

 proc getBranchRecurse(wb: var WitnessBuilder, z: var StackElem) =
  if z.node.len == 0: return
+  if z.node.len < 32:
+    writeShortRlp(wb, z.node, z.depth, z.storageMode)
+    return
+
  var nodeRlp = rlpFromBytes z.node

  case nodeRlp.listLen
@ -244,7 +257,7 @@ proc getBranchRecurse(wb: var WitnessBuilder, z: var StackElem) =

    if not mg.match:
      # return immediately if there is no match
-      writeHashNode(wb, keccak(z.node).data)
+      writeHashNode(wb, keccak(z.node).data, z.depth, z.storageMode)
      return

    let value = nodeRlp.listElem(1)
@ -295,14 +308,10 @@ proc getBranchRecurse(wb: var WitnessBuilder, z: var StackElem) =
        continue

      if branch.isList:
-        # short node appear in yellow paper
-        # but never in the actual ethereum state trie
-        # an rlp encoded ethereum account will have length > 32 bytes
-        # block witness spec silent about this
-        doAssert(false, "Short node should not exist in block witness")
+        writeShortRlp(wb, branch.rawData, z.depth, z.storageMode)
      else:
        # if branch elem not empty and not a match, emit hash
-        writeHashNode(wb, branch.expectHash)
+        writeHashNode(wb, branch.expectHash, z.depth, z.storageMode)

    # 17th elem should always empty
    # 17th elem appear in yellow paper but never in
--- a/stateless/witness_types.nim
+++ b/stateless/witness_types.nim
@ -33,6 +33,7 @@ type
 const
  StorageLeafNodeType* = AccountNodeType
  BlockWitnessVersion* = 0x01
+  ShortRlpPrefix*      = 0.byte

 proc setBranchMaskBit*(x: var uint, i: int) {.inline.} =
  assert(i >= 0 and i < 17)