From cd784581233ba2097127baa1c3de1f77b3de77d6 Mon Sep 17 00:00:00 2001 From: Jordan Hrycaj Date: Tue, 30 May 2023 12:47:47 +0100 Subject: [PATCH] Add items to `Aristo Trie` database (#1586) details: 1. Merging a leaf vertex merges a `Patricia Trie` path (while adding/modifying vertices) and adds a leaf node with payload 2. Merging a Merkle node merges a single vertex to the `Patricia Trie` and registers Merkle hashes 3. Action 2 can be used before action 1 in order to construct a Merkle proof as required for handling `snap/1` data. 4. Unit tests show that action 3 is benign for now :) --- nimbus/db/aristo/aristo_debug.nim | 157 ++++++++++- nimbus/db/aristo/aristo_desc.nim | 18 ++ nimbus/db/aristo/aristo_error.nim | 32 ++- nimbus/db/aristo/aristo_get.nim | 30 ++- nimbus/db/aristo/aristo_hike.nim | 134 ++++++++++ nimbus/db/aristo/aristo_merge.nim | 430 ++++++++++++++++++++++++++++++ nimbus/db/aristo/aristo_path.nim | 70 +++++ tests/test_aristo.nim | 12 +- tests/test_aristo/test_merge.nim | 169 ++++++++++++ 9 files changed, 1039 insertions(+), 13 deletions(-) create mode 100644 nimbus/db/aristo/aristo_hike.nim create mode 100644 nimbus/db/aristo/aristo_merge.nim create mode 100644 nimbus/db/aristo/aristo_path.nim create mode 100644 tests/test_aristo/test_merge.nim diff --git a/nimbus/db/aristo/aristo_debug.nim b/nimbus/db/aristo/aristo_debug.nim index c53c23ceb..b823fb6c9 100644 --- a/nimbus/db/aristo/aristo_debug.nim +++ b/nimbus/db/aristo/aristo_debug.nim @@ -11,16 +11,19 @@ {.push raises: [].} import - std/[sequtils, strutils], + std/[algorithm, sets, sequtils, strutils, tables], eth/[common, trie/nibbles], stew/byteutils, ../../sync/snap/range_desc, - "."/[aristo_constants, aristo_desc, aristo_vid] + "."/[aristo_constants, aristo_desc, aristo_error, aristo_hike, aristo_vid] # ------------------------------------------------------------------------------ # Ptivate functions # ------------------------------------------------------------------------------ +proc 
toPfx(indent: int): string = + "\n" & " ".repeat(indent) + proc keyVidUpdate(db: AristoDbRef, key: NodeKey, vid: VertexID): string = if not key.isZero and not vid.isZero and @@ -57,7 +60,7 @@ proc stripZeros(a: string): string = return a proc ppVid(vid: VertexID): string = - if vid.isZero: "ø" else: "$" & vid.uint64.toHex.stripZeros + if vid.isZero: "ø" else: "$" & vid.uint64.toHex.stripZeros.toLowerAscii proc ppKey(key: NodeKey, db = AristoDbRef(nil)): string = if key.isZero: @@ -69,11 +72,13 @@ proc ppKey(key: NodeKey, db = AristoDbRef(nil)): string = if not db.isNil: db.pAmk.withValue(key, pRef): - return "£" & $pRef[] + return "£" & pRef[].uint64.toHex.stripZeros.toLowerAscii db.xMap.withValue(key, xRef): - return "£" & $xRef[] + return "£" & xRef[].uint64.toHex.stripZeros.toLowerAscii - "%" & ($key).squeeze(hex=true,ignLen=true) + "%" & key.ByteArray32 + .mapIt(it.toHex(2)).join.tolowerAscii + .squeeze(hex=true,ignLen=true) proc ppRootKey(a: NodeKey, db = AristoDbRef(nil)): string = if a != EMPTY_ROOT_KEY: @@ -83,6 +88,22 @@ proc ppCodeKey(a: NodeKey, db = AristoDbRef(nil)): string = if a != EMPTY_CODE_KEY: return a.ppKey(db) +proc ppPathTag(tag: NodeTag, db = AristoDbRef(nil)): string = + ## Raw key, for referenced key dump use `key.pp(db)` below + if not db.isNil: + db.lTab.withValue(tag, keyPtr): + return "@" & keyPtr[].ppVid + + "@" & tag.to(NodeKey).ByteArray32 + .mapIt(it.toHex(2)).join.toLowerAscii + .squeeze(hex=true,ignLen=true) + +proc ppPathPfx(pfx: NibblesSeq): string = + ($(pfx & EmptyNibbleSeq)).squeeze(hex=true) + +proc ppNibble(n: int8): string = + if n < 0: "ø" elif n < 10: $n else: n.toHex(1).toLowerAscii + # ------------------------------------------------------------------------------ # Public functions # ------------------------------------------------------------------------------ @@ -100,6 +121,15 @@ proc keyToVtxID*(db: AristoDbRef, key: NodeKey): VertexID = result = db.vidFetch() db.xMap[key] = result +proc pp*(vid: NodeKey): string = 
+ vid.ppKey + +proc pp*(tag: NodeTag, db = AristoDbRef(nil)): string = + tag.ppPathTag(db) + +proc pp*(vid: VertexID): string = + vid.ppVid + proc pp*(vid: openArray[VertexID]): string = "[" & vid.mapIt(it.ppVid).join(",") & "]" @@ -124,9 +154,9 @@ proc pp*(nd: VertexRef, db = AristoDbRef(nil)): string = result = ["l(", "x(", "b("][nd.vType.ord] case nd.vType: of Leaf: - result &= $nd.lPfx & "," & nd.lData.pp(db) + result &= nd.lPfx.ppPathPfx & "," & nd.lData.pp(db) of Extension: - result &= $nd.ePfx & "," & nd.eVid.ppVid + result &= nd.ePfx.ppPathPfx & "," & nd.eVid.ppVid of Branch: result &= "[" for n in 0..15: @@ -145,10 +175,10 @@ proc pp*(nd: NodeRef, db = AristoDbRef(nil)): string = result = ["L(", "X(", "B("][nd.vType.ord] case nd.vType: of Leaf: - result &= $nd.lPfx & "," & nd.lData.pp(db) + result &= $nd.lPfx.ppPathPfx & "," & nd.lData.pp(db) of Extension: - result &= $nd.ePfx & "," & nd.eVid.ppVid & "," & nd.key[0].ppKey + result &= $nd.ePfx.ppPathPfx & "," & nd.eVid.ppVid & "," & nd.key[0].ppKey of Branch: result &= "[" @@ -166,6 +196,113 @@ proc pp*(nd: NodeRef, db = AristoDbRef(nil)): string = result[^1] = ']' result &= ")" +proc pp*( + sTab: var Table[VertexID,VertexRef]; + db = AristoDbRef(nil); + indent = 4; + ): string = + let pfx = indent.toPfx + var first = true + result = "{" + for vid in toSeq(sTab.keys).mapIt(it.uint64).sorted.mapIt(it.VertexID): + sTab.withValue(vid, vtxPtr): + if first: + first = false + else: + result &= pfx & " " + result &= "(" & vid.ppVid & "," & vtxPtr[].pp(db) & ")," + if result[^1] == ',': + result[^1] = '}' + else: + result &= "}" + +proc pp*(lTab: var Table[NodeTag,VertexID]; indent = 4): string = + let pfx = indent.toPfx + var first = true + result = "{" + for tag in toSeq(lTab.keys).mapIt(it.UInt256).sorted.mapIt(it.NodeTag): + lTab.withValue(tag,vidPtr): + if first: + first = false + else: + result &= pfx & " " + result &= "(" & tag.ppPathTag & "," & vidPtr[].ppVid & ")," + if result[^1] == ',': + result[^1] = 
'}' + else: + result &= "}" + +proc pp*(sDel: HashSet[VertexID]): string = + result = "{" + for vid in toSeq(sDel.items).mapIt(it.uint64).sorted.mapIt(it.VertexID): + result &= vid.ppVid & "," + if result[^1] == ',': + result[^1] = '}' + else: + result &= "}" + +proc pp*( + hike: Hike; + db = AristoDbRef(nil); + indent = 4; + ): string = + let pfx = indent.toPfx + var first = true + result = "[(" & hike.root.ppVid & ")" + for leg in hike.legs: + result &= "," & pfx & " (" & leg.wp.vid.ppVid + if not db.isNil: + var key = "ø" + db.kMap.withValue(leg.wp.vid, keyPtr): + key = keyPtr[].ppKey(db) + result &= "," & key + result &= "," & $leg.nibble.ppNibble & "," & leg.wp.vtx.pp(db) & ")" + result &= "," & pfx & " (" & $hike.tail & ")" + if hike.error != AristoError(0): + result &= "," & pfx & " (" & $hike.error & ")" + result &= "]" + +proc pp*( + kMap: var Table[VertexID,NodeKey]; + db = AristoDbRef(nil); + indent = 4; + ): string = + let pfx = indent.toPfx + var first = true + result = "{" + for vid in toSeq(kMap.keys).mapIt(it.uint64).sorted.mapIt(it.VertexID): + kMap.withValue(vid, keyPtr): + if first: + first = false + else: + result &= pfx & " " + result &= "(" & vid.ppVid & "," & keyPtr[].ppKey(db) & ")," + if result[^1] == ',': + result[^1] = '}' + else: + result &= "}" + +proc pp*( + pAmk: var Table[NodeKey,VertexID]; + db = AristoDbRef(nil); + indent = 4; + ): string = + let pfx = indent.toPfx + var first = true + result = "{" + for key in toSeq(pAmk.keys).mapIt(it.to(NodeTag).UInt256) + .sorted.mapIt(it.NodeTag.to(NodeKey)): + pAmk.withValue(key,vidPtr): + if first: + first = false + else: + result &= pfx & " " + result &= "(" & key.ppKey(db) & "," & vidPtr[].ppVid & ")," + if result[^1] == ',': + result[^1] = '}' + else: + result &= "}" + # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_desc.nim 
b/nimbus/db/aristo/aristo_desc.nim index fa678592b..5ead39748 100644 --- a/nimbus/db/aristo/aristo_desc.nim +++ b/nimbus/db/aristo/aristo_desc.nim @@ -110,6 +110,7 @@ type AristoDbObj = object ## Hexary trie plus helper structures sTab*: Table[VertexID,VertexRef] ## Structural vertex table making up a trie + lTab*: Table[NodeTag,VertexID] ## Direct access, path to leaf node sDel*: HashSet[VertexID] ## Deleted vertices kMap*: Table[VertexID,NodeKey] ## Merkle hash key mapping pAmk*: Table[NodeKey,VertexID] ## Reverse mapper for data import @@ -217,6 +218,23 @@ proc convertTo*(payload: PayloadRef; T: type Blob): T = of AccountData: result = rlp.encode payload.account +proc to*(node: NodeRef; T: type VertexRef): T = + ## Extract a copy of the `VertexRef` part from a `NodeRef`. For a leaf + ## type, the `lData` payload reference will be a shallow copy, i.e. only + ## the reference pointer is copied. + case node.vType: + of Leaf: + T(vType: Leaf, + lPfx: node.lPfx, + lData: node.lData) + of Extension: + T(vType: Extension, + ePfx: node.ePfx, + eVid: node.eVid) + of Branch: + T(vType: Branch, + bVid: node.bVid) + # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_error.nim b/nimbus/db/aristo/aristo_error.nim index 8a36c7334..831ae0267 100644 --- a/nimbus/db/aristo/aristo_error.nim +++ b/nimbus/db/aristo/aristo_error.nim @@ -46,5 +46,35 @@ type # Converter `asNode()` CacheMissingNodekeys -# End + # Get function `getVtxCascaded()` + GetVtxNotFound + GetTagNotFound + # Path function `hikeUp()` + PathRootMissing + PathLeafTooEarly + PathBranchTailEmpty + PathBranchBlindEdge + PathExtTailEmpty + PathExtTailMismatch + + # Memory backend + MemBeVtxNotFound + MemBeKeyNotFound + + # Path/nibble/key conversions in `aristo_path.nim` + PathExpected64Nibbles + PathExpectedLeaf + + # Merge leaf `merge()` + MergeBrLinkLeafGarbled + 
MergeBrLinkVtxPfxTooShort + MergeBranchGarbledNibble + MergeBranchGarbledTail + MergeBranchRootExpected + MergeLeafGarbledHike + MergeRootBranchLinkBusy + + MergeNodeKeyZero + +# End diff --git a/nimbus/db/aristo/aristo_get.nim b/nimbus/db/aristo/aristo_get.nim index 0e7dc2a4a..8644409cf 100644 --- a/nimbus/db/aristo/aristo_get.nim +++ b/nimbus/db/aristo/aristo_get.nim @@ -16,8 +16,14 @@ import std/tables, stew/results, + ../../sync/snap/range_desc, "."/[aristo_desc, aristo_error] +type + VidVtxPair* = object + vid*: VertexID ## Table lookup vertex ID (if any) + vtx*: VertexRef ## Reference to vertex + # ------------------------------------------------------------------------------ # Public functions # ------------------------------------------------------------------------------ @@ -43,13 +49,35 @@ proc getVtxCascaded*( err(GetVtxNotFound) +proc getVtxCascaded*( + db: AristoDbRef; + tag: NodeTag; + ): Result[VidVtxPair,AristoError] = + ## Cascaded lookup for data record down the transaction cascade using + ## the Patricia path. + db.lTab.withValue(tag, vidPtr): + db.sTab.withValue(vidPtr[], vtxPtr): + return ok VidVtxPair(vid: vidPtr[], vtx: vtxPtr[]) + return err(GetTagNotFound) + + # Down the rabbit hole of transaction layers + var lDb = db + while lDb.cascaded: + lDb = lDb.stack + lDb.lTab.withValue(tag, vidPtr): + lDb.sTab.withValue(vidPtr[], vtxPtr): + return ok VidVtxPair(vid: vidPtr[], vtx: vtxPtr[]) + return err(GetTagNotFound) + + err(GetTagNotFound) + proc getVtx*(db: AristoDbRef; vid: VertexID): VertexRef = ## Variant of `getVtxCascaded()` with returning `nil` on error ignoring the ## error type information. 
let rc = db.getVtxCascaded vid if rc.isOk: return rc.value - + # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_hike.nim b/nimbus/db/aristo/aristo_hike.nim new file mode 100644 index 000000000..4d96aa6c3 --- /dev/null +++ b/nimbus/db/aristo/aristo_hike.nim @@ -0,0 +1,134 @@ +# nimbus-eth1 +# Copyright (c) 2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +{.push raises: [].} + +import + eth/[common, trie/nibbles], + ../../sync/snap/range_desc, + "."/[aristo_constants, aristo_desc, aristo_error, aristo_get, aristo_path] + +type + Leg* = object + ## For constructing a `VertexPath` + wp*: VidVtxPair ## Vertex ID and data ref + nibble*: int8 ## Next vertex selector for `Branch` (if any) + + Hike* = object + ## Trie traversal path + root*: VertexID ## Handy for some fringe cases + legs*: seq[Leg] ## Chain of vertices and IDs + tail*: NibblesSeq ## Portion of non completed path + error*: AristoError ## Info for whoever wants it to see + +# ------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +func getNibblesImpl(hike: Hike; start = 0; maxLen = high(int)): NibblesSeq = + ## May be needed for partial rebuild, as well + for n in start ..< min(hike.legs.len, maxLen): + let leg = hike.legs[n] + case leg.wp.vtx.vType: + of Branch: + result = result & @[leg.nibble.byte].initNibbleRange.slice(1) + of Extension: + result = result & leg.wp.vtx.ePfx + of Leaf: + result = 
result & leg.wp.vtx.lPfx + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +func to*(hike: Hike; T: type NibblesSeq): T = + ## Convert back (path along the legs followed by the unresolved `tail`) + hike.getNibblesImpl() & hike.tail + +func legsTo*(hike: Hike; T: type NibblesSeq): T = + ## Convert back (path along the legs only, without the `tail`) + hike.getNibblesImpl() + +# -------- + +proc hikeUp*( + path: NibblesSeq; # Partial path + root: VertexID; # Start vertex + db: AristoDbRef; # Database + ): Hike = + ## For the argument `path`, find and return the longest possible path in the + ## argument database `db`. + result = Hike( + root: root, + tail: path) + + if root.isZero: + result.error = PathRootMissing + + else: + var vid = root + while not vid.isZero: + var vtx = db.getVtx vid + if vtx.isNil: + break + + var leg = Leg(wp: VidVtxPair(vid: vid, vtx: vtx), nibble: -1) + + case vtx.vType: + of Leaf: + if result.tail.len == result.tail.sharedPrefixLen(vtx.lPfx): + # Bingo, got full path + result.legs.add leg + result.tail = EmptyNibbleSeq + else: + result.error = PathLeafTooEarly # Ooops + break # Buck stops here + + of Branch: + if result.tail.len == 0: + result.legs.add leg + result.error = PathBranchTailEmpty # Ooops + break + + let + nibble = result.tail[0].int8 + nextVid = vtx.bVid[nibble] + + if nextVid.isZero: + result.error = PathBranchBlindEdge # Ooops + break + + leg.nibble = nibble + result.legs.add leg + result.tail = result.tail.slice(1) + vid = nextVid + + of Extension: + if result.tail.len == 0: + result.legs.add leg + result.tail = EmptyNibbleSeq + result.error = PathExtTailEmpty # Well, somehow odd + break + + if vtx.ePfx.len != result.tail.sharedPrefixLen(vtx.ePfx): + result.error = PathExtTailMismatch # Need to branch from here + break + + result.legs.add leg + result.tail = result.tail.slice(vtx.ePfx.len) + vid = vtx.eVid + +proc hikeUp*(keyOrTag: NodeKey|NodeTag; root: VertexID; db: AristoDbRef): Hike = + 
## Variant of `hikeUp()` + keyOrTag.pathAsNibbles.hikeUp(root, db) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_merge.nim b/nimbus/db/aristo/aristo_merge.nim new file mode 100644 index 000000000..b0e3d0d06 --- /dev/null +++ b/nimbus/db/aristo/aristo_merge.nim @@ -0,0 +1,430 @@ +# nimbus-eth1 +# Copyright (c) 2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +{.push raises: [].} + +import + std/tables, + chronicles, + eth/[common, trie/nibbles], + stew/results, + ../../sync/snap/range_desc, + ./aristo_debug, + "."/[aristo_desc, aristo_error, aristo_get, aristo_hike, aristo_path, + aristo_vid] + +logScope: + topics = "aristo-leaf" + +# ------------------------------------------------------------------------------ +# Private getters & setters +# ------------------------------------------------------------------------------ + +proc xPfx(vtx: VertexRef): NibblesSeq = + case vtx.vType: + of Leaf: + return vtx.lPfx + of Extension: + return vtx.ePfx + of Branch: + doAssert vtx.vType != Branch # Ooops + +proc `xPfx=`(vtx: VertexRef, val: NibblesSeq) = + case vtx.vType: + of Leaf: + vtx.lPfx = val + of Extension: + vtx.ePfx = val + of Branch: + doAssert vtx.vType != Branch # Ooops + +# ----------- + +proc insertBranch( + db: AristoDbRef; # Database, top layer + hike: Hike; + linkID: VertexID; + linkVtx: VertexRef; + payload: PayloadRef; # Leaf data payload + ): Hike = + ## + ## Insert `Extension->Branch` vertex chain or just a `Branch` vertex + ## + ## --(linkID)--> + ## + ## will become 
either + ## + ## --(linkID)--> + ## --(local1)--> + ## [linkInx] --(local2)--> + ## [leafInx] --(local3)--> + ## + ## or in case that there is no common prefix + ## + ## --(linkID)--> + ## [linkInx] --(local2)--> + ## [leafInx] --(local3)--> + ## + let n = linkVtx.xPfx.sharedPrefixLen hike.tail + + # Verify minimum requirements + if hike.tail.len == n: + # Should have been tackled by `hikeUp()`, already + return Hike(error: MergeLeafGarbledHike) + if linkVtx.xPfx.len == n: + return Hike(error: MergeBrLinkVtxPfxTooShort) + + # Provide and install `forkVtx` + let + forkVtx = VertexRef(vType: Branch) + linkInx = linkVtx.xPfx[n] + leafInx = hike.tail[n] + var + leafLeg = Leg(nibble: -1) + + # Install `forkVtx` + block: + let local = db.vidFetch + + # Update vertex path lookup + if linkVtx.vType == Leaf: + let + path = hike.legsTo(NibblesSeq) & linkVtx.lPfx + rc = path.pathToTag() + if rc.isErr: + error "Branch link leaf path garbled", linkID, path + return Hike(error: MergeBrLinkLeafGarbled) + db.lTab[rc.value] = local # update leaf path lookup cache + + forkVtx.bVid[linkInx] = local + db.sTab[local] = linkVtx + linkVtx.xPfx = linkVtx.xPfx.slice(1+n) + block: + let local = db.vidFetch + forkVtx.bVid[leafInx] = local + leafLeg.wp.vid = local + leafLeg.wp.vtx = VertexRef( + vType: Leaf, + lPfx: hike.tail.slice(1+n), + lData: payload) + db.sTab[local] = leafLeg.wp.vtx + + # Update branch leg, ready to append more legs + result = Hike(root: hike.root, legs: hike.legs) + + # Update in-between glue linking `branch --[..]--> forkVtx` + if 0 < n: + let extVtx = VertexRef( + vType: Extension, + ePfx: hike.tail.slice(0,n), + eVid: db.vidFetch) + + db.sTab[linkID] = extVtx + + result.legs.add Leg( + nibble: -1, + wp: VidVtxPair( + vid: linkID, + vtx: extVtx)) + + db.sTab[extVtx.eVid] = forkVtx + result.legs.add Leg( + nibble: leafInx.int8, + wp: VidVtxPair( + vid: extVtx.eVid, + vtx: forkVtx)) + else: + db.sTab[linkID] = forkVtx + result.legs.add Leg( + nibble: leafInx.int8, + wp: 
VidVtxPair( + vid: linkID, + vtx: forkVtx)) + + result.legs.add leafLeg + + +proc appendBranchAndLeaf( + db: AristoDbRef; # Database, top layer + hike: Hike; # Path top has a `Branch` vertex + brID: VertexID; # Branch vertex ID from `Hike` top + brVtx: VertexRef; # Branch vertex, linked to from `Hike` + payload: PayloadRef; # Leaf data payload + ): Hike = + ## Append argument branch vertex passed as argument `(brID,brVtx)` and then + ## a `Leaf` vertex derived from the argument `payload`. + + if hike.tail.len == 0: + return Hike(error: MergeBranchGarbledTail) + let nibble = hike.tail[0].int8 + if not brVtx.bVid[nibble].isZero: + return Hike(error: MergeRootBranchLinkBusy) + + # Append branch node + result = Hike(root: hike.root, legs: hike.legs) + result.legs.add Leg(wp: VidVtxPair(vtx: brVtx, vid: brID), nibble: nibble) + + # Append leaf node + let + vid = db.vidFetch + vtx = VertexRef( + vType: Leaf, + lPfx: hike.tail.slice(1), + lData: payload) + brVtx.bVid[nibble] = vid + db.sTab[vid] = vtx + result.legs.add Leg(wp: VidVtxPair(vtx: vtx, vid: vid), nibble: -1) + +# ------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +proc hikeTopBranchAppendLeaf( + db: AristoDbRef; # Database, top layer + hike: Hike; # Path top has a `Branch` vertex + payload: PayloadRef; # Leaf data payload + proofMode: bool; # May have dangling links + ): Hike = + ## Append a `Leaf` vertex derived from the argument `payload` after the top + ## leg of the `hike` argument which is assumed to refer to a `Branch` + ## vertex. If successful, the function returns the updated `hike` trail. 
+ if hike.tail.len == 0: + return Hike(error: MergeBranchGarbledTail) + + let nibble = hike.legs[^1].nibble + if nibble < 0: + return Hike(error: MergeBranchGarbledNibble) + + let + branch = hike.legs[^1].wp.vtx + linkID = branch.bVid[nibble] + + # Busy slot, check for dangling link + linkVtx = block: + let rc = db.getVtxCascaded linkID + if rc.isErr and not proofMode: + # Not much else that can be done here + error "Dangling leaf link, reused", branch=hike.legs[^1].wp.vid, + nibble, linkID, leafPfx=hike.tail + if rc.isErr or rc.value.isNil: + # Reuse placeholder entry in table + let vtx = VertexRef( + vType: Leaf, + lPfx: hike.tail, + lData: payload) + db.sTab[linkID] = vtx + result = Hike(root: hike.root, legs: hike.legs) + result.legs.add Leg(wp: VidVtxPair(vid: linkID, vtx: vtx), nibble: -1) + return + rc.value + + # Slot link to a branch vertex should be handled by `hikeUp()` + if linkVtx.vType == Branch: + return db.appendBranchAndLeaf(hike, linkID, linkVtx, payload) + + db.insertBranch(hike, linkID, linkVtx, payload) + + +proc hikeTopExtensionAppendLeaf( + db: AristoDbRef; # Database, top layer + hike: Hike; # Path top has an `Extension` vertex + payload: PayloadRef; # Leaf data payload + ): Hike = + ## Append a `Leaf` vertex derived from the argument `payload` after the top + ## leg of the `hike` argument which is assumed to refer to an `Extension` + ## vertex. If successful, the function returns the + ## updated `hike` trail. + let + parVtx = hike.legs[^1].wp.vtx + parID = hike.legs[^1].wp.vid + brVtx = db.getVtx parVtx.eVid + + result = Hike(root: hike.root, legs: hike.legs) + + if brVtx.isNil: + # Blind vertex, promote to leaf node. 
+ let vtx = VertexRef( + vType: Leaf, + lPfx: parVtx.ePfx & hike.tail, + lData: payload) + db.sTab[parID] = vtx + result.legs[^1].wp.vtx = vtx + + elif brVtx.vType != Branch: + return Hike(error: MergeBranchRootExpected) + + else: + let nibble = hike.tail[0].int8 + if not brVtx.bVid[nibble].isZero: + return Hike(error: MergeRootBranchLinkBusy) + let + vid = db.vidFetch + vtx = VertexRef( + vType: Leaf, + lPfx: hike.tail.slice(1), + lData: payload) + brVtx.bVid[nibble] = vid + db.sTab[vid] = vtx + result.legs[^1].nibble = nibble + result.legs.add Leg(wp: VidVtxPair(vtx: vtx, vid: vid), nibble: -1) + + +proc emptyHikeAppendLeaf( + db: AristoDbRef; # Database, top layer + hike: Hike; # No path legs + rootVtx: VertexRef; # Root vertex + payload: PayloadRef; # Leaf data payload + ): Hike = + ## Append a `Leaf` vertex derived from the argument `payload` after the + ## argument vertex `rootVtx` and append both to the empty argument `hike`. + if rootVtx.vType == Branch: + let nibble = hike.tail[0].int8 + if not rootVtx.bVid[nibble].isZero: + return Hike(error: MergeRootBranchLinkBusy) + let + leafVid = db.vidFetch + leafVtx = VertexRef( + vType: Leaf, + lPfx: hike.tail.slice(1), + lData: payload) + rootVtx.bVid[nibble] = leafVid + db.sTab[leafVid] = leafVtx + return Hike( + root: hike.root, + legs: @[Leg(wp: VidVtxPair(vtx: rootVtx, vid: hike.root), nibble: nibble), + Leg(wp: VidVtxPair(vtx: leafVtx, vid: leafVid), nibble: -1)]) + + db.insertBranch(hike, hike.root, rootVtx, payload) + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc merge*( + db: AristoDbRef; # Database, top layer + pathTag: NodeTag; # `Patricia Trie` path root-to-leaf + payload: PayloadRef; # Leaf data payload + root = VertexID(0); # Root node reference + proofMode = false; # May have dangling links + noisy = false; + ): Hike = + ## Merge the argument `payload` as a leaf record 
into the top level vertex table of the + ## database `db`. The argument `pathTag` is used to index the leaf on the + ## `Patricia Tree`. The argument `payload` is stored with the leaf vertex in + ## the database unless the leaf vertex exists already. + + proc setUpAsRoot(vid: VertexID): Hike = + let + vtx = VertexRef( + vType: Leaf, + lPfx: pathTag.pathAsNibbles, + lData: payload) + wp = VidVtxPair(vid: vid, vtx: vtx) + db.sTab[vid] = vtx + Hike(root: vid, legs: @[Leg(wp: wp, nibble: -1)]) + + if root.isZero: + if noisy: echo ">>> merge (1)" + result = db.vidFetch.setUpAsRoot() # bootstrap: new root ID + + else: + let hike = pathTag.hikeUp(root, db) + if noisy: echo "<<< merge (2) >>>", "\n ", hike.pp(db) + + if 0 < hike.legs.len: + case hike.legs[^1].wp.vtx.vType: + of Branch: + if noisy: echo ">>> merge (3)" + result = db.hikeTopBranchAppendLeaf(hike, payload, proofMode) + of Leaf: + if noisy: echo ">>> merge (4)" + if 0 < hike.tail.len: # `Leaf` vertex problem? + return Hike(error: MergeLeafGarbledHike) + result = hike + of Extension: + if noisy: echo ">>> merge (5)" + result = db.hikeTopExtensionAppendLeaf(hike, payload) + + else: + # Empty hike + let rootVtx = db.getVtx root + + if rootVtx.isNil: + if noisy: echo ">>> merge (6)" + result = root.setUpAsRoot() # bootstrap for existing root ID + else: + if noisy: echo ">>> merge (7)" + result = db.emptyHikeAppendLeaf(hike, rootVtx, payload) + + # Update leaf access cache + if result.error == AristoError(0): + db.lTab[pathTag] = result.legs[^1].wp.vid + + +proc merge*( + db: AristoDbRef; # Database, top layer + nodeKey: NodeKey; # Merkle hash of node + node: NodeRef; # Node derived from RLP representation + ): Result[VertexID,AristoError] = + ## Merge a node key expanded from its RLP representation into the database. + ## + ## There is some rudimentary check whether the `node` is consistent. It is + ## *not* checked, whether the vertex IDs have been allocated, already. 
If + ## the node comes straight from the `decode()` RLP decoder, these vertex IDs + ## will be all zero. + + proc register(key: NodeKey): VertexID = + db.pAmk.withValue(key,vidPtr): + return vidPtr[] + let vid = db.vidFetch + db.pAmk[key] = vid + db.kMap[vid] = key + vid + + # Check whether the record is correct + if node.error != AristoError(0): + return err(node.error) + + # Verify `nodeKey` + if nodeKey.isZero: + return err(MergeNodeKeyZero) + + # Check whether the node exists, already + db.pAmk.withValue(nodeKey,vidPtr): + if db.sTab.hasKey vidPtr[]: + return ok vidPtr[] + + let + vid = nodeKey.register + vtx = node.to(VertexRef) # the vertex IDs need to be set up now (if any) + + case node.vType: + of Leaf: + discard + of Extension: + if not node.key[0].isZero: + db.pAmk.withValue(node.key[0],vidPtr): + vtx.eVid = vidPtr[] + do: + vtx.eVid = node.key[0].register + of Branch: + for n in 0..15: + if not node.key[n].isZero: + db.pAmk.withValue(node.key[n],vidPtr): + vtx.bVid[n] = vidPtr[] + do: + vtx.bVid[n] = node.key[n].register + + db.sTab[vid] = vtx + ok vid + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_path.nim b/nimbus/db/aristo/aristo_path.nim new file mode 100644 index 000000000..ae0f39a1f --- /dev/null +++ b/nimbus/db/aristo/aristo_path.nim @@ -0,0 +1,70 @@ +# nimbus-eth1 +# Copyright (c) 2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +{.push raises: [].} + +import + eth/[common, trie/nibbles], + stew/results, + ../../sync/snap/range_desc, + "."/[aristo_constants, aristo_error] + +# Info snippet (just a reminder to keep somewhere) +# +# Extension of a compact encoded as prefixed sequence of nibbles (i.e. +# half bytes with 4 bits.) +# +# pfx | bits | vertex type | layout +# ----+ -----+-------------+---------------------------------------- +# 0 | 0000 | extension | @[, nibble-pair, ..] +# 1 | 0001 | extension | @[, nibble-pair, ..] +# 2 | 0010 | leaf | @[, nibble-pair, ..] +# 3 | 0011 | leaf | @[, nibble-pair, ..] +# +# where the `ignored` part is typically expected a zero nibble. + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc pathAsNibbles*(key: NodeKey): NibblesSeq = + key.ByteArray32.initNibbleRange() + +proc pathAsNibbles*(tag: NodeTag): NibblesSeq = + tag.to(NodeKey).pathAsNibbles() + +proc pathAsBlob*(keyOrTag: NodeKey|Nodetag): Blob = + keyOrTag.pathAsNibbles.hexPrefixEncode(isLeaf=true) + + +proc pathToKey*(partPath: NibblesSeq): Result[NodeKey,AristoError] = + var key: ByteArray32 + if partPath.len == 64: + # Trailing dummy nibbles (aka no nibbles) force a nibble seq reorg + let path = (partPath & EmptyNibbleSeq).getBytes() + (addr key[0]).copyMem(unsafeAddr path[0], 32) + return ok(key.NodeKey) + err(PathExpected64Nibbles) + +proc pathToKey*(partPath: Blob): Result[NodeKey,AristoError] = + let (isLeaf,pathSegment) = partPath.hexPrefixDecode + if isleaf: + return pathSegment.pathToKey() + err(PathExpectedLeaf) + +proc pathToTag*(partPath: NibblesSeq|Blob): Result[NodeTag,AristoError] = + let rc = partPath.pathToKey() + if rc.isOk: + return ok(rc.value.to(NodeTag)) + err(rc.error) + +# ------------------------------------------------------------------------------ +# End +# 
------------------------------------------------------------------------------ diff --git a/tests/test_aristo.nim b/tests/test_aristo.nim index a89d5efdb..cc95ccff5 100644 --- a/tests/test_aristo.nim +++ b/tests/test_aristo.nim @@ -24,7 +24,7 @@ import hexary_desc, rocky_bulk_load, snapdb_accounts, snapdb_desc], ./replay/[pp, undump_accounts], ./test_sync_snap/[snap_test_xx, test_accounts, test_types], - ./test_aristo/[test_transcode] + ./test_aristo/[test_merge, test_transcode] const baseDir = [".", "..", ".."/"..", $DirSep] @@ -177,6 +177,16 @@ proc trancodeRunner(noisy = true; sample = accSample; stopAfter = high(int)) = suite &"Aristo: transcoding {fileInfo} accounts and proofs for {info}": + # --- Merging --- + + test &"Merge {accLst.len} account lists to database": + noisy.test_mergeAccounts accLst.mapIt(it.data) + + test &"Merge {accLst.len} proof & account lists to database": + noisy.test_mergeProofsAndAccounts accLst + + # --- Transcoding --- + test &"Trancoding VertexID recyling lists (seed={accLst.len})": noisy.test_transcodeVidRecycleLists(accLst.len) diff --git a/tests/test_aristo/test_merge.nim b/tests/test_aristo/test_merge.nim new file mode 100644 index 000000000..9acfbdac8 --- /dev/null +++ b/tests/test_aristo/test_merge.nim @@ -0,0 +1,169 @@ +# Nimbus - Types, data structures and shared utilities used in network sync +# +# Copyright (c) 2018-2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or +# distributed except according to those terms. 
+
+## Aristo (aka Patricia) DB records merge test
+
+import
+  eth/common,
+  unittest2,
+  ../../nimbus/db/kvstore_rocksdb,
+  ../../nimbus/db/aristo/[
+    aristo_desc, aristo_debug, aristo_error, aristo_hike,
+    aristo_merge, aristo_transcode],
+  ../../nimbus/sync/snap/range_desc,
+  ../replay/undump_accounts,
+  ./test_helpers
+
+# ------------------------------------------------------------------------------
+# Private helpers
+# ------------------------------------------------------------------------------
+
+# ------------------------------------------------------------------------------
+# Public test function
+# ------------------------------------------------------------------------------
+
+proc test_mergeAccounts*(
+    noisy: bool;
+    lst: openArray[PackedAccountRange];
+      ) =
+  ## For each sample of the argument `lst`, merge all account leafs into a
+  ## fresh database, one by one. After each merge, the path is looked up
+  ## again via `hikeUp()` and the leaf found is compared against the payload
+  ## that was merged.
+  for u,par in lst:
+    let db = AristoDbRef()
+    var
+      root = VertexID(0)
+      count = 0                      # number of account entries processed
+
+    for n,w in par.accounts:
+      let
+        # Snapshot of the vertex table before merging (for the error dump)
+        sTabState = db.sTab.pp(db)
+        payload = PayloadRef(pType: BlobData, blob: w.accBlob)
+        pathTag = w.accKey.to(NodeTag)
+        hike = db.merge(pathTag, payload, root, proofMode=false)
+        ekih = pathTag.hikeUp(hike.root, db)
+
+      # Re-use the root vertex returned by a successful merge
+      if hike.error == AristoError(0):
+        root = hike.root
+
+      count = n + 1
+      if hike.error != AristoError(0): # or true:
+        noisy.say "***", "<", n, "> ", pathTag.pp,
+          "\n hike",
+          "\n ", hike.pp(db),
+          "\n sTab (prev)",
+          "\n ", sTabState,
+          "\n sTab",
+          "\n ", db.sTab.pp(db),
+          "\n lTab",
+          "\n ", db.lTab.pp,
+          "\n"
+
+      check hike.error == AristoError(0)
+      check ekih.error == AristoError(0)
+
+      # The `if` clauses make sure that the `check` statement reporting the
+      # failure is run without accessing a missing or non-leaf last leg.
+      if ekih.legs.len == 0:
+        check 0 < ekih.legs.len
+      elif ekih.legs[^1].wp.vtx.vType != Leaf:
+        check ekih.legs[^1].wp.vtx.vType == Leaf
+      else:
+        check ekih.legs[^1].wp.vtx.lData.blob == w.accBlob
+
+      if db.lTab.len != n + 1:
+        check db.lTab.len == n + 1 # quick leaf access table
+        break                      # makes no sense to go on further
+
+    noisy.say "***", "sample ", u, "/", lst.len ," leafs merged: ", count
+
+
+proc test_mergeProofsAndAccounts*(
+    noisy: bool;
+    lst: openArray[UndumpAccounts];
+      ) =
+  ## For each sample of the argument `lst`, merge the proof nodes into a
+  ## fresh database first. Then merge the account leafs in proof mode on top
+  ## of the root vertex registered for the state root key of the sample.
+  ## After each merge, the path is looked up again via `hikeUp()` and the
+  ## leaf found is compared against the payload that was merged.
+  for u,par in lst:
+    let
+      db = AristoDbRef()
+      rootKey = par.root.to(NodeKey)
+    var
+      rootID: VertexID
+      proofCount = 0                 # number of proof nodes processed
+      count = 0                      # number of account entries processed
+
+    for n,w in par.data.proof:
+      proofCount = n + 1
+      let
+        key = w.Blob.digestTo(NodeKey)
+        node = w.Blob.decode(NodeRef)
+        rc = db.merge(key, node)
+      if rc.isErr:
+        check rc.isOk # provoke message and error
+        check rc.error == AristoError(0)
+        continue
+
+      check n + 1 < db.pAmk.len
+      check n + 1 < db.kMap.len
+      check db.sTab.len == n + 1
+
+    # Set up root ID
+    db.pAmk.withValue(rootKey, vidPtr):
+      rootID = vidPtr[]
+
+    check not rootID.isZero
+
+    # Debugging dump, disabled by default
+    if true and false:
+      noisy.say "***", proofCount, " proof nodes, root=", rootID.pp,
+        #"\n pAmk",
+        #"\n ", db.pAmk.pp(db),
+        "\n kMap",
+        "\n ", db.kMap.pp(db),
+        "\n sTab",
+        "\n ", db.sTab.pp(db),
+        "\n"
+
+    for n,w in par.data.accounts:
+      let
+        # Snapshot of the vertex table before merging (for the error dump)
+        sTabState = db.sTab.pp(db)
+        payload = PayloadRef(pType: BlobData, blob: w.accBlob)
+        pathTag = w.accKey.to(NodeTag)
+        hike = db.merge(pathTag, payload, rootID, proofMode=true) #, noisy=true)
+        ekih = pathTag.hikeUp(rootID, db)
+
+      count = n + 1
+      if hike.error != AristoError(0): # or true:
+        noisy.say "***", "<", n, "> ", pathTag.pp,
+          "\n hike",
+          "\n ", hike.pp(db),
+          "\n sTab (prev)",
+          "\n ", sTabState,
+          "\n sTab",
+          "\n ", db.sTab.pp(db),
+          "\n lTab",
+          "\n ", db.lTab.pp,
+          "\n"
+
+      check hike.error == AristoError(0)
+      check ekih.error == AristoError(0)
+
+      # The `if` clauses make sure that the `check` statement reporting the
+      # failure is run without accessing a missing or non-leaf last leg.
+      if ekih.legs.len == 0:
+        check 0 < ekih.legs.len
+      elif ekih.legs[^1].wp.vtx.vType != Leaf:
+        check ekih.legs[^1].wp.vtx.vType == Leaf
+      else:
+        check ekih.legs[^1].wp.vtx.lData.blob == w.accBlob
+
+      if db.lTab.len != n + 1:
+        check db.lTab.len == n + 1 # quick leaf access table
+        break                      # makes no sense to go on further
+
+      #if 10 < n:
+      #  break
+
+    noisy.say "***", "sample ", u, "/", lst.len ," leafs merged: ", count
+    #break
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------