# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

## Aristo DB -- Patricia Trie builder, raw node insertion
## ======================================================
##
## This module merges `HashID` values as hexary lookup paths into the
## `Patricia Trie`. When changing vertices (aka nodes without Merkle hashes),
## associated (but separated) Merkle hashes will be deleted unless locked.
## Instead of deleting locked hashes error handling is applied.
##
## Also, nodes (vertices plus merkle hashes) can be added which is needed for
## boundary proofing after `snap/1` download. The vertices are split from the
## nodes and stored as-is on the table holding `Patricia Trie` entries. The
## hashes are stored in a separate table and the vertices are labelled
## `locked`.

{.push raises: [].}

import
  std/[algorithm, sequtils, strutils, sets, tables],
  chronicles,
  eth/[common, trie/nibbles],
  stew/results,
  ../../sync/protocol,
  "."/[aristo_desc, aristo_get, aristo_hike, aristo_path, aristo_transcode,
       aristo_vid]

logScope:
  topics = "aristo-merge"

type
  LeafTiePayload* = object
    ## Generalised key-value pair for a sub-trie. The main trie is the
    ## sub-trie with `root=VertexID(1)`.
    leafTie*: LeafTie                  ## Full `Patricia Trie` path root-to-leaf
    payload*: PayloadRef               ## Leaf data payload

# ------------------------------------------------------------------------------
# Private getters & setters
# ------------------------------------------------------------------------------

proc xPfx(vtx: VertexRef): NibblesSeq =
  ## Path prefix of a `Leaf` or an `Extension` vertex. Calling this getter
  ## on a `Branch` vertex trips the `doAssert`.
  case vtx.vType:
  of Leaf:
    return vtx.lPfx
  of Extension:
    return vtx.ePfx
  of Branch:
    doAssert vtx.vType != Branch # Ooops

proc `xPfx=`(vtx: VertexRef, val: NibblesSeq) =
  ## Set the path prefix of a `Leaf` or an `Extension` vertex. Calling this
  ## setter on a `Branch` vertex trips the `doAssert`.
  case vtx.vType:
  of Leaf:
    vtx.lPfx = val
  of Extension:
    vtx.ePfx = val
  of Branch:
    doAssert vtx.vType != Branch # Ooops

# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------

proc clearMerkleKeys(
    db: AristoDb;                      # Database, top layer
    hike: Hike;                        # Implied vertex IDs to clear hashes for
    vid: VertexID;                     # Additional vertex ID to clear
      ) =
  ## Remove the cached Merkle hash labels for all vertex IDs on the `hike`
  ## legs and for the extra argument `vid`. If there is no label cached on
  ## the top layer but the backend reports a key for the vertex ID, a void
  ## label is stored on the top layer as a deletion marker.
  # The loop variable is deliberately named differently from the `vid`
  # parameter so that it does not shadow it.
  for w in hike.legs.mapIt(it.wp.vid) & @[vid]:
    let lbl = db.top.kMap.getOrVoid w
    if lbl.isValid:
      db.top.kMap.del w
      db.top.pAmk.del lbl
    elif db.getKeyBackend(w).isOK:
      # Register for deleting on backend
      db.top.kMap[w] = VOID_HASH_LABEL
      db.top.pAmk.del lbl              # `lbl` is the void label, here

# -----------

proc insertBranch(
    db: AristoDb;                      # Database, top layer
    hike: Hike;                        # Current state
    linkID: VertexID;                  # Vertex ID to insert
    linkVtx: VertexRef;                # Vertex to insert
    payload: PayloadRef;               # Leaf data payload
     ): Hike =
  ##
  ## Insert `Extension->Branch` vertex chain or just a `Branch` vertex
  ##
  ##   ... --(linkID)--> <linkVtx>
  ##
  ##   <-- immutable --> <---- mutable ----> ..
  ##
  ## will become either
  ##
  ##   --(linkID)-->
  ##        <extVtx>             --(local1)-->
  ##          <forkVtx>[linkInx] --(local2)--> <linkVtx*>
  ##                   [leafInx] --(local3)--> <leafVtx>
  ##
  ## or in case that there is no common prefix
  ##
  ##   --(linkID)-->
  ##          <forkVtx>[linkInx] --(local2)--> <linkVtx*>
  ##                   [leafInx] --(local3)--> <leafVtx>
  ##
  ## *) vertex was slightly modified or removed if obsolete `Extension`
  ##
  ## On success, the returned `Hike` holds the legs of the argument `hike`
  ## followed by the new extension leg (if any), the fork branch leg, and the
  ## new leaf leg. On failure, a `Hike` with only the `error` field set is
  ## returned.
  ##
  let n = linkVtx.xPfx.sharedPrefixLen hike.tail

  # Verify minimum requirements
  if hike.tail.len == n:
    # Should have been tackled by `hikeUp()`, already
    return Hike(error: MergeLeafGarbledHike)
  if linkVtx.xPfx.len == n:
    return Hike(error: MergeBrLinkVtxPfxTooShort)

  # Provide and install `forkVtx`
  let
    forkVtx = VertexRef(vType: Branch)
    linkInx = linkVtx.xPfx[n]
    leafInx = hike.tail[n]
  var
    leafLeg = Leg(nibble: -1)

  # Install `forkVtx`
  block:
    # Clear Merkle hashes (aka hash keys) unless proof mode.
    if db.top.pPrf.len == 0:
      db.clearMerkleKeys(hike, linkID)
    elif linkID in db.top.pPrf:
      return Hike(error: MergeNonBranchProofModeLock)

    if linkVtx.vType == Leaf:
      # Update vertex path lookup
      let
        path = hike.legsTo(NibblesSeq) & linkVtx.lPfx
        rc = path.pathToTag()
      if rc.isErr:
        debug "Branch link leaf path garbled", linkID, path
        return Hike(error: MergeBrLinkLeafGarbled)

      let
        local = db.vidFetch
        lty = LeafTie(root: hike.root, path: rc.value)
      db.top.lTab[lty] = local         # update leaf path lookup cache
      db.top.sTab[local] = linkVtx
      linkVtx.lPfx = linkVtx.lPfx.slice(1+n)
      forkVtx.bVid[linkInx] = local

    elif linkVtx.ePfx.len == n + 1:
      # This extension `linkVtx` becomes obsolete
      forkVtx.bVid[linkInx] = linkVtx.eVid

    else:
      let local = db.vidFetch
      db.top.sTab[local] = linkVtx
      linkVtx.ePfx = linkVtx.ePfx.slice(1+n)
      forkVtx.bVid[linkInx] = local

  block:
    let local = db.vidFetch
    forkVtx.bVid[leafInx] = local
    leafLeg.wp.vid = local
    leafLeg.wp.vtx = VertexRef(
      vType: Leaf,
      lPfx:  hike.tail.slice(1+n),
      lData: payload)
    db.top.sTab[local] = leafLeg.wp.vtx

  # Update branch leg, ready to append more legs
  result = Hike(root: hike.root, legs: hike.legs)

  # Update in-between glue linking `branch --[..]--> forkVtx`
  if 0 < n:
    let extVtx = VertexRef(
      vType: Extension,
      ePfx:  hike.tail.slice(0,n),
      eVid:  db.vidFetch)

    db.top.sTab[linkID] = extVtx

    result.legs.add Leg(
      nibble: -1,
      wp: VidVtxPair(
        vid: linkID,
        vtx: extVtx))

    db.top.sTab[extVtx.eVid] = forkVtx
    result.legs.add Leg(
      nibble: leafInx.int8,
      wp: VidVtxPair(
        vid: extVtx.eVid,
        vtx: forkVtx))
  else:
    db.top.sTab[linkID] = forkVtx
    result.legs.add Leg(
      nibble: leafInx.int8,
      wp: VidVtxPair(
        vid: linkID,
        vtx: forkVtx))

  result.legs.add leafLeg


proc concatBranchAndLeaf(
    db: AristoDb;                      # Database, top layer
    hike: Hike;                        # Path top has a `Branch` vertex
    brVid: VertexID;                   # Branch vertex ID from `Hike` top
    brVtx: VertexRef;                  # Branch vertex, linked to from `Hike`
    payload: PayloadRef;               # Leaf data payload
      ): Hike =
  ## Append the branch vertex passed as argument pair `(brVid,brVtx)` and
  ## then a `Leaf` vertex derived from the argument `payload`.
  ##
  ## An error `Hike` is returned if the `hike` tail is empty, if the branch
  ## slot selected by the first tail nibble is in use already, or if `brVid`
  ## is locked in proof mode.
  ##
  if hike.tail.len == 0:
    return Hike(error: MergeBranchGarbledTail)

  let nibble = hike.tail[0].int8
  if brVtx.bVid[nibble].isValid:
    return Hike(error: MergeRootBranchLinkBusy)

  # Clear Merkle hashes (aka hash keys) unless proof mode.
  if db.top.pPrf.len == 0:
    db.clearMerkleKeys(hike, brVid)
  elif brVid in db.top.pPrf:
    return Hike(error: MergeBranchProofModeLock) # Ooops

  # Append branch vertex
  result = Hike(root: hike.root, legs: hike.legs)
  result.legs.add Leg(wp: VidVtxPair(vtx: brVtx, vid: brVid), nibble: nibble)

  # Append leaf vertex
  let
    vid = db.vidFetch
    vtx = VertexRef(
      vType: Leaf,
      lPfx:  hike.tail.slice(1),
      lData: payload)
  brVtx.bVid[nibble] = vid
  db.top.sTab[brVid] = brVtx
  db.top.sTab[vid] = vtx
  result.legs.add Leg(wp: VidVtxPair(vtx: vtx, vid: vid), nibble: -1)

# ------------------------------------------------------------------------------
# Private functions: add Patricia Trie leaf vertex
# ------------------------------------------------------------------------------

proc topIsBranchAddLeaf(
    db: AristoDb;                      # Database, top layer
    hike: Hike;                        # Path top has a `Branch` vertex
    payload: PayloadRef;               # Leaf data payload
      ): Hike =
  ## Append a `Leaf` vertex derived from the argument `payload` after the top
  ## leg of the `hike` argument which is assumed to refer to a `Branch`
  ## vertex. If successful, the function returns the updated `hike` trail.
  if hike.tail.len == 0:
    return Hike(error: MergeBranchGarbledTail)

  let nibble = hike.legs[^1].nibble
  if nibble < 0:
    return Hike(error: MergeBranchGarbledNibble)

  let
    branch = hike.legs[^1].wp.vtx
    linkID = branch.bVid[nibble]
    linkVtx = db.getVtx linkID

  if not linkVtx.isValid:
    #
    #  .. <branch>[nibble] --(linkID)--> nil
    #
    #  <-------- immutable ------------> <---- mutable ----> ..
    #
    if db.top.pPrf.len == 0:
      # Not much else that can be done here
      debug "Dangling leaf link, reused", branch=hike.legs[^1].wp.vid,
        nibble, linkID, leafPfx=hike.tail

    # Reuse placeholder entry in table
    let vtx = VertexRef(
      vType: Leaf,
      lPfx:  hike.tail,
      lData: payload)
    db.top.sTab[linkID] = vtx
    result = Hike(root: hike.root, legs: hike.legs)
    result.legs.add Leg(wp: VidVtxPair(vid: linkID, vtx: vtx), nibble: -1)
    return

  if linkVtx.vType == Branch:
    # Slot link to a branch vertex should be handled by `hikeUp()`
    #
    #  .. <branch>[nibble] --(linkID)--> <linkVtx>[]
    #
    #  <-------- immutable ------------> <---- mutable ----> ..
    #
    return db.concatBranchAndLeaf(hike, linkID, linkVtx, payload)

  db.insertBranch(hike, linkID, linkVtx, payload)


proc topIsExtAddLeaf(
    db: AristoDb;                      # Database, top layer
    hike: Hike;                        # Path top has an `Extension` vertex
    payload: PayloadRef;               # Leaf data payload
      ): Hike =
  ## Append a `Leaf` vertex derived from the argument `payload` after the top
  ## leg of the `hike` argument which is assumed to refer to an `Extension`
  ## vertex. If successful, the function returns the
  ## updated `hike` trail.
  let
    extVtx = hike.legs[^1].wp.vtx
    extVid = hike.legs[^1].wp.vid
    brVid = extVtx.eVid
    brVtx = db.getVtx brVid

  result = Hike(root: hike.root, legs: hike.legs)

  if not brVtx.isValid:
    # Blind vertex, promote to leaf vertex.
    #
    #  --(extVid)--> <extVtx> --(brVid)--> nil
    #
    #  <-------- immutable -------------->
    #
    let vtx = VertexRef(
      vType: Leaf,
      lPfx:  extVtx.ePfx & hike.tail,
      lData: payload)
    db.top.sTab[extVid] = vtx
    result.legs[^1].wp.vtx = vtx

  elif brVtx.vType != Branch:
    return Hike(error: MergeBranchRootExpected)

  else:
    # The first tail nibble selects the branch slot, so the tail must not be
    # empty (same guard as in `concatBranchAndLeaf()`.)
    if hike.tail.len == 0:
      return Hike(error: MergeBranchGarbledTail)

    let
      nibble = hike.tail[0].int8
      linkID = brVtx.bVid[nibble]
    #
    # Required
    #
    #  --(extVid)--> <extVtx> --(brVid)--> <brVtx>[nibble] --(linkID)--> nil
    #
    #  <-------- immutable --------------> <-------- mutable ----------> ..
    #
    if linkID.isValid:
      return Hike(error: MergeRootBranchLinkBusy)

    # Clear Merkle hashes (aka hash keys) unless proof mode
    if db.top.pPrf.len == 0:
      db.clearMerkleKeys(hike, brVid)
    elif brVid in db.top.pPrf:
      return Hike(error: MergeBranchProofModeLock)

    let
      vid = db.vidFetch
      vtx = VertexRef(
        vType: Leaf,
        lPfx:  hike.tail.slice(1),
        lData: payload)
    brVtx.bVid[nibble] = vid
    db.top.sTab[brVid] = brVtx
    db.top.sTab[vid] = vtx
    result.legs.add Leg(wp: VidVtxPair(vtx: brVtx, vid: brVid), nibble: nibble)
    result.legs.add Leg(wp: VidVtxPair(vtx: vtx, vid: vid), nibble: -1)


proc topIsEmptyAddLeaf(
    db: AristoDb;                      # Database, top layer
    hike: Hike;                        # No path legs
    rootVtx: VertexRef;                # Root vertex
    payload: PayloadRef;               # Leaf data payload
      ): Hike =
  ## Append a `Leaf` vertex derived from the argument `payload` after the
  ## argument vertex `rootVtx` and append both to the empty argument `hike`.
  ##
  ## If `rootVtx` is a `Branch` vertex, the leaf is hooked into the slot
  ## selected by the first tail nibble. Otherwise the work is delegated to
  ## `insertBranch()`.
  if rootVtx.vType == Branch:
    # The first tail nibble selects the branch slot, so the tail must not be
    # empty (same guard as in `concatBranchAndLeaf()`.)
    if hike.tail.len == 0:
      return Hike(error: MergeBranchGarbledTail)

    let nibble = hike.tail[0].int8
    if rootVtx.bVid[nibble].isValid:
      return Hike(error: MergeRootBranchLinkBusy)

    # Clear Merkle hashes (aka hash keys) unless proof mode
    if db.top.pPrf.len == 0:
      db.clearMerkleKeys(hike, hike.root)
    elif hike.root in db.top.pPrf:
      return Hike(error: MergeBranchProofModeLock)

    let
      leafVid = db.vidFetch
      leafVtx = VertexRef(
        vType: Leaf,
        lPfx:  hike.tail.slice(1),
        lData: payload)
    rootVtx.bVid[nibble] = leafVid
    db.top.sTab[hike.root] = rootVtx
    db.top.sTab[leafVid] = leafVtx
    return Hike(
      root: hike.root,
      legs: @[Leg(wp: VidVtxPair(vtx: rootVtx, vid: hike.root), nibble: nibble),
              Leg(wp: VidVtxPair(vtx: leafVtx, vid: leafVid), nibble: -1)])

  db.insertBranch(hike, hike.root, rootVtx, payload)


proc updatePayload(
    db: AristoDb;                      # Database, top layer
    hike: Hike;                        # Path with a `Leaf` vertex on top
    leaf: LeafTiePayload;              # Leaf data and payload
      ): Hike =
  ## Update leaf vertex if payloads differ. The argument `hike` is returned
  ## unchanged as function result.
  result = hike
  let vtx = result.legs[^1].wp.vtx

  # Update payloads if they differ
  if vtx.lData != leaf.payload:
    let vid = result.legs[^1].wp.vid

    vtx.lData = leaf.payload
    db.top.sTab[vid] = vtx
    db.top.lTab[leaf.leafTie] = vid
    db.clearMerkleKeys(result, vid)

# ------------------------------------------------------------------------------
# Private functions: add Merkle proof node
# ------------------------------------------------------------------------------

proc mergeNodeImpl(
    db: AristoDb;                      # Database, top layer
    hashKey: HashKey;                  # Merkle hash of node
    node: NodeRef;                     # Node derived from RLP representation
    rootVid: VertexID;                 # Current sub-trie
      ): Result[VertexID,AristoError] =
  ## The function merges the argument hash key `hashKey` as expanded from the
  ## node RLP representation into the `Aristo Trie` database. The vertex is
  ## split off from the node and stored separately. So are the Merkle hashes.
  ## The vertex is labelled `locked`.
  ##
  ## The `node` argument is *not* checked, whether the vertex IDs have been
  ## allocated, already. If the node comes straight from the `decode()` RLP
  ## decoder as expected, these vertex IDs will be all zero.
  ##
  ## On success, the vertex ID under which the vertex was stored is returned.
  ##
  if node.error != AristoError(0):
    return err(node.error)
  if not rootVid.isValid:
    return err(MergeRootKeyInvalid)

  # Verify `hashKey`
  if not hashKey.isValid:
    return err(MergeHashKeyInvalid)

  # Make sure that the `vid<->hashLbl` reverse mapping has been cached,
  # already. This is provided for if the `nodes` are processed in the right
  # order `root->.. ->leaf`.
  let
    hashLbl = HashLabel(root: rootVid, key: hashKey)
    vid = db.top.pAmk.getOrVoid hashLbl
  if not vid.isValid:
    return err(MergeRevVidMustHaveBeenCached)

  let lbl = db.top.kMap.getOrVoid vid
  if lbl == hashLbl:
    if db.top.sTab.hasKey vid:
      # This is typically considered OK
      return err(MergeHashKeyCachedAlready)
    # Otherwise proceed
  elif lbl.isValid:
    # Different key assigned => error
    return err(MergeHashKeyDiffersFromCached)

  # Prefer a vertex that exists for `vid`, already. Otherwise use the vertex
  # derived from the argument `node`.
  let (vtx, hasVtx) = block:
    let vty = db.getVtx vid
    if vty.isValid:
      (vty, true)
    else:
      (node.to(VertexRef), false)

  # The `vertexID <-> hashLabel` mappings need to be set up now (if any)
  case node.vType:
  of Leaf:
    discard
  of Extension:
    if node.key[0].isValid:
      let eLbl = HashLabel(root: rootVid, key: node.key[0])
      if hasVtx:
        if not vtx.eVid.isValid:
          return err(MergeNodeVtxDiffersFromExisting)
        db.top.pAmk[eLbl] = vtx.eVid
      else:
        let eVid = db.top.pAmk.getOrVoid eLbl
        if eVid.isValid:
          vtx.eVid = eVid
        else:
          vtx.eVid = db.vidAttach eLbl
  of Branch:
    for n in 0..15:
      if node.key[n].isValid:
        let bLbl = HashLabel(root: rootVid, key: node.key[n])
        if hasVtx:
          if not vtx.bVid[n].isValid:
            return err(MergeNodeVtxDiffersFromExisting)
          db.top.pAmk[bLbl] = vtx.bVid[n]
        else:
          let bVid = db.top.pAmk.getOrVoid bLbl
          if bVid.isValid:
            vtx.bVid[n] = bVid
          else:
            vtx.bVid[n] = db.vidAttach bLbl

  db.top.pPrf.incl vid
  db.top.sTab[vid] = vtx
  ok vid

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc merge*(
    db: AristoDb;                      # Database, top layer
    leaf: LeafTiePayload;              # Leaf item to add to the database
      ): Hike =
  ## Merge the argument `leaf` key-value-pair into the top level vertex table
  ## of the database `db`. The field `leafTie` of the `leaf` argument is used
  ## to index the leaf vertex on the `Patricia Trie`. The field `payload` is
  ## stored with the leaf vertex in the database. If the leaf vertex exists
  ## already with the same payload, the error `MergeLeafPathCachedAlready`
  ## is set on the returned `Hike`.
  ##
  ## If one of the insertion helpers fails, its error code is passed on
  ## with the returned `Hike`.
  ##
  # Check whether the leaf is on the database and payloads match
  var haveLeafOk = false
  block:
    let vid = db.top.lTab.getOrVoid leaf.leafTie
    if vid.isValid:
      let vtx = db.getVtx vid
      if vtx.isValid and vtx.lData == leaf.payload:
        haveLeafOk = true

  if haveLeafOk:
    result.error = MergeLeafPathCachedAlready

  else:
    let hike = leaf.leafTie.hikeUp(db)
    if 0 < hike.legs.len:
      case hike.legs[^1].wp.vtx.vType:
      of Branch:
        result = db.topIsBranchAddLeaf(hike, leaf.payload)
      of Leaf:
        if 0 < hike.tail.len:          # `Leaf` vertex problem?
          return Hike(error: MergeLeafGarbledHike)
        result = db.updatePayload(hike, leaf)
      of Extension:
        result = db.topIsExtAddLeaf(hike, leaf.payload)

    else:
      # Empty hike
      let rootVtx = db.getVtx hike.root
      if rootVtx.isValid:
        result = db.topIsEmptyAddLeaf(hike,rootVtx,leaf.payload)
      else:
        # Bootstrap for existing root ID
        let wp = VidVtxPair(
          vid: hike.root,
          vtx: VertexRef(
            vType: Leaf,
            lPfx:  leaf.leafTie.path.to(NibblesSeq),
            lData: leaf.payload))
        db.top.sTab[wp.vid] = wp.vtx
        result = Hike(root: wp.vid, legs: @[Leg(wp: wp, nibble: -1)])

    # Double check the result until the code is more reliable. The check is
    # skipped if an error was reported already, so that this error code is
    # not replaced by `MergeAssemblyFailed`.
    if result.error == AristoError(0):
      let rc = result.to(NibblesSeq).pathToKey
      if rc.isErr or rc.value != leaf.leafTie.path.to(HashKey):
        result.error = MergeAssemblyFailed # Ooops

    # Update leaf access cache
    if result.error == AristoError(0):
      db.top.lTab[leaf.leafTie] = result.legs[^1].wp.vid

    # End else (1st level)

proc merge*(
    db: AristoDb;                      # Database, top layer
    leafs: openArray[LeafTiePayload];  # Leaf items to add to the database
      ): tuple[merged: int, dups: int, error: AristoError] =
  ## Variant of `merge()` for leaf lists.
  ##
  ## On success, the function returns the number of merged leafs, the number
  ## of leafs rejected with `MergeLeafPathCachedAlready`, and a zero error
  ## code. Processing stops at the first other error, in which case the
  ## `merged` slot of the result holds the index of the offending list item.
  var (merged, dups) = (0, 0)

  for n,w in leafs:
    let hike = db.merge w
    if hike.error == AristoError(0):
      merged.inc
    elif hike.error == MergeLeafPathCachedAlready:
      dups.inc
    else:
      return (n,dups,hike.error)

  (merged, dups, AristoError(0))

# ---------------------

proc merge*(
    db: AristoDb;                      # Database, top layer
    proof: openArray[SnapProof];       # RLP encoded node records
    rootVid: VertexID;                 # Current sub-trie
      ): tuple[merged: int, dups: int, error: AristoError]
      {.gcsafe, raises: [RlpError].} =
  ## The function merges the argument `proof` list of RLP encoded node records
  ## into the `Aristo Trie` database. This function is intended to be used with
  ## the proof nodes as returned by `snap/1` messages.
  ##
  ## The function returns the number of merged nodes, the number of nodes
  ## rejected with `MergeHashKeyCachedAlready`, and the error code of the
  ## first other failure (or zero.)
  ##
  if not rootVid.isValid:
    return (0,0,MergeRootVidInvalid)
  let rootKey = db.getKey rootVid
  if not rootKey.isValid:
    return (0,0,MergeRootKeyInvalid)

  # Expand and collect hash keys and nodes
  var nodeTab: Table[HashKey,NodeRef]
  for w in proof:
    let
      key = w.Blob.digestTo(HashKey)
      node = w.Blob.decode(NodeRef)
    nodeTab[key] = node

  # Create a table with back links
  var
    backLink: Table[HashKey,HashKey]
    blindNodes: HashSet[HashKey]
  for (key,node) in nodeTab.pairs:
    case node.vType:
    of Leaf:
      blindNodes.incl key
    of Extension:
      if nodeTab.hasKey node.key[0]:
        backLink[node.key[0]] = key
      else:
        blindNodes.incl key
    of Branch:
      var isBlind = true
      for n in 0 .. 15:
        if nodeTab.hasKey node.key[n]:
          isBlind = false
          backLink[node.key[n]] = key
      if isBlind:
        blindNodes.incl key

  # Run over blind nodes and build chains from a blind/bottom level node up
  # to the root node. Select only chains that end up at the pre-defined root
  # node.
  var chains: seq[seq[HashKey]]
  for w in blindNodes:
    # Build a chain of nodes up to the root node
    var
      chain: seq[HashKey]
      nodeKey = w
    while nodeKey.isValid and nodeTab.hasKey nodeKey:
      chain.add nodeKey
      nodeKey = backLink.getOrDefault(nodeKey, VOID_HASH_KEY)
    if 0 < chain.len and chain[^1] == rootKey:
      chains.add chain

  # Make sure that the reverse lookup for the root vertex label is available.
  block:
    let
      lbl = HashLabel(root: rootVid, key: rootKey)
      vid = db.top.pAmk.getOrVoid lbl
    if not vid.isValid:
      db.top.pAmk[lbl] = rootVid

  # Process over chains in reverse mode starting with the root node. This
  # allows the algorithm to find existing nodes on the backend.
  var
    seen: HashSet[HashKey]
    (merged, dups) = (0, 0)
  # Process the root ID which is common to all chains
  for chain in chains:
    for key in chain.reversed:
      if key in seen:
        discard
      else:
        seen.incl key
        let
          node = nodeTab.getOrDefault(key, NodeRef(nil))
          rc = db.mergeNodeImpl(key, node, rootVid)
        if rc.isOK:
          merged.inc
        elif rc.error == MergeHashKeyCachedAlready:
          dups.inc
        else:
          return (merged, dups, rc.error)

  (merged, dups, AristoError(0))

proc merge*(
    db: AristoDb;                      # Database, top layer
    rootKey: HashKey;                  # Merkle hash for root
    rootVid = VertexID(0)              # Optionally, force root vertex ID
      ): Result[VertexID,AristoError] =
  ## Set up a `rootKey` associated with a vertex ID.
  ##
  ## If argument `rootVid` is unset (defaults to `VertexID(0)`) then the main
  ## trie is tested for `VertexID(1)`. If assigned with a different Merkle key
  ## already, a new vertex ID is created and the argument root key is assigned
  ## to this vertex ID.
  ##
  ## If the argument `rootVid` is set (to a value different from `VertexID(0)`),
  ## then a sub-trie with root `rootVid` is checked for. If it exists with a
  ## different root key assigned, then an error is returned. Otherwise the
  ## argument root key is assigned to `rootVid` unless it is assigned,
  ## already.
  ##
  ## Upon successful return, the vertex ID assigned to the root key is returned.
  ##
  if not rootKey.isValid:
    return err(MergeRootKeyInvalid)

  if rootVid.isValid and rootVid != VertexID(1):
    let key = db.getKey rootVid
    if key == rootKey:
      return ok rootVid

    if not key.isValid:
      db.vidAttach(HashLabel(root: rootVid, key: rootKey), rootVid)
      return ok rootVid
  else:
    let key = db.getKey VertexID(1)
    if key == rootKey:
      return ok VertexID(1)

    # Otherwise assign unless valid
    if not key.isValid:
      db.vidAttach(HashLabel(root: VertexID(1), key: rootKey), VertexID(1))
      return ok VertexID(1)

    # Create and assign a new root key
    if not rootVid.isValid:
      return ok db.vidRoot(rootKey)

  err(MergeRootKeyDiffersForVid)

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------