# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

import
  std/[algorithm, sequtils, tables],
  eth/[common, trie/nibbles],
  stew/interval_set,
  ../../range_desc,
  "."/[hexary_desc, hexary_error, hexary_nearby, hexary_paths]

{.push raises: [Defect].}

# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------

proc `==`(a, b: XNodeObj): bool =
  ## Field-wise comparison of two nodes. Nodes of different `kind` never
  ## compare equal (the implicit `false` result is returned in that case.)
  if a.kind == b.kind:
    case a.kind:
    of Leaf:
      return a.lPfx == b.lPfx and a.lData == b.lData
    of Extension:
      return a.ePfx == b.ePfx and a.eLink == b.eLink
    of Branch:
      return a.bLink == b.bLink

proc isZeroLink(a: Blob): bool =
  ## Persistent database has `Blob` as key. An empty `Blob` is an unset link.
  a.len == 0

proc isZeroLink(a: RepairKey): bool =
  ## Variant for `RepairKey` links, an all-zero key is an unset link.
  # NOTE(review): the original comment said "persistent database" here as
  # well; presumably `RepairKey` belongs to the in-memory database -- confirm.
  a.isZero

proc convertTo(key: RepairKey; T: type NodeKey): T =
  ## Might be lossy, check before use. Only the bytes `1 .. 32` of the 33
  ## byte `RepairKey` are copied, the leading byte is dropped.
  discard result.init(key.ByteArray33[1 .. 32])

proc toNodeSpecs(nodeKey: RepairKey; partialPath: Blob): NodeSpecs =
  ## Wrap a `RepairKey` link and its hex encoded partial path into a
  ## `NodeSpecs` record.
  NodeSpecs(
    nodeKey:     nodeKey.convertTo(NodeKey),
    partialPath: partialPath)

proc toNodeSpecs(nodeKey: Blob; partialPath: Blob): NodeSpecs =
  ## Variant of `toNodeSpecs()` for `Blob` links.
  NodeSpecs(
    nodeKey:     nodeKey.convertTo(NodeKey),
    partialPath: partialPath)

template noKeyErrorOops(info: static[string]; code: untyped) =
  ## Run `code` and turn a `KeyError` into an assertion failure. To be used
  ## where a `KeyError` is considered impossible.
  try:
    code
  except KeyError as e:
    raiseAssert "Impossible KeyError (" & info & "): " & e.msg

template noRlpErrorOops(info: static[string]; code: untyped) =
  ## Run `code` and turn an `RlpError` into an assertion failure. To be used
  ## where an `RlpError` is considered impossible.
  try:
    code
  except RlpError as e:
    raiseAssert "Impossible RlpError (" & info & "): " & e.msg

# ------------------------------------------------------------------------------
# Private functions
# ------------------------------------------------------------------------------

proc padPartialPath(pfx: NibblesSeq; dblNibble: byte): NodeKey =
  ## Extend (or cut) `partialPath` nibbles sequence and generate `NodeKey`.
  ##
  ## The argument `dblNibble` is the padding byte, i.e. a pair of padding
  ## nibbles (the callers in this module use `0` and `255`.)
  var padded: NibblesSeq

  let padLen = 64 - pfx.len
  if 0 <= padLen:
    # Pad with full bytes first ..
    padded = pfx & dblNibble.repeat(padLen div 2).initNibbleRange
    if (padLen and 1) == 1:
      # .. then append a single nibble if the pad length is odd
      padded = padded & @[dblNibble].initNibbleRange.slice(1)
  else:
    # NOTE(review): if the second argument of `slice()` is an exclusive end
    # index, this keeps 63 rather than 64 nibbles -- confirm this is intended.
    let nope = seq[byte].default.initNibbleRange
    padded = pfx.slice(0,63) & nope # nope forces re-alignment

  let bytes = padded.getBytes
  (addr result.ByteArray32[0]).copyMem(unsafeAddr bytes[0], bytes.len)


proc decomposeLeft(
    envPt: RPath|XPath;
    ivPt: RPath|XPath;
      ): Result[seq[NodeSpecs],HexaryError] =
  ## Helper for `hexaryEnvelopeDecompose()` for handling left side of
  ## envelope from partial path argument
  #
  #                partialPath
  #                  /     \
  #                 /       \
  #      envPt..              -- envelope left end of partial path
  #        |
  #      ivPt..               -- `iv`, not fully covering left of `env`
  #
  var collect: seq[NodeSpecs]
  block rightCurbEnvelope:
    for n in 0 ..< min(envPt.path.len+1, ivPt.path.len):
      if n == envPt.path.len or envPt.path[n] != ivPt.path[n]:
        #
        # At this point, the `node` entries of either `path[n]` step are
        # the same. This is so because the predecessor steps were the same
        # or were the `rootKey` in case n == 0.
        #
        # But then (`node` entries being equal) the only way for the
        # `path[n]` steps to differ is in the entry selector `nibble` for
        # a branch node.
        #
        for m in n ..< ivPt.path.len:
          let
            pfx = ivPt.getNibbles(0, m) # common path segment
            top = ivPt.path[m].nibble   # need nibbles smaller than top
          #
          # Incidentally for a non-`Branch` node, the value `top` becomes
          # `-1` and the `for`- loop will be ignored (which is correct)
          for nibble in 0 ..< top:
            let nodeKey = ivPt.path[m].node.bLink[nibble]
            if not nodeKey.isZeroLink:
              collect.add nodeKey.toNodeSpecs hexPrefixEncode(
                pfx & @[nibble.byte].initNibbleRange.slice(1),isLeaf=false)
        break rightCurbEnvelope
    #
    # Fringe case, e.g. when `partialPath` is an empty prefix (aka `@[0]`)
    # and the database has a single leaf node `(a,some-value)` where the
    # `rootKey` is the hash of this node. In that case, `pMin == 0` and
    # `pMax == high(NodeTag)` and `iv == [a,a]`.
    #
    return err(DecomposeDegenerated)

  ok(collect)

proc decomposeRight(
    envPt: RPath|XPath;
    ivPt: RPath|XPath;
      ): Result[seq[NodeSpecs],HexaryError] =
  ## Helper for `hexaryEnvelopeDecompose()` for handling right side of
  ## envelope from partial path argument
  #
  #        partialPath
  #          /     \
  #         /       \
  #           .. envPt        -- envelope right end of partial path
  #                |
  #           .. ivPt         -- `iv`, not fully covering right of `env`
  #
  var collect: seq[NodeSpecs]
  block leftCurbEnvelope:
    for n in 0 ..< min(envPt.path.len+1, ivPt.path.len):
      if n == envPt.path.len or envPt.path[n] != ivPt.path[n]:
        for m in n ..< ivPt.path.len:
          let
            pfx = ivPt.getNibbles(0, m) # common path segment
            base = ivPt.path[m].nibble  # need nibbles greater than base
          # A negative `base` means there is no entry selector to start
          # from, so nothing is collected for this path step.
          if 0 <= base:
            for nibble in base+1 .. 15:
              let nodeKey = ivPt.path[m].node.bLink[nibble]
              if not nodeKey.isZeroLink:
                collect.add nodeKey.toNodeSpecs hexPrefixEncode(
                  pfx & @[nibble.byte].initNibbleRange.slice(1),isLeaf=false)
        break leftCurbEnvelope
    # No step was found where both paths part (see the fringe case
    # discussion in `decomposeLeft()`.)
    return err(DecomposeDegenerated)

  ok(collect)


proc decomposeImpl(
    partialPath: Blob;               ## Hex encoded partial path
    rootKey: NodeKey;                ## State root
    iv: NodeTagRange;                ## Proofed range of leaf paths
    db: HexaryGetFn|HexaryTreeDbRef; ## Database abstraction
      ): Result[seq[NodeSpecs],HexaryError]
      {.gcsafe, raises: [Defect,RlpError,KeyError].} =
  ## Database agnostic implementation of `hexaryEnvelopeDecompose()`.
  ##
  ## Returns `DecomposeDisjuct` if the envelope of `partialPath` and `iv` are
  ## disjunct, otherwise the errors of the path helpers are passed on.
  let env = partialPath.hexaryEnvelope
  if iv.maxPt < env.minPt or env.maxPt < iv.minPt:
    return err(DecomposeDisjuct) # empty result

  var nodeSpex: seq[NodeSpecs]

  # So ranges do overlap. The case that the `partialPath` envelope is fully
  # contained in `iv` results in `@[]` which is implicitly handled by
  # non-matching any of the cases, below.
  if env.minPt < iv.minPt:
    let
      envPt = env.minPt.hexaryPath(rootKey, db)
      # Make sure that the min point is the nearest node to the right
      ivPt = block:
        let rc = iv.minPt.hexaryPath(rootKey, db).hexaryNearbyRight(db)
        if rc.isErr:
          return err(rc.error)
        rc.value
    block:
      let rc = envPt.decomposeLeft ivPt
      if rc.isErr:
        return err(rc.error)
      nodeSpex &= rc.value

  if iv.maxPt < env.maxPt:
    let
      envPt = env.maxPt.hexaryPath(rootKey, db)
      # Make sure that the max point is the nearest node to the left
      ivPt = block:
        let rc = iv.maxPt.hexaryPath(rootKey, db).hexaryNearbyLeft(db)
        if rc.isErr:
          return err(rc.error)
        rc.value
    block:
      let rc = envPt.decomposeRight ivPt
      if rc.isErr:
        return err(rc.error)
      nodeSpex &= rc.value

  ok(nodeSpex)

# ------------------------------------------------------------------------------
# Public functions, envelope constructor
# ------------------------------------------------------------------------------

proc hexaryEnvelope*(partialPath: Blob): NodeTagRange =
  ## Convert partial path to range of all conceivable node keys starting with
  ## the partial path argument `partialPath`.
  let pfx = partialPath.hexPrefixDecode[1]
  NodeTagRange.new(
    pfx.padPartialPath(0).to(NodeTag),
    pfx.padPartialPath(255).to(NodeTag))

proc hexaryEnvelope*(node: NodeSpecs): NodeTagRange =
  ## Variant of `hexaryEnvelope()` using the `partialPath` field of `node`.
  node.partialPath.hexaryEnvelope()

# ------------------------------------------------------------------------------
# Public functions, helpers
# ------------------------------------------------------------------------------

proc hexaryEnvelopeUniq*(
    partialPaths: openArray[Blob];
      ): seq[Blob]
      {.gcsafe, raises: [Defect,KeyError].} =
  ## Sort and simplify a list of partial paths by sorting envelopes while
  ## removing nested entries.
  var tab: Table[NodeTag,(Blob,bool)]

  for w in partialPaths:
    let iv = w.hexaryEnvelope
    tab[iv.minPt] = (w,true)    # begin entry
    tab[iv.maxPt] = (@[],false) # end entry

  # When sorted, nested entries look like
  #
  # 123000000.. (w0, true)
  # 123400000.. (w1, true)
  # 1234fffff..  (, false)
  # 123ffffff..  (, false)
  # ...
  # 777000000.. (w2, true)
  #
  var level = 0 # nesting depth, only entries opened at depth 0 are kept
  for key in toSeq(tab.keys).sorted(cmp):
    let (w,begin) = tab[key]
    if begin:
      if level == 0:
        result.add w
      level.inc
    else:
      level.dec

proc hexaryEnvelopeUniq*(
    nodes: openArray[NodeSpecs];
      ): seq[NodeSpecs]
      {.gcsafe, raises: [Defect,KeyError].} =
  ## Variant of `hexaryEnvelopeUniq` for sorting a `NodeSpecs` list by
  ## partial paths.
  var tab: Table[NodeTag,(NodeSpecs,bool)]

  for w in nodes:
    let iv = w.partialPath.hexaryEnvelope
    tab[iv.minPt] = (w,true)            # begin entry
    tab[iv.maxPt] = (NodeSpecs(),false) # end entry

  var level = 0 # nesting depth, only entries opened at depth 0 are kept
  for key in toSeq(tab.keys).sorted(cmp):
    let (w,begin) = tab[key]
    if begin:
      if level == 0:
        result.add w
      level.inc
    else:
      level.dec


proc hexaryEnvelopeTouchedBy*(
    rangeSet: NodeTagRangeSet;       ## Set of intervals (aka ranges)
    partialPath: Blob;               ## Partial path for some node
      ): NodeTagRangeSet =
  ## For the envelope interval of the `partialPath` argument, this function
  ## returns the complete set of intervals from the argument set `rangeSet`
  ## that have a common point with the envelope (i.e. they are non-disjunct to
  ## the envelope.)
  result = NodeTagRangeSet.init()
  let probe = partialPath.hexaryEnvelope

  if 0 < rangeSet.covered probe:
    # Find an interval `start` that starts before the `probe` interval.
    # Preferably, this interval is the rightmost one starting before `probe`.
    var startSearch = low(NodeTag)

    # Try least interval starting within or to the right of `probe`.
    let rc = rangeSet.ge probe.minPt
    if rc.isOk:
      # Try predecessor
      let rx = rangeSet.le rc.value.minPt
      if rx.isOk:
        # Predecessor interval starts before `probe`, e.g.
        #
        #  .. [..rx..] [..rc..] ..
        #         [..probe..]
        #
        startSearch = rx.value.minPt
      else:
        # No predecessor, so `rc.value` is the very first interval, e.g.
        #
        #           [..rc..] ..
        #         [..probe..]
        #
        startSearch = rc.value.minPt
    else:
      # No interval starts in or after `probe`.
      #
      # So, if an interval ends before the right end of `probe`, it must
      # start before `probe`.
      let rx = rangeSet.le probe.maxPt
      if rx.isOk:
        #
        #  .. [..rx..] ..
        #         [..probe..]
        #
        # Note that `rc` is an error in this branch, so the start point
        # must be taken from `rx`.
        startSearch = rx.value.minPt
      else:
        # Otherwise there is no interval preceding `probe`, so the zero
        # value for `start` will do the job, e.g.
        #
        #      [.....rx......]
        #        [..probe..]
        discard

    # Collect intervals left-to-right for non-disjunct to `probe`
    for w in increasing[NodeTag,UInt256](rangeSet, startSearch):
      if (w * probe).isOk:
        discard result.merge w
      elif probe.maxPt < w.minPt:
        break # all the `w` following will be disjunct, too

proc hexaryEnvelopeTouchedBy*(
    rangeSet: NodeTagRangeSet;       ## Set of intervals (aka ranges)
    node: NodeSpecs;                 ## Node w/hex encoded partial path
      ): NodeTagRangeSet =
  ## Variant of `hexaryEnvelopeTouchedBy()` using the `partialPath` field
  ## of the `node` argument.
  # Forward the partial path, passing `node` itself would recurse forever.
  rangeSet.hexaryEnvelopeTouchedBy(node.partialPath)

# ------------------------------------------------------------------------------
# Public functions, complement sub-tries
# ------------------------------------------------------------------------------

proc hexaryEnvelopeDecompose*(
    partialPath: Blob;               ## Hex encoded partial path
    rootKey: NodeKey;                ## State root
    iv: NodeTagRange;                ## Proofed range of leaf paths
    db: HexaryTreeDbRef;             ## Database
      ): Result[seq[NodeSpecs],HexaryError]
      {.gcsafe, raises: [Defect,KeyError].} =
  ## The idea of this function is to compute the difference of the envelope
  ## of a `partialPath` off the range `iv` and express the result as a
  ## list of envelopes (represented by nodes.)
  ##
  ## More formally, let the argument `partialPath` refer to an allocated node
  ## and the argument `iv` to a range of `NodeTag` points where left and right
  ## end have boundary proofs (see discussion below) in the database (e.g. as
  ## downloaded via the `snap/1` protocol.)
  ##
  ## Then this function returns a set `W` of partial paths (represented by
  ## nodes) where the envelope of each partial path in `W` has no common node
  ## key with `iv` (i.e. it is disjunct to the sub-range of `iv` where the
  ## boundaries are node keys.)
  ##
  ## This set `W` is maximal in the sense that for every envelope of a
  ## partial path which is prefixed by the argument `partialPath` there exists
  ## an envelope implied by `W` that contains the former envelope, i.e.
  ##
  ## * if `p = partialPath & extension` with `hexaryEnvelope(p) * iv` has no
  ##   node key in the hexary trie database
  ##
  ## * then there is a `w` in `W` with `hexaryEnvelope(p) <= hexaryEnvelope(w)`
  ##
  ## Although not required here (see `hexaryEnvelopeUniq()`) the set `W` will
  ## be minimal.
  ##
  ## Beware:
  ##   Currently, the right end must be an existing node rather than come
  ##   with a boundary proof.
  ##
  ## Comparison with `hexaryInspect()`
  ## ---------------------------------
  ## The function `hexaryInspect()` implements a width-first search for
  ## dangling nodes starting at the state root (think of the cathode ray of
  ## a CRT.) For the sake of comparison with `hexaryEnvelopeDecompose()`, the
  ## search may be amended to ignore nodes the envelope of is fully contained
  ## in some range `iv`. For a fully allocated hexary trie, there will be at
  ## least one sub-trie of length `N` with leafs not in `iv`. So the number
  ## of nodes visited is O(16^N) for some `N` at most 63.
  ##
  ## The function `hexaryEnvelopeDecompose()` takes the left or rightmost leaf
  ## path from `iv`, calculates a chain length `N` of nodes from the state
  ## root to the leaf, and for each node collects the links not pointing inside
  ## the range `iv`. The number of nodes visited is O(N).
  ##
  ## The results of both functions are not interchangeable, though. The first
  ## function `hexaryInspect()`, always returns dangling nodes if there are
  ## any in which case the hexary trie is incomplete and there will be no way
  ## to visit all nodes as they simply do not exist. But iteratively adding
  ## nodes or sub-tries and re-running this algorithm will end up with having
  ## all nodes visited.
  ##
  ## The other function `hexaryEnvelopeDecompose()` always returns the same
  ## result where some nodes might be dangling and may be treated similar to
  ## what was discussed in the previous paragraph. This function also reveals
  ## allocated nodes which might be checked for whether they exist fully or
  ## partially for another state root hexary trie.
  ##
  ## So both are sort of complementary where the function
  ## `hexaryEnvelopeDecompose()` is a fast one and `hexaryInspect()` the
  ## thorough one of last resort.
  ##
  ## Relation to boundary proofs
  ## ---------------------------
  ## The `boundary proof` for a range of leaf paths (e.g. account hashes) for
  ## a given state root is a set of nodes enough to construct the partial
  ## Merkle Patricia trie containing the leafs. If the given range is larger
  ## than the left or rightmost leaf paths, the `boundary proof` also implies
  ## that there is no other leaf path between the range boundary and the left
  ## or rightmost leaf path.
  ##
  ## Consider the result of the function `hexaryEnvelopeDecompose()` of an
  ## empty partial path (the envelope of represents `UInt256`) for a range
  ## `iv`. This result is a `boundary proof` for `iv` according to the
  ## definition above though it is highly redundant. All bottom level nodes
  ## with envelopes disjunct from `iv` can be removed for a `boundary proof`.
  ##
  noRlpErrorOops("in-memory hexaryEnvelopeDecompose"):
    return partialPath.decomposeImpl(rootKey, iv, db)

proc hexaryEnvelopeDecompose*(
    partialPath: Blob;               ## Hex encoded partial path
    rootKey: NodeKey;                ## State root
    iv: NodeTagRange;                ## Proofed range of leaf paths
    getFn: HexaryGetFn;              ## Database abstraction
      ): Result[seq[NodeSpecs],HexaryError]
      {.gcsafe, raises: [Defect,RlpError].} =
  ## Variant of `hexaryEnvelopeDecompose()` for persistent database.
  noKeyErrorOops("persistent hexaryEnvelopeDecompose"):
    return partialPath.decomposeImpl(rootKey, iv, getFn)

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------