nimbus-eth1/nimbus/sync/snap/range_desc.nim

446 lines
14 KiB
Nim

# Nimbus
# Copyright (c) 2018-2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or
# distributed except according to those terms.
{.push raises: [].}
import
std/[math, sequtils, strutils, hashes],
eth/common,
stew/interval_set,
stint,
../../constants,
../../utils/prettify,
../protocol,
../types
type
ByteArray32* = array[32,byte]
## Used for 32 byte database keys
NodeKey* = distinct ByteArray32
## Hash key without the hash wrapper (as opposed to `NodeTag` which is a
## number.)
NodeTag* = distinct UInt256
## Trie leaf item, account hash etc. This data type is a representation
## for a `NodeKey` geared up for arithmetic and comparing keys.
NodeTagRange* = Interval[NodeTag,UInt256]
## Interval `[minPt,maxPt]` of` NodeTag` elements, can be managed in an
## `IntervalSet` data type.
NodeTagRangeSet* = IntervalSetRef[NodeTag,UInt256]
## Managed structure to handle non-adjacent `NodeTagRange` intervals
NodeSpecs* = object
## Multi purpose descriptor for a hexary trie node:
## * Missing node specs. If the `data` argument is empty, the `partialPath`
## refers to a missoing node entry. The `nodeKey` is another way of
## writing the node hash and used to verify that a potential data `Blob`
## is acceptable as node data.
## * Node data. If the `data` argument is non-empty, the `partialPath`
## fields can/will be used as function argument for various functions
## when healing.
partialPath*: Blob ## Compact encoded partial path nibbles
nodeKey*: NodeKey ## Derived from node hash
data*: Blob ## Node data (might not be present)
PackedAccountRange* = object
## Re-packed version of `SnapAccountRange`. The reason why repacking is
## needed is that the `snap/1` protocol uses another RLP encoding than is
## used for storing in the database. So the `PackedAccount` is `BaseDB`
## trie compatible.
accounts*: seq[PackedAccount] ## List of re-packed accounts data
proof*: seq[SnapProof] ## Boundary proofs
PackedAccount* = object
## In fact, the `snap/1` driver returns the `Account` structure which is
## unwanted overhead, here.
accKey*: NodeKey
accBlob*: Blob
AccountSlotsHeader* = object
## Storage root header
accKey*: NodeKey ## Owner account, maybe unnecessary
storageRoot*: Hash256 ## Start of storage tree
subRange*: Option[NodeTagRange] ## Sub-range of slot range covered
AccountSlotsChanged* = object
## Variant of `AccountSlotsHeader` representing some transition
account*: AccountSlotsHeader ## Account header
newRange*: Option[NodeTagRange] ## New sub-range (if-any)
AccountStorageRange* = object
## List of storage descriptors, the last `AccountSlots` storage data might
## be incomplete and the `proof` is needed for proving validity.
storages*: seq[AccountSlots] ## List of accounts and storage data
proof*: seq[SnapProof] ## Boundary proofs for last entry
base*: NodeTag ## Lower limit for last entry w/proof
AccountSlots* = object
## Account storage descriptor
account*: AccountSlotsHeader
data*: seq[SnapStorage]
# See below for definition of constant `FullNodeTagRange`
# ------------------------------------------------------------------------------
# Public helpers
# ------------------------------------------------------------------------------
proc to*(tag: NodeTag; T: type Hash256): T =
## Convert to serialised equivalent
result.data = tag.UInt256.toBytesBE
proc to*(key: NodeKey; T: type NodeTag): T =
## Convert from serialised equivalent
UInt256.fromBytesBE(key.ByteArray32).T
proc to*(key: Hash256; T: type NodeTag): T =
## Syntactic sugar
key.data.NodeKey.to(T)
proc to*(tag: NodeTag; T: type NodeKey): T =
## Syntactic sugar
tag.UInt256.toBytesBE.T
proc to*(hash: Hash256; T: type NodeKey): T =
## Syntactic sugar
hash.data.NodeKey
proc to*(key: NodeKey; T: type Hash256): T =
## Syntactic sugar
T(data: key.ByteArray32)
proc to*(key: NodeKey; T: type Blob): T =
## Syntactic sugar
key.ByteArray32.toSeq
proc to*(n: SomeUnsignedInt|UInt256; T: type NodeTag): T =
## Syntactic sugar
n.u256.T
proc digestTo*(data: Blob; T: type NodeKey): T =
keccakHash(data).data.T
proc hash*(a: NodeKey): Hash =
## Table/KeyedQueue mixin
a.ByteArray32.hash
proc `==`*(a, b: NodeKey): bool =
## Table/KeyedQueue mixin
a.ByteArray32 == b.ByteArray32
# ------------------------------------------------------------------------------
# Public constructors
# ------------------------------------------------------------------------------
proc init*(key: var NodeKey; data: openArray[byte]): bool =
## Import argument `data` into `key` which must have length either `32`, or
## `0`. The latter case is equivalent to an all zero byte array of size `32`.
if data.len == 32:
(addr key.ByteArray32[0]).copyMem(unsafeAddr data[0], data.len)
return true
elif data.len == 0:
key.reset
return true
proc init*(tag: var NodeTag; data: openArray[byte]): bool =
## Similar to `init(key: var NodeHash; .)`.
var key: NodeKey
if key.init(data):
tag = key.to(NodeTag)
return true
# ------------------------------------------------------------------------------
# Public rlp support
# ------------------------------------------------------------------------------
proc read*[T: NodeTag|NodeKey](rlp: var Rlp, W: type T): T
{.gcsafe, raises: [RlpError].} =
rlp.read(Hash256).to(T)
proc append*(writer: var RlpWriter, val: NodeTag|NodeKey) =
writer.append(val.to(Hash256))
# ------------------------------------------------------------------------------
# Public `NodeTag` and `NodeTagRange` functions
# ------------------------------------------------------------------------------
proc u256*(lp: NodeTag): UInt256 = lp.UInt256
proc low*(T: type NodeTag): T = low(UInt256).T
proc high*(T: type NodeTag): T = high(UInt256).T
proc `+`*(a: NodeTag; b: UInt256): NodeTag = (a.u256+b).NodeTag
proc `-`*(a: NodeTag; b: UInt256): NodeTag = (a.u256-b).NodeTag
proc `-`*(a, b: NodeTag): UInt256 = (a.u256 - b.u256)
proc `==`*(a, b: NodeTag): bool = a.u256 == b.u256
proc `<=`*(a, b: NodeTag): bool = a.u256 <= b.u256
proc `<`*(a, b: NodeTag): bool = a.u256 < b.u256
proc cmp*(x, y: NodeTag): int = cmp(x.UInt256, y.UInt256)
proc hash*(a: NodeTag): Hash =
## Mixin for `Table` or `keyedQueue`
a.to(Hash256).data.hash
proc digestTo*(data: Blob; T: type NodeTag): T =
## Hash the `data` argument
keccakHash(data).to(T)
const
# Cannot be defined earlier: `NodeTag` operations needed
FullNodeTagRange* = NodeTagRange.new(low(NodeTag),high(NodeTag))
# ------------------------------------------------------------------------------
# Public functions: `NodeTagRange` helpers
# ------------------------------------------------------------------------------
proc isEmpty*(lrs: NodeTagRangeSet): bool =
## Returns `true` if the argument set `lrs` of intervals is empty
lrs.chunks == 0
proc isEmpty*(lrs: openArray[NodeTagRangeSet]): bool =
## Variant of `isEmpty()` where intervals are distributed across several
## sets.
for ivSet in lrs:
if 0 < ivSet.chunks:
return false
true
proc isEmpty*(iv: NodeTagRange): bool =
## Ditto for an interval range.
false # trivially by definition
proc isFull*(lrs: NodeTagRangeSet): bool =
## Returns `true` if the argument set `lrs` contains of the single
## interval [low(NodeTag),high(NodeTag)].
lrs.total == 0 and 0 < lrs.chunks
proc isFull*(lrs: openArray[NodeTagRangeSet]): bool =
## Variant of `isFull()` where intervals are distributed across several
## sets. This function makes sense only if the interval sets are mutually
## disjunct.
var accu: NodeTag
for ivSet in lrs:
if 0 < ivSet.total:
if high(NodeTag) - ivSet.total < accu:
return true
accu = accu + ivSet.total
elif 0 < ivSet.chunks:
# number of points in `ivSet` is `2^256 + 1`
return true
proc isFull*(iv: NodeTagRange): bool =
## Ditto for an interval range.
iv == FullNodeTagRange
proc emptyFactor*(lrs: NodeTagRangeSet): float =
## Relative uncovered total, i.e. `#points-not-covered / 2^256` to be used
## in statistics or triggers.
if 0 < lrs.total:
((high(NodeTag) - lrs.total).u256 + 1).to(float) / (2.0^256)
elif lrs.chunks == 0:
1.0 # `total` represents the residue class `mod 2^256` from `0`..`(2^256-1)`
else:
0.0 # number of points in `lrs` is `2^256 + 1`
proc emptyFactor*(lrs: openArray[NodeTagRangeSet]): float =
## Variant of `emptyFactor()` where intervals are distributed across several
## sets. This function makes sense only if the interval sets are mutually
## disjunct.
var accu: NodeTag
for ivSet in lrs:
if 0 < ivSet.total:
if high(NodeTag) - ivSet.total < accu:
return 0.0
accu = accu + ivSet.total
elif ivSet.chunks == 0:
discard
else: # number of points in `ivSet` is `2^256 + 1`
return 0.0
# Calculate: (2^256 - accu) / 2^256
if accu == 0.to(NodeTag):
1.0
else:
((high(NodeTag) - accu).u256 + 1).to(float) / (2.0^256)
proc fullFactor*(lrs: NodeTagRangeSet): float =
## Relative covered total, i.e. `#points-covered / 2^256` to be used
## in statistics or triggers
if 0 < lrs.total:
lrs.total.u256.to(float) / (2.0^256)
elif lrs.chunks == 0:
0.0 # `total` represents the residue class `mod 2^256` from `0`..`(2^256-1)`
else:
1.0 # number of points in `lrs` is `2^256 + 1`
proc fullFactor*(lrs: openArray[NodeTagRangeSet]): float =
## Variant of `fullFactor()` where intervals are distributed across several
## sets. This function makes sense only if the interval sets are mutually
## disjunct.
var accu: NodeTag
for ivSet in lrs:
if 0 < ivSet.total:
if high(NodeTag) - ivSet.total < accu:
return 1.0
accu = accu + ivSet.total
elif ivSet.chunks == 0:
discard
else: # number of points in `ivSet` is `2^256 + 1`
return 1.0
accu.u256.to(float) / (2.0^256)
proc fullFactor*(iv: NodeTagRange): float =
## Relative covered length of an inetrval, i.e. `#points-covered / 2^256`
if 0 < iv.len:
iv.len.u256.to(float) / (2.0^256)
else:
1.0 # number of points in `iv` is `2^256 + 1`
# ------------------------------------------------------------------------------
# Public functions: printing & pretty printing
# ------------------------------------------------------------------------------
proc `$`*(nodeTag: NodeTag): string =
if nodeTag == high(NodeTag):
"2^256-1"
elif nodeTag == 0.u256.NodeTag:
"0"
elif nodeTag == 2.u256.pow(255).NodeTag:
"2^255" # 800...
elif nodeTag == 2.u256.pow(254).NodeTag:
"2^254" # 400..
elif nodeTag == 2.u256.pow(253).NodeTag:
"2^253" # 200...
elif nodeTag == 2.u256.pow(251).NodeTag:
"2^252" # 100...
else:
nodeTag.UInt256.toHex
proc `$`*(nodeKey: NodeKey): string =
$nodeKey.to(NodeTag)
proc leafRangePp*(a, b: NodeTag): string =
## Needed for macro generated DSL files like `snap.nim` because the
## `distinct` flavour of `NodeTag` is discarded there.
result = "[" & $a
if a != b:
result &= ',' & $b
result &= "]"
proc leafRangePp*(iv: NodeTagRange): string =
## Variant of `leafRangePp()`
leafRangePp(iv.minPt, iv.maxPt)
proc `$`*(a, b: NodeTag): string =
## Prettyfied prototype
leafRangePp(a,b)
proc `$`*(iv: NodeTagRange): string =
leafRangePp iv
proc fullPC3*(w: NodeTagRangeSet|NodeTagRange): string =
## Pretty print fill state of range sets.
if w.isEmpty:
"0%"
elif w.isFull:
"100%"
else:
let ff = w.fullFactor
if ff <= 0.99999:
ff.toPC(3)
else:
"99.999"
proc fullPC3*(w: openArray[NodeTagRangeSet]): string =
## Variant of `fullPC3()` where intervals are distributed across several
## sets. This function makes sense only if the interval sets are mutually
## disjunct.
if w.isEmpty:
"0%"
else:
let partition = "~" & $w.mapIt(it.chunks).foldl(a+b)
if w.isFull:
"100%" & partition
else:
let ff = w.fullFactor
if ff <= 0.99999:
ff.toPC(3) & partition
else:
"99.999" & partition
proc dump*(
ranges: openArray[NodeTagRangeSet];
moan: proc(overlap: UInt256; iv: NodeTagRange) {.gcsafe.};
printRangesMax = high(int);
): string =
## Dump/anlalyse range sets
var
cache: NodeTagRangeSet
ivTotal = 0.u256
ivCarry = false
if ranges.len == 1:
cache = ranges[0]
ivTotal = cache.total
if ivTotal == 0.u256 and 0 < cache.chunks:
ivCarry = true
else:
cache = NodeTagRangeSet.init()
for ivSet in ranges:
if ivSet.total == 0.u256 and 0 < ivSet.chunks:
ivCarry = true
elif ivTotal <= high(UInt256) - ivSet.total:
ivTotal += ivSet.total
else:
ivCarry = true
for iv in ivSet.increasing():
let n = cache.merge(iv)
if n != iv.len and not moan.isNil:
moan(iv.len - n, iv)
if 0 == cache.total and 0 < cache.chunks:
result = "2^256"
if not ivCarry:
result &= ":" & $ivTotal
else:
result = $cache.total
if ivCarry:
result &= ":2^256"
elif ivTotal != cache.total:
result &= ":" & $ivTotal
result &= ":"
if cache.chunks <= printRangesMax:
result &= toSeq(cache.increasing).mapIt($it).join(",")
else:
result &= toSeq(cache.increasing).mapIt($it)[0 ..< printRangesMax].join(",")
result &= " " & $(cache.chunks - printRangesMax) & " more .."
proc dump*(
range: NodeTagRangeSet;
printRangesMax = high(int);
): string =
## Ditto
[range].dump(nil, printRangesMax)
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------