227 lines
7.9 KiB
Nim
Raw Normal View History

# nimbus-eth1
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
{.push raises: [].}
import
std/[algorithm, sequtils, sets, tables, typetraits],
Aristo db update for short nodes key edge cases (#1887) * Aristo: Provide key-value list signature calculator detail: Simple wrappers around `Aristo` core functionality * Update new API for `CoreDb` details: + Renamed new API functions `contains()` => `hasKey()` or `hasPath()` which disables the `in` operator on non-boolean `contains()` functions + The functions `get()` and `fetch()` always return a not-found error if there is no item, available. The new functions `getOrEmpty()` and `mergeOrEmpty()` return an an empty `Blob` if there is no such key found. * Rewrite `core_apps.nim` using new API from `CoreDb` * Use `Aristo` functionality for calculating Merkle signatures details: For debugging, the `VerifyAristoForMerkleRootCalc` can be set so that `Aristo` results will be verified against the legacy versions. * Provide general interface for Merkle signing key-value tables details: Export `Aristo` wrappers * Activate `CoreDb` tests why: Now, API seems to be stable enough for general tests. * Update `toHex()` usage why: Byteutils' `toHex()` is superior to `toSeq.mapIt(it.toHex(2)).join` * Split `aristo_transcode` => `aristo_serialise` + `aristo_blobify` why: + Different modules for different purposes + `aristo_serialise`: RLP encoding/decoding + `aristo_blobify`: Aristo database encoding/decoding * Compacted representation of small nodes' links instead of Keccak hashes why: Ethereum MPTs use Keccak hashes as node links if the size of an RLP encoded node is at least 32 bytes. Otherwise, the RLP encoded node value is used as a pseudo node link (rather than a hash.) Such a node is nor stored on key-value database. Rather the RLP encoded node value is stored instead of a lode link in a parent node instead. Only for the root hash, the top level node is always referred to by the hash. This feature needed an abstraction of the `HashKey` object which is now either a hash or a blob of length at most 31 bytes. 
This leaves two ways of representing an empty/void `HashKey` type, either as an empty blob of zero length, or the hash of an empty blob. * Update `CoreDb` interface (mainly reducing logger noise) * Fix copyright years (to make `Lint` happy)
2023-11-08 12:18:32 +00:00
eth/[common, trie/nibbles],
stew/interval_set,
../../aristo,
../aristo_walk/persistent,
".."/[aristo_desc, aristo_get, aristo_layers, aristo_vid]
# Singleton set holding only the vertex ID `LEAST_FREE_VID`. It serves as the
# reference value for the fringe cases that are recognised when vertex ID
# generator states are compared.
const Vid2 = toHashSet(@[VertexID(LEAST_FREE_VID)])
# ------------------------------------------------------------------------------
# Private helper
# ------------------------------------------------------------------------------
proc to(s: IntervalSetRef[VertexID,uint64]; T: type HashSet[VertexID]): T =
  ## Convert the interval set `s` to a set of vertex IDs as it would appear
  ## with a vertex generator state list.
  ##
  ## An interval reaching up to `high(VertexID)` is represented by its lower
  ## bound only. All other intervals are expanded point by point where IDs
  ## below `LEAST_FREE_VID` are left out.
  ##
  ## The result stays empty unless `s.total` is smaller than `high(uint64)`.
  if high(uint64) <= s.total:
    return
  for iv in s.increasing:
    if iv.maxPt != high(VertexID):
      # Bounded interval: list all its members that may be handed out
      for vid in iv.minPt .. iv.maxPt:
        if LEAST_FREE_VID <= vid.distinctBase:
          result.incl vid
    else:
      # Last (open ended) interval: its lower bound stands for the rest
      result.incl iv.minPt
proc toNodeBE(
    vtx: VertexRef;                    # Vertex to convert
    db: AristoDbRef;                   # Database, top layer
      ): Result[NodeRef,VertexID] =
  ## Similar to `toNode()` but fetching from the backend only.
  ##
  ## Builds a `NodeRef` from the argument `vtx` where the keys of the linked
  ## vertices are looked up via `getKeyBE()`. On success, the node is
  ## returned. Otherwise the error value is the ID of the first linked vertex
  ## for which no valid key could be fetched.
  case vtx.vType:
  of Leaf:
    let node = NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData)
    # An account payload with a valid storage ID links to another vertex,
    # the key of which is kept in `key[0]`.
    if vtx.lData.pType == AccountData:
      let vid = vtx.lData.account.storageID
      if vid.isValid:
        let rc = db.getKeyBE vid
        if rc.isErr or not rc.value.isValid:
          return err(vid)
        node.key[0] = rc.value
    return ok node

  of Branch:
    let node = NodeRef(vType: Branch, bVid: vtx.bVid)
    for n in 0 .. 15:
      let vid = vtx.bVid[n]
      if vid.isValid:
        let rc = db.getKeyBE vid
        if rc.isOk and rc.value.isValid:
          node.key[n] = rc.value
        else:
          return err(vid)
      else:
        # Unused link slot
        node.key[n] = VOID_HASH_KEY
    return ok node

  of Extension:
    let
      vid = vtx.eVid
      rc = db.getKeyBE vid
    if rc.isOk and rc.value.isValid:
      let node = NodeRef(vType: Extension, ePfx: vtx.ePfx, eVid: vid)
      node.key[0] = rc.value
      return ok node
    return err(vid)
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef](
    _: type T;
    db: AristoDbRef;                   # Database, top layer
    relax: bool;                       # Not comparing hashes if `true`
    cache: bool;                       # Also verify against top layer cache
    fifos = true;                      # Also verify cascaded filter fifos
      ): Result[void,(VertexID,AristoError)] =
  ## Make sure that each vertex has a Merkle hash and vice versa. Also check
  ## the vertex ID generator state.
  ##
  ## Arguments:
  ## * `relax` -- if set, the keys re-calculated from the vertices are not
  ##   compared against the stored keys.
  ## * `cache` -- if set, the top layer cache is verified as well.
  ## * `fifos` -- if set, the cascaded filter fifos are verified provided
  ##   the backend has any. This check is part of the cache verification and
  ##   is only run when `cache` is set.
  ##
  ## On failure, the error value is a pair made up of the offending vertex ID
  ## and an error code. The vertex ID is `VertexID(0)` when the error does
  ## not relate to a particular vertex.

  # Start with all vertex IDs that might be handed out by the generator.
  # The IDs found with a key on the backend are removed below.
  let vids = IntervalSetRef[VertexID,uint64].init()
  discard vids.merge Interval[VertexID,uint64].new(
    VertexID(LEAST_FREE_VID),high(VertexID))

  # Each backend vertex must be valid, well formed and have a key
  for (vid,vtx) in T.walkVtxBE db:
    if not vtx.isValid:
      return err((vid,CheckBeVtxInvalid))
    let rc = db.getKeyBE vid
    if rc.isErr or not rc.value.isValid:
      return err((vid,CheckBeKeyMissing))
    case vtx.vType:
    of Leaf:
      discard
    of Branch:
      # A branch vertex needs at least two valid links
      block check42Links:
        var seen = false
        for n in 0 .. 15:
          if vtx.bVid[n].isValid:
            if seen:
              break check42Links
            seen = true
        return err((vid,CheckBeVtxBranchLinksMissing))
    of Extension:
      if vtx.ePfx.len == 0:
        return err((vid,CheckBeVtxExtPfxMissing))

  # Each backend key must be valid and refer to a vertex it can be derived
  # from
  for (vid,key) in T.walkKeyBE db:
    if not key.isValid:
      return err((vid,CheckBeKeyInvalid))
    let vtx = db.getVtxBE(vid).valueOr:
      return err((vid,CheckBeVtxMissing))
    let node = vtx.toNodeBE(db).valueOr: # backend links only
      return err((vid,CheckBeKeyCantCompile))
    if not relax:
      let expected = node.digestTo(HashKey)
      if expected != key:
        return err((vid,CheckBeKeyMismatch))
    # This vertex ID is in use
    discard vids.reduce Interval[VertexID,uint64].new(vid,vid)

  # Compare calculated state against database state
  block:
    # Extract vertex ID generator state
    let vGen = block:
      let rc = db.getIdgBE()
      if rc.isOk:
        rc.value.toHashSet
      elif rc.error == GetIdgNotFound:
        EmptyVidSeq.toHashSet
      else:
        return err((VertexID(0),rc.error))
    let
      vGenExpected = vids.to(HashSet[VertexID])
      delta = vGenExpected -+- vGen # symmetric difference
    if 0 < delta.len:
      # Exclude fringe case when there is a single root vertex only
      if vGenExpected != Vid2 or 0 < vGen.len:
        return err((delta.toSeq.sorted[^1],CheckBeGarbledVGen))

  # Check top layer cache against backend
  if cache:
    if db.dirty:
      return err((VertexID(0),CheckBeCacheIsDirty))

    # Check structural table
    for (vid,vtx) in db.layersWalkVtx:
      let lbl = db.layersGetLabel(vid).valueOr:
        # A `kMap[]` entry must exist.
        return err((vid,CheckBeCacheKeyMissing))
      if vtx.isValid:
        # Register existing vid against backend generator state
        discard vids.reduce Interval[VertexID,uint64].new(vid,vid)
      else:
        # Some vertex is to be deleted, the key must be empty
        if lbl.isValid:
          return err((vid,CheckBeCacheKeyNonEmpty))
        # There must be a representation on the backend DB unless in a TX
        if db.getVtxBE(vid).isErr and db.stack.len == 0:
          return err((vid,CheckBeCacheVidUnsynced))
        # Register deleted vid against backend generator state
        discard vids.merge Interval[VertexID,uint64].new(vid,vid)

    # Check cascaded fifos
    if fifos and
       not db.backend.isNil and
       not db.backend.filters.isNil:
      # The `src` entry of a filter must match the `trg` entry of its
      # predecessor. The walk starts with the key of `VertexID(1)` as
      # returned by `getKeyUBE()`.
      var lastTrg = db.getKeyUBE(VertexID(1)).get(
        otherwise = VOID_HASH_KEY).to(Hash256)
      for (qid,filter) in db.backend.T.walkFifoBe: # walk in fifo order
        if filter.src != lastTrg:
          return err((VertexID(0),CheckBeFifoSrcTrgMismatch))
        if filter.trg != filter.kMap.getOrVoid(VertexID 1).to(Hash256):
          return err((VertexID(1),CheckBeFifoTrgNotStateRoot))
        lastTrg = filter.trg

    # Check key table
    for (vid,lbl) in db.layersWalkLabel:
      let vtx = db.getVtx vid
      if db.layersGetVtx(vid).isErr and not vtx.isValid:
        return err((vid,CheckBeCacheKeyDangling))
      if not lbl.isValid or relax:
        continue
      if not vtx.isValid:
        return err((vid,CheckBeCacheVtxDangling))
      let node = vtx.toNode(db).valueOr: # compile cache first
        return err((vid,CheckBeCacheKeyCantCompile))
      let expected = node.digestTo(HashKey)
      if expected != lbl.key:
        return err((vid,CheckBeCacheKeyMismatch))

    # Check vGen
    let
      vGen = db.vGen.vidReorg.toHashSet
      vGenExpected = vids.to(HashSet[VertexID])
      delta = vGenExpected -+- vGen # symmetric difference
    if 0 < delta.len:
      if vGen == Vid2 and vGenExpected.len == 0:
        # Fringe case when the database is empty
        discard
      elif vGen.len == 0 and vGenExpected == Vid2:
        # Fringe case when there is a single root vertex only
        discard
      else:
        # The only other difference accepted is `VertexID(1)` showing up in
        # the expected set while missing from the generator state.
        let deltaSeq = delta.toSeq
        if deltaSeq.len != 1 or
           deltaSeq[0] != VertexID(1) or VertexID(1) in vGen:
          return err((deltaSeq.sorted[^1],CheckBeCacheGarbledVGen))

  ok()
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------