nimbus-eth1/tests/replay/pp_light.nim

227 lines
6.8 KiB
Nim
Raw Normal View History

# Nimbus
Core db and aristo updates for destructor and tx logic (#1894) * Disable `TransactionID` related functions from `state_db.nim` why: Functions `getCommittedStorage()` and `updateOriginalRoot()` from the `state_db` module are nowhere used. The emulation of a legacy `TransactionID` type functionality is administratively expensive to provide by `Aristo` (the legacy DB version is only partially implemented, anyway). As there is no other place where `TransactionID`s are used, they will not be provided by the `Aristo` variant of the `CoreDb`. For the legacy DB API, nothing will change. * Fix copyright headers in source code * Get rid of compiler warning * Update Aristo code, remove unused `merge()` variant, export `hashify()` why: Adapt to upcoming `CoreDb` wrapper * Remove synced tx feature from `Aristo` why: + This feature allowed to synchronise transaction methods like begin, commit, and rollback for a group of descriptors. + The feature is over engineered and not needed for `CoreDb`, neither is it complete (some convergence features missing.) * Add debugging helpers to `Kvt` also: Update database iterator, add count variable yield argument similar to `Aristo`. * Provide optional destructors for `CoreDb` API why; For the upcoming Aristo wrapper, this allows to control when certain smart destruction and update can take place. The auto destructor works fine in general when the storage/cache strategy is known and acceptable when creating descriptors. * Add update option for `CoreDb` API function `hash()` why; The hash function is typically used to get the state root of the MPT. Due to lazy hashing, this might be not available on the `Aristo` DB. So the `update` function asks for re-hashing the gurrent state changes if needed. * Update API tracking log mode: `info` => `debug * Use shared `Kvt` descriptor in new Ledger API why: No need to create a new descriptor all the time
2023-11-16 19:35:03 +00:00
# Copyright (c) 2022-2023 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed except
# according to those terms.
## Pretty printing, an alternative to `$` for debugging
## ----------------------------------------------------
##
## minimal dependencies, avoiding circular import
import
std/[sequtils, strformat, strutils, tables, times],
eth/common,
stew/byteutils,
../../nimbus/constants
export
sequtils, strformat, strutils
# ------------------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------------------
func reGroup(q: openArray[int]; itemsPerSegment = 16): seq[seq[int]] =
var top = 0
while top < q.len:
let w = top
top = min(w + itemsPerSegment, q.len)
result.add q[w ..< top]
# ------------------------------------------------------------------------------
# Public functions, units pretty printer
# ------------------------------------------------------------------------------
func ppUs*(elapsed: Duration): string =
result = $elapsed.inMicroseconds
let ns = elapsed.inNanoseconds mod 1_000 # fraction of a micro second
if ns != 0:
# to rounded deca milli seconds
let du = (ns + 5i64) div 10i64
result &= &".{du:02}"
result &= "us"
func ppMs*(elapsed: Duration): string =
2022-04-08 04:54:11 +00:00
result = $elapsed.inMilliseconds
let ns = elapsed.inNanoseconds mod 1_000_000 # fraction of a milli second
if ns != 0:
# to rounded deca milli seconds
let dm = (ns + 5_000i64) div 10_000i64
result &= &".{dm:02}"
result &= "ms"
func ppSecs*(elapsed: Duration): string =
result = $elapsed.inSeconds
let ns = elapsed.inNanoseconds mod 1_000_000_000 # fraction of a second
if ns != 0:
# round up
let ds = (ns + 5_000_000i64) div 10_000_000i64
result &= &".{ds:02}"
result &= "s"
func ppMins*(elapsed: Duration): string =
result = $elapsed.inMinutes
let ns = elapsed.inNanoseconds mod 60_000_000_000 # fraction of a minute
if ns != 0:
# round up
let dm = (ns + 500_000_000i64) div 1_000_000_000i64
result &= &":{dm:02}"
result &= "m"
func toKMG*[T](s: T): string =
func subst(s: var string; tag, new: string): bool =
if tag.len < s.len and s[s.len - tag.len ..< s.len] == tag:
s = s[0 ..< s.len - tag.len] & new
return true
result = $s
for w in [("000", "K"),("000K","M"),("000M","G"),("000G","T"),
("000T","P"),("000P","E"),("000E","Z"),("000Z","Y")]:
if not result.subst(w[0],w[1]):
return
func pp*(elapsed: Duration): string =
try:
if 0 < times.inMinutes(elapsed):
result = elapsed.ppMins
elif 0 < times.inSeconds(elapsed):
result = elapsed.ppSecs
elif 0 < times.inMilliSeconds(elapsed):
result = elapsed.ppMs
Core db aristo hasher profiling and timing improvement (#1938) * Explicitly use shared `Kvt` table on `Ledger` and `Clique` lookup. why: Speeds up lookup time with `Aristo` backend. For writing `Clique` data, the `Companion` model allows to write `Clique` data past the database locked by evm transactions. * Implement `CoreDb` profiling with API tracking why: Chasing time spent per APT procs ... * Implement `Ledger` profiling with API tracking why: Chasing time spent per APT procs ... * Always hashify when commiting or storing why: A dirty cache makes no sense when committing * Make sure that a zero key is created when adding/updating vertices why: This is an error fix mainly for edge cases. A typical error was that the root key got deleted when there were only a few vertices left on the DB. * Need all created and changed vertices zero-keyed on the cache why: A zero key (i.e. empty Merkle hash) indicates that a vertex key needs to be updated. This would not be needed immediately after a merge as there is an actual leaf path on the cache layer. But after subsequent merge and delete operations this information might get blurred. * Re-org hashing algorithm why: Apart from errors, the previous implementation was too slow for two reasons: + some control hashes were calculated for debugging (now all verification is done in `aristo_check` module) + the leaf paths stored on the cache are used to build the labelling (aka hashing) schedule; there paths were accumulated over successive hash sessions although it is clear that all keys were generated, already
2023-12-12 17:47:41 +00:00
elif 0 < times.inMicroSeconds(elapsed):
result = elapsed.ppUs
Core db aristo hasher profiling and timing improvement (#1938) * Explicitly use shared `Kvt` table on `Ledger` and `Clique` lookup. why: Speeds up lookup time with `Aristo` backend. For writing `Clique` data, the `Companion` model allows to write `Clique` data past the database locked by evm transactions. * Implement `CoreDb` profiling with API tracking why: Chasing time spent per APT procs ... * Implement `Ledger` profiling with API tracking why: Chasing time spent per APT procs ... * Always hashify when commiting or storing why: A dirty cache makes no sense when committing * Make sure that a zero key is created when adding/updating vertices why: This is an error fix mainly for edge cases. A typical error was that the root key got deleted when there were only a few vertices left on the DB. * Need all created and changed vertices zero-keyed on the cache why: A zero key (i.e. empty Merkle hash) indicates that a vertex key needs to be updated. This would not be needed immediately after a merge as there is an actual leaf path on the cache layer. But after subsequent merge and delete operations this information might get blurred. * Re-org hashing algorithm why: Apart from errors, the previous implementation was too slow for two reasons: + some control hashes were calculated for debugging (now all verification is done in `aristo_check` module) + the leaf paths stored on the cache are used to build the labelling (aka hashing) schedule; there paths were accumulated over successive hash sessions although it is clear that all keys were generated, already
2023-12-12 17:47:41 +00:00
else:
result = $elapsed.inNanoSeconds & "ns"
except ValueError:
result = $elapsed
# ------------------------------------------------------------------------------
# Public functions, pretty printer
# ------------------------------------------------------------------------------
func pp*(s: string; hex = false): string =
if hex:
let n = (s.len + 1) div 2
(if s.len < 20: s else: s[0 .. 5] & ".." & s[s.len-8 .. s.len-1]) &
"[" & (if 0 < n: "#" & $n else: "") & "]"
elif s.len <= 30:
s
else:
(if (s.len and 1) == 0: s[0 ..< 8] else: "0" & s[0 ..< 7]) &
"..(" & $s.len & ").." & s[s.len-16 ..< s.len]
func pp*(q: openArray[int]; itemsPerLine: int; lineSep: string): string =
doAssert q == q.reGroup(itemsPerLine).concat
q.reGroup(itemsPerLine)
.mapIt(it.mapIt(&"0x{it:02x}").join(", "))
.join("," & lineSep)
func pp*(a: MDigest[256]; collapse = true): string =
if not collapse:
Aristo db update for short nodes key edge cases (#1887) * Aristo: Provide key-value list signature calculator detail: Simple wrappers around `Aristo` core functionality * Update new API for `CoreDb` details: + Renamed new API functions `contains()` => `hasKey()` or `hasPath()` which disables the `in` operator on non-boolean `contains()` functions + The functions `get()` and `fetch()` always return a not-found error if there is no item, available. The new functions `getOrEmpty()` and `mergeOrEmpty()` return an an empty `Blob` if there is no such key found. * Rewrite `core_apps.nim` using new API from `CoreDb` * Use `Aristo` functionality for calculating Merkle signatures details: For debugging, the `VerifyAristoForMerkleRootCalc` can be set so that `Aristo` results will be verified against the legacy versions. * Provide general interface for Merkle signing key-value tables details: Export `Aristo` wrappers * Activate `CoreDb` tests why: Now, API seems to be stable enough for general tests. * Update `toHex()` usage why: Byteutils' `toHex()` is superior to `toSeq.mapIt(it.toHex(2)).join` * Split `aristo_transcode` => `aristo_serialise` + `aristo_blobify` why: + Different modules for different purposes + `aristo_serialise`: RLP encoding/decoding + `aristo_blobify`: Aristo database encoding/decoding * Compacted representation of small nodes' links instead of Keccak hashes why: Ethereum MPTs use Keccak hashes as node links if the size of an RLP encoded node is at least 32 bytes. Otherwise, the RLP encoded node value is used as a pseudo node link (rather than a hash.) Such a node is nor stored on key-value database. Rather the RLP encoded node value is stored instead of a lode link in a parent node instead. Only for the root hash, the top level node is always referred to by the hash. This feature needed an abstraction of the `HashKey` object which is now either a hash or a blob of length at most 31 bytes. This leaves two ways of representing an empty/void `HashKey` type, either as an empty blob of zero length, or the hash of an empty blob. * Update `CoreDb` interface (mainly reducing logger noise) * Fix copyright years (to make `Lint` happy)
2023-11-08 12:18:32 +00:00
a.data.toHex
elif a == ZERO_HASH256:
"ZERO_HASH256"
elif a == EMPTY_ROOT_HASH:
"EMPTY_ROOT_HASH"
elif a == EMPTY_UNCLE_HASH:
"EMPTY_UNCLE_HASH"
elif a == EMPTY_SHA3:
"EMPTY_SHA3"
elif a == ZERO_HASH256:
"ZERO_HASH256"
else:
Aristo db update for short nodes key edge cases (#1887) * Aristo: Provide key-value list signature calculator detail: Simple wrappers around `Aristo` core functionality * Update new API for `CoreDb` details: + Renamed new API functions `contains()` => `hasKey()` or `hasPath()` which disables the `in` operator on non-boolean `contains()` functions + The functions `get()` and `fetch()` always return a not-found error if there is no item, available. The new functions `getOrEmpty()` and `mergeOrEmpty()` return an an empty `Blob` if there is no such key found. * Rewrite `core_apps.nim` using new API from `CoreDb` * Use `Aristo` functionality for calculating Merkle signatures details: For debugging, the `VerifyAristoForMerkleRootCalc` can be set so that `Aristo` results will be verified against the legacy versions. * Provide general interface for Merkle signing key-value tables details: Export `Aristo` wrappers * Activate `CoreDb` tests why: Now, API seems to be stable enough for general tests. * Update `toHex()` usage why: Byteutils' `toHex()` is superior to `toSeq.mapIt(it.toHex(2)).join` * Split `aristo_transcode` => `aristo_serialise` + `aristo_blobify` why: + Different modules for different purposes + `aristo_serialise`: RLP encoding/decoding + `aristo_blobify`: Aristo database encoding/decoding * Compacted representation of small nodes' links instead of Keccak hashes why: Ethereum MPTs use Keccak hashes as node links if the size of an RLP encoded node is at least 32 bytes. Otherwise, the RLP encoded node value is used as a pseudo node link (rather than a hash.) Such a node is nor stored on key-value database. Rather the RLP encoded node value is stored instead of a lode link in a parent node instead. Only for the root hash, the top level node is always referred to by the hash. This feature needed an abstraction of the `HashKey` object which is now either a hash or a blob of length at most 31 bytes. This leaves two ways of representing an empty/void `HashKey` type, either as an empty blob of zero length, or the hash of an empty blob. * Update `CoreDb` interface (mainly reducing logger noise) * Fix copyright years (to make `Lint` happy)
2023-11-08 12:18:32 +00:00
a.data.toHex.join[56 .. 63]
func pp*(a: openArray[MDigest[256]]; collapse = true): string =
"@[" & a.toSeq.mapIt(it.pp).join(" ") & "]"
func pp*(q: openArray[int]; itemsPerLine: int; indent: int): string =
q.pp(itemsPerLine = itemsPerLine, lineSep = "\n" & " ".repeat(max(1,indent)))
func pp*(q: openArray[byte]; noHash = false): string =
if q.len == 32 and not noHash:
var a: array[32,byte]
for n in 0..31: a[n] = q[n]
MDigest[256](data: a).pp
else:
Aristo db update for short nodes key edge cases (#1887) * Aristo: Provide key-value list signature calculator detail: Simple wrappers around `Aristo` core functionality * Update new API for `CoreDb` details: + Renamed new API functions `contains()` => `hasKey()` or `hasPath()` which disables the `in` operator on non-boolean `contains()` functions + The functions `get()` and `fetch()` always return a not-found error if there is no item, available. The new functions `getOrEmpty()` and `mergeOrEmpty()` return an an empty `Blob` if there is no such key found. * Rewrite `core_apps.nim` using new API from `CoreDb` * Use `Aristo` functionality for calculating Merkle signatures details: For debugging, the `VerifyAristoForMerkleRootCalc` can be set so that `Aristo` results will be verified against the legacy versions. * Provide general interface for Merkle signing key-value tables details: Export `Aristo` wrappers * Activate `CoreDb` tests why: Now, API seems to be stable enough for general tests. * Update `toHex()` usage why: Byteutils' `toHex()` is superior to `toSeq.mapIt(it.toHex(2)).join` * Split `aristo_transcode` => `aristo_serialise` + `aristo_blobify` why: + Different modules for different purposes + `aristo_serialise`: RLP encoding/decoding + `aristo_blobify`: Aristo database encoding/decoding * Compacted representation of small nodes' links instead of Keccak hashes why: Ethereum MPTs use Keccak hashes as node links if the size of an RLP encoded node is at least 32 bytes. Otherwise, the RLP encoded node value is used as a pseudo node link (rather than a hash.) Such a node is nor stored on key-value database. Rather the RLP encoded node value is stored instead of a lode link in a parent node instead. Only for the root hash, the top level node is always referred to by the hash. This feature needed an abstraction of the `HashKey` object which is now either a hash or a blob of length at most 31 bytes. This leaves two ways of representing an empty/void `HashKey` type, either as an empty blob of zero length, or the hash of an empty blob. * Update `CoreDb` interface (mainly reducing logger noise) * Fix copyright years (to make `Lint` happy)
2023-11-08 12:18:32 +00:00
q.toHex.pp(hex = true)
# ------------------------------------------------------------------------------
# Elapsed time pretty printer
# ------------------------------------------------------------------------------
template showElapsed*(noisy: bool; info: string; code: untyped) =
block:
Core db aristo hasher profiling and timing improvement (#1938) * Explicitly use shared `Kvt` table on `Ledger` and `Clique` lookup. why: Speeds up lookup time with `Aristo` backend. For writing `Clique` data, the `Companion` model allows to write `Clique` data past the database locked by evm transactions. * Implement `CoreDb` profiling with API tracking why: Chasing time spent per APT procs ... * Implement `Ledger` profiling with API tracking why: Chasing time spent per APT procs ... * Always hashify when commiting or storing why: A dirty cache makes no sense when committing * Make sure that a zero key is created when adding/updating vertices why: This is an error fix mainly for edge cases. A typical error was that the root key got deleted when there were only a few vertices left on the DB. * Need all created and changed vertices zero-keyed on the cache why: A zero key (i.e. empty Merkle hash) indicates that a vertex key needs to be updated. This would not be needed immediately after a merge as there is an actual leaf path on the cache layer. But after subsequent merge and delete operations this information might get blurred. * Re-org hashing algorithm why: Apart from errors, the previous implementation was too slow for two reasons: + some control hashes were calculated for debugging (now all verification is done in `aristo_check` module) + the leaf paths stored on the cache are used to build the labelling (aka hashing) schedule; there paths were accumulated over successive hash sessions although it is clear that all keys were generated, already
2023-12-12 17:47:41 +00:00
let seStartTrackling = getTime()
block:
code
if noisy:
Core db aristo hasher profiling and timing improvement (#1938) * Explicitly use shared `Kvt` table on `Ledger` and `Clique` lookup. why: Speeds up lookup time with `Aristo` backend. For writing `Clique` data, the `Companion` model allows to write `Clique` data past the database locked by evm transactions. * Implement `CoreDb` profiling with API tracking why: Chasing time spent per APT procs ... * Implement `Ledger` profiling with API tracking why: Chasing time spent per APT procs ... * Always hashify when commiting or storing why: A dirty cache makes no sense when committing * Make sure that a zero key is created when adding/updating vertices why: This is an error fix mainly for edge cases. A typical error was that the root key got deleted when there were only a few vertices left on the DB. * Need all created and changed vertices zero-keyed on the cache why: A zero key (i.e. empty Merkle hash) indicates that a vertex key needs to be updated. This would not be needed immediately after a merge as there is an actual leaf path on the cache layer. But after subsequent merge and delete operations this information might get blurred. * Re-org hashing algorithm why: Apart from errors, the previous implementation was too slow for two reasons: + some control hashes were calculated for debugging (now all verification is done in `aristo_check` module) + the leaf paths stored on the cache are used to build the labelling (aka hashing) schedule; there paths were accumulated over successive hash sessions although it is clear that all keys were generated, already
2023-12-12 17:47:41 +00:00
let elpd {.inject.} = getTime() - seStartTrackling
if 0 < times.inSeconds(elpd):
echo "*** ", info, &": {elpd.ppSecs:>4}"
else:
echo "*** ", info, &": {elpd.ppMs:>4}"
template showElapsed*(
noisy: bool;
info: string;
elapsed: Duration;
code: untyped) =
block:
Core db aristo hasher profiling and timing improvement (#1938) * Explicitly use shared `Kvt` table on `Ledger` and `Clique` lookup. why: Speeds up lookup time with `Aristo` backend. For writing `Clique` data, the `Companion` model allows to write `Clique` data past the database locked by evm transactions. * Implement `CoreDb` profiling with API tracking why: Chasing time spent per APT procs ... * Implement `Ledger` profiling with API tracking why: Chasing time spent per APT procs ... * Always hashify when commiting or storing why: A dirty cache makes no sense when committing * Make sure that a zero key is created when adding/updating vertices why: This is an error fix mainly for edge cases. A typical error was that the root key got deleted when there were only a few vertices left on the DB. * Need all created and changed vertices zero-keyed on the cache why: A zero key (i.e. empty Merkle hash) indicates that a vertex key needs to be updated. This would not be needed immediately after a merge as there is an actual leaf path on the cache layer. But after subsequent merge and delete operations this information might get blurred. * Re-org hashing algorithm why: Apart from errors, the previous implementation was too slow for two reasons: + some control hashes were calculated for debugging (now all verification is done in `aristo_check` module) + the leaf paths stored on the cache are used to build the labelling (aka hashing) schedule; there paths were accumulated over successive hash sessions although it is clear that all keys were generated, already
2023-12-12 17:47:41 +00:00
let seStartTrackling = getTime()
block:
code
block:
Core db aristo hasher profiling and timing improvement (#1938) * Explicitly use shared `Kvt` table on `Ledger` and `Clique` lookup. why: Speeds up lookup time with `Aristo` backend. For writing `Clique` data, the `Companion` model allows to write `Clique` data past the database locked by evm transactions. * Implement `CoreDb` profiling with API tracking why: Chasing time spent per APT procs ... * Implement `Ledger` profiling with API tracking why: Chasing time spent per APT procs ... * Always hashify when commiting or storing why: A dirty cache makes no sense when committing * Make sure that a zero key is created when adding/updating vertices why: This is an error fix mainly for edge cases. A typical error was that the root key got deleted when there were only a few vertices left on the DB. * Need all created and changed vertices zero-keyed on the cache why: A zero key (i.e. empty Merkle hash) indicates that a vertex key needs to be updated. This would not be needed immediately after a merge as there is an actual leaf path on the cache layer. But after subsequent merge and delete operations this information might get blurred. * Re-org hashing algorithm why: Apart from errors, the previous implementation was too slow for two reasons: + some control hashes were calculated for debugging (now all verification is done in `aristo_check` module) + the leaf paths stored on the cache are used to build the labelling (aka hashing) schedule; there paths were accumulated over successive hash sessions although it is clear that all keys were generated, already
2023-12-12 17:47:41 +00:00
elapsed = getTime() - seStartTrackling
if noisy:
let elpd {.inject.} = elapsed
if 0 < times.inSeconds(elpd):
echo "*** ", info, &": {elpd.ppSecs:>4}"
else:
echo "*** ", info, &": {elpd.ppMs:>4}"
Core db aristo hasher profiling and timing improvement (#1938) * Explicitly use shared `Kvt` table on `Ledger` and `Clique` lookup. why: Speeds up lookup time with `Aristo` backend. For writing `Clique` data, the `Companion` model allows to write `Clique` data past the database locked by evm transactions. * Implement `CoreDb` profiling with API tracking why: Chasing time spent per APT procs ... * Implement `Ledger` profiling with API tracking why: Chasing time spent per APT procs ... * Always hashify when commiting or storing why: A dirty cache makes no sense when committing * Make sure that a zero key is created when adding/updating vertices why: This is an error fix mainly for edge cases. A typical error was that the root key got deleted when there were only a few vertices left on the DB. * Need all created and changed vertices zero-keyed on the cache why: A zero key (i.e. empty Merkle hash) indicates that a vertex key needs to be updated. This would not be needed immediately after a merge as there is an actual leaf path on the cache layer. But after subsequent merge and delete operations this information might get blurred. * Re-org hashing algorithm why: Apart from errors, the previous implementation was too slow for two reasons: + some control hashes were calculated for debugging (now all verification is done in `aristo_check` module) + the leaf paths stored on the cache are used to build the labelling (aka hashing) schedule; there paths were accumulated over successive hash sessions although it is clear that all keys were generated, already
2023-12-12 17:47:41 +00:00
template profileSection*(
noisy: bool;
info: string;
state: var (Duration, int);
suspend: bool;
code: untyped) =
block:
let psStartProfiling = getTime()
block:
code
if not suspend:
let elpd = getTime() - psStartProfiling
if noisy:
echo "*** ", info, ": ", elpd.pp
state[0] += elpd
state[1].inc
template profileSection*(
noisy: bool;
info: string;
state: var (Duration, int);
code: untyped) =
noisy.profileSection(info, state, suspend=false):
code
template catchException*(info: string; trace: bool; code: untyped) =
block:
try:
code
except CatchableError as e:
if trace:
echo "*** ", info, ": exception ", e.name, "(", e.msg, ")"
echo " ", e.getStackTrace.strip.replace("\n","\n ")
template catchException*(info: string; code: untyped) =
catchException(info, false, code)
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------