615 lines
17 KiB
Nim
Raw Normal View History

Core db and aristo updates for destructor and tx logic (#1894) * Disable `TransactionID` related functions from `state_db.nim` why: Functions `getCommittedStorage()` and `updateOriginalRoot()` from the `state_db` module are nowhere used. The emulation of a legacy `TransactionID` type functionality is administratively expensive to provide by `Aristo` (the legacy DB version is only partially implemented, anyway). As there is no other place where `TransactionID`s are used, they will not be provided by the `Aristo` variant of the `CoreDb`. For the legacy DB API, nothing will change. * Fix copyright headers in source code * Get rid of compiler warning * Update Aristo code, remove unused `merge()` variant, export `hashify()` why: Adapt to upcoming `CoreDb` wrapper * Remove synced tx feature from `Aristo` why: + This feature allowed to synchronise transaction methods like begin, commit, and rollback for a group of descriptors. + The feature is over engineered and not needed for `CoreDb`, neither is it complete (some convergence features missing.) * Add debugging helpers to `Kvt` also: Update database iterator, add count variable yield argument similar to `Aristo`. * Provide optional destructors for `CoreDb` API why; For the upcoming Aristo wrapper, this allows to control when certain smart destruction and update can take place. The auto destructor works fine in general when the storage/cache strategy is known and acceptable when creating descriptors. * Add update option for `CoreDb` API function `hash()` why; The hash function is typically used to get the state root of the MPT. Due to lazy hashing, this might be not available on the `Aristo` DB. So the `update` function asks for re-hashing the gurrent state changes if needed. * Update API tracking log mode: `info` => `debug * Use shared `Kvt` descriptor in new Ledger API why: No need to create a new descriptor all the time
2023-11-16 19:35:03 +00:00
# Nimbus
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or
# distributed except according to those terms.
## Aristo (aka Patricia) DB records transaction based merge test
import
std/[algorithm, bitops, sequtils, sets, tables],
eth/common,
results,
unittest2,
stew/endians2,
../../nimbus/db/opts,
../../nimbus/db/aristo/[
aristo_check,
aristo_debug,
aristo_delete,
aristo_desc,
aristo_get,
aristo_hike,
aristo_init/persistent,
aristo_layers,
aristo_merge,
aristo_nearby,
aristo_tx],
../replay/xcheck,
./test_helpers
type
PrngDesc = object
## State holder for the reproducible pseudo-random number generator
## implemented by `posixPrngRand()` and the `rand()` helpers of this file.
prng: uint32 ## random state
KnownHasherFailure* = seq[(string,(int,AristoError))]
## List of failure descriptors, each entry reads
## (<sample-name> & "#" <instance>, (<vertex-id>,<error-symbol>))
const
MaxFilterBulk = 150_000
## Policy setting (originally documented for `pack()`). In this file it is
## the threshold used by `schedStow()`: when the larger of the layers meter
## and the filter meter exceeds it, `persist()` is called instead of `stow()`.
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
proc posixPrngRand(state: var uint32): byte =
  ## One step of the linear congruential generator given as `rand()` example
  ## in POSIX.1-2001, see manual page rand(3).
  ##
  ## The argument `state` is advanced in place (the `uint32` arithmetic wraps
  ## around). The return value is the second byte of the 15 bit sample taken
  ## from the upper half of the new state, so it is always less than 128.
  const
    multiplier = 1103515245
    increment = 12345
    sampleMask = 32767                # 2^15 - 1, i.e. keep 15 bits
  state = state * multiplier + increment
  byte(((state shr 16) and sampleMask) shr 8)
proc rand[W: SomeInteger|VertexID](ap: var PrngDesc; T: type W): T =
  ## Produce a pseudo-random value of type `T` by drawing `sizeof(T)` bytes
  ## from the generator state `ap.prng` (which is advanced accordingly) and
  ## assembling them in big-endian order.
  var a: array[sizeof T,byte]
  for n in 0 ..< sizeof T:
    a[n] = ap.prng.posixPrngRand().byte

  # Exactly one branch may declare `w`. This must be a single
  # `when`/`elif`/`else` chain: with independent `when` statements the
  # trailing `else` would also fire for 1 and 2 byte types and declare `w`
  # a second time.
  when sizeof(T) == 1:
    let w = uint8.fromBytesBE(a).T
  elif sizeof(T) == 2:
    let w = uint16.fromBytesBE(a).T
  elif sizeof(T) == 4:
    let w = uint32.fromBytesBE(a).T
  else:
    let w = uint64.fromBytesBE(a).T

  when T is SomeUnsignedInt:
    # That way, `fromBytesBE()` can be applied to `uint`
    result = w
  else:
    # That way the result is independent of endianness
    (addr result).copyMem(unsafeAddr w, sizeof w)
proc init(T: type PrngDesc; seed: int): PrngDesc =
  ## Create a generator descriptor from `seed`. Only the lower 31 bits of
  ## the seed are used for the initial state.
  PrngDesc(prng: uint32(seed and 0x7fffffff))
proc rand(td: var PrngDesc; top: int): int =
  ## Pseudo-random number `w` with `0 <= w < top`, found by rejection
  ## sampling: random integers are cut down with an all-ones bit mask as wide
  ## as the binary representation of `top` and the first one below `top` is
  ## returned. For a non-positive `top` the result is `0`.
  ##
  ## An assertion is raised if none of 100 samples is accepted.
  if top <= 0:
    return 0

  let
    nBits = 8 * sizeof(int) - top.countLeadingZeroBits
    mask = (1 shl nBits) - 1
  var attemptsLeft = 100
  while 0 < attemptsLeft:
    let w = mask and td.rand(int)
    if w < top:
      return w
    attemptsLeft.dec
  raiseAssert "Not here (!)"
# -----------------------
proc randomisedLeafs(
    db: AristoDbRef;
    ltys: HashSet[LeafTie];
    td: var PrngDesc;
      ): Result[seq[(LeafTie,VertexID)],(VertexID,AristoError)] =
  ## For each leaf key of `ltys`, look up the path on `db` with `hikeUp()`
  ## and pair the key with the vertex ID of the last leg of that path. The
  ## pairs are sorted by leaf key and then, provided there are more than two
  ## of them, shuffled using the generator `td`.
  ##
  ## The first failing `hikeUp()` aborts the function, forwarding the first
  ## two components of its error.
  var leafVids = newSeqOfCap[(LeafTie,VertexID)](ltys.len)
  for lty in ltys:
    let rc = lty.hikeUp(db)
    if rc.isErr:
      return err((rc.error[0],rc.error[1]))
    leafVids.add (lty,rc.value.legs[^1].wp.vid)

  # Sort by leaf key before shuffling
  leafVids.sort(
    cmp = proc(a,b: (LeafTie,VertexID)): int = cmp(a[0],b[0]))

  if 2 < leafVids.len:
    # Swap each entry (but the last) with a random one at or after it
    for n in 0 ..< leafVids.len-1:
      let r = n + td.rand(leafVids.len - n)
      swap(leafVids[n], leafVids[r])

  ok leafVids
proc innerCleanUp(db: var AristoDbRef): bool {.discardable.} =
  ## Defer action: if there is a pending transaction on `db`, collapse it
  ## without committing. Then close the database via `finish(flush=true)`
  ## and reset the descriptor `db` to `nil`.
  ##
  ## The function returns `true` on success, including the case where `db`
  ## was `nil` already. If collapsing fails, the `xCheckRc` directive bails
  ## out early (presumably returning `false`) and `db` is left as-is.
  if not db.isNil:
    let rx = db.txTop()
    if rx.isOk:
      let rc = rx.value.collapse(commit=false)
      xCheckRc rc.error == 0
    db.finish(flush=true)
    db = AristoDbRef(nil)
  # Without this, the `bool` result could never be `true`
  true
proc schedStow(
    db: AristoDbRef;                  # Database
    chunkedMpt = false;               # Partial data (e.g. from `snap`)
      ): Result[void,AristoError] =
  ## Scheduled storage: save via `stow()` as long as both, the layers meter
  ## and the filter (aka balancer) meter do not exceed `MaxFilterBulk`.
  ## Otherwise `persist()` is used.
  let layersMeter = db.nLayersVtx() + db.nLayersKey()

  # No balancer counts as an empty filter
  var filterMeter = 0
  if not db.balancer.isNil:
    filterMeter = db.balancer.sTab.len + db.balancer.kMap.len

  if max(layersMeter, filterMeter) <= MaxFilterBulk:
    return db.stow(chunkedMpt=chunkedMpt)
  db.persist(chunkedMpt=chunkedMpt)
proc saveToBackend(
    tx: var AristoTxRef;
    chunkedMpt: bool;
    relax: bool;
    noisy: bool;
    debugID: int;
      ): bool =
  ## Commit both transaction frames of the database underlying `tx`, store
  ## the result via `schedStow()` and re-open two nested transactions, the
  ## innermost of which is handed back in `tx`.
  ##
  ## On entry, `tx` must have nesting level 2. The cache is verified with
  ## `checkTop()` before the first commit, the backend with
  ## `checkBE(relax=true)` between the commits and with `checkBE(relax=relax)`
  ## after saving. The result is `true` if all steps and checks succeeded;
  ## any failing `xCheck` directive bails out early.
  let db = tx.to(AristoDbRef)

  # Verify context: nesting level must be 2 (i.e. two transactions)
  xCheck tx.level == 2

  block:
    let rc = db.checkTop()
    xCheckRc rc.error == (0,0)

  # Commit and hashify the current layer
  block:
    let rc = tx.commit()
    xCheckRc rc.error == 0

  # Make sure MPT hashes are OK
  xCheck db.dirty.len == 0

  block:
    let rc = db.txTop()
    xCheckRc rc.error == 0
    tx = rc.value

  # Verify context: nesting level must be 1 (i.e. one transaction)
  xCheck tx.level == 1

  block:
    let rc = db.checkBE(relax=true)
    xCheckRc rc.error == (0,0)

  # Commit and save to backend
  block:
    let rc = tx.commit()
    xCheckRc rc.error == 0

  # Make sure MPT hashes are OK
  xCheck db.dirty.len == 0

  # There must be no transaction left, so `txTop()` is expected to fail
  block:
    let rc = db.txTop()
    xCheckErr rc.value.level < 0 # force error

  block:
    let rc = db.schedStow(chunkedMpt=chunkedMpt)
    xCheckRc rc.error == 0

  block:
    let rc = db.checkBE(relax=relax)
    xCheckRc rc.error == (0,0):
      noisy.say "***", "saveToBackend (8)", " debugID=", debugID

  # Update layers to original level
  tx = db.txBegin().value.to(AristoDbRef).txBegin().value

  true
proc saveToBackendWithOops(
    tx: var AristoTxRef;
    chunkedMpt: bool;
    noisy: bool;
    debugID: int;
    oops: (int,AristoError);
      ): bool =
  ## Variant of `saveToBackend()` without the `checkTop()`/`checkBE()`
  ## verification steps: commit both transaction frames of the database
  ## underlying `tx`, store the result via `schedStow()` and re-open two
  ## nested transactions, the innermost of which is handed back in `tx`.
  ##
  ## On entry, `tx` must have nesting level 2. The result is `true` if all
  ## steps succeeded; any failing `xCheck` directive bails out early.
  ##
  ## NOTE(review): the arguments `noisy`, `debugID`, and `oops` are not used
  ## by this function body. They are kept for interface compatibility.
  let db = tx.to(AristoDbRef)

  # Verify context: nesting level must be 2 (i.e. two transactions)
  xCheck tx.level == 2

  # Commit and hashify the current layer
  block:
    let rc = tx.commit()
    xCheckRc rc.error == 0

  # Make sure MPT hashes are OK
  xCheck db.dirty.len == 0

  block:
    let rc = db.txTop()
    xCheckRc rc.error == 0
    tx = rc.value

  # Verify context: nesting level must be 1 (i.e. one transaction)
  xCheck tx.level == 1

  # Commit and save to backend
  block:
    let rc = tx.commit()
    xCheckRc rc.error == 0

  # Make sure MPT hashes are OK
  xCheck db.dirty.len == 0

  # There must be no transaction left, so `txTop()` is expected to fail
  block:
    let rc = db.txTop()
    xCheckErr rc.value.level < 0 # force error

  block:
    let rc = db.schedStow(chunkedMpt=chunkedMpt)
    xCheckRc rc.error == 0

  # Update layers to original level
  tx = db.txBegin().value.to(AristoDbRef).txBegin().value

  true
proc fwdWalkVerify(
    db: AristoDbRef;
    root: VertexID;
    leftOver: HashSet[LeafTie];
    noisy: bool;
    debugID: int;
      ): bool =
  ## Traverse the leafs of the trie `root` with the `rightPairs()` iterator,
  ## starting at `low(LeafTie,root)`, and verify that
  ## * every visited key is a member of `leftOver`,
  ## * stepping `right()` past the last visited key fails with
  ##   `NearbyBeyondRange`,
  ## * the number of visited keys equals the size of `leftOver`.
  ##
  ## The result is `true` if all checks pass.
  let nLeafs = leftOver.len
  var
    leftOver = leftOver               # local copy, consumed by the walk
    n = 0
    tail = LeafTie()                  # last key visited

  for (key,_) in db.rightPairs low(LeafTie,root):
    xCheck key in leftOver:
      noisy.say "*** fwdWalkVerify", "id=", n + (nLeafs + 1) * debugID
    leftOver.excl key
    tail = key
    inc n

  # Verify stop condition. A zero root ID on `tail` is taken as "nothing
  # visited" in which case probing starts at the lowest possible key.
  let probe =
    if tail.root == VertexID(0): low(LeafTie,root)
    elif tail != high(LeafTie,root): tail.next
    else: tail
  let rc = probe.right db
  xCheck rc.isErr
  xCheck rc.error[1] == NearbyBeyondRange
  xCheck n == nLeafs

  true
proc revWalkVerify(
    db: AristoDbRef;
    root: VertexID;
    leftOver: HashSet[LeafTie];
    noisy: bool;
    debugID: int;
      ): bool =
  ## Traverse the leafs of the trie `root` with the `leftPairs()` iterator,
  ## starting at `high(LeafTie,root)`, and verify that
  ## * every visited key is a member of `leftOver`,
  ## * stepping `left()` past the last visited key fails with
  ##   `NearbyBeyondRange`,
  ## * the number of visited keys equals the size of `leftOver`.
  ##
  ## The result is `true` if all checks pass.
  let nLeafs = leftOver.len
  var
    leftOver = leftOver               # local copy, consumed by the walk
    n = 0
    tail = LeafTie()                  # last key visited

  for (key,_) in db.leftPairs high(LeafTie,root):
    xCheck key in leftOver:
      noisy.say "*** revWalkVerify", " id=", n + (nLeafs + 1) * debugID
    leftOver.excl key
    tail = key
    inc n

  # Verify stop condition. A zero root ID on `tail` is taken as "nothing
  # visited" in which case probing starts at the highest possible key.
  let probe =
    if tail.root == VertexID(0): high(LeafTie,root)
    elif tail != low(LeafTie,root): tail.prev
    else: tail
  let rc = probe.left db
  xCheck rc.isErr
  xCheck rc.error[1] == NearbyBeyondRange
  xCheck n == nLeafs

  true
Core db and aristo updates for destructor and tx logic (#1894) * Disable `TransactionID` related functions from `state_db.nim` why: Functions `getCommittedStorage()` and `updateOriginalRoot()` from the `state_db` module are nowhere used. The emulation of a legacy `TransactionID` type functionality is administratively expensive to provide by `Aristo` (the legacy DB version is only partially implemented, anyway). As there is no other place where `TransactionID`s are used, they will not be provided by the `Aristo` variant of the `CoreDb`. For the legacy DB API, nothing will change. * Fix copyright headers in source code * Get rid of compiler warning * Update Aristo code, remove unused `merge()` variant, export `hashify()` why: Adapt to upcoming `CoreDb` wrapper * Remove synced tx feature from `Aristo` why: + This feature allowed to synchronise transaction methods like begin, commit, and rollback for a group of descriptors. + The feature is over engineered and not needed for `CoreDb`, neither is it complete (some convergence features missing.) * Add debugging helpers to `Kvt` also: Update database iterator, add count variable yield argument similar to `Aristo`. * Provide optional destructors for `CoreDb` API why; For the upcoming Aristo wrapper, this allows to control when certain smart destruction and update can take place. The auto destructor works fine in general when the storage/cache strategy is known and acceptable when creating descriptors. * Add update option for `CoreDb` API function `hash()` why; The hash function is typically used to get the state root of the MPT. Due to lazy hashing, this might be not available on the `Aristo` DB. So the `update` function asks for re-hashing the gurrent state changes if needed. * Update API tracking log mode: `info` => `debug * Use shared `Kvt` descriptor in new Ledger API why: No need to create a new descriptor all the time
2023-11-16 19:35:03 +00:00
proc mergeRlpData*(
    db: AristoDbRef;                   # Database, top layer
    path: PathID;                      # Path into database
    rlpData: openArray[byte];          # RLP encoded payload data
      ): Result[void,AristoError] =
  ## Merge the argument `rlpData` as `RlpData` payload into the trie with
  ## root `VertexID(1)`, using the normalised `path` as leaf key.
  ##
  ## The `mergeLeaf()` errors `MergeLeafPathCachedAlready` and
  ## `MergeLeafPathOnBackendAlready` are treated as success. Any other error
  ## is passed on to the caller.
  let leaf = LeafTiePayload(
    leafTie: LeafTie(
      root: VertexID(1),
      path: path.normal),
    payload: PayloadRef(
      pType: RlpData,
      rlpBlob: @rlpData))

  let rc = db.mergeLeaf leaf
  if rc.isErr and
     rc.error notin {MergeLeafPathCachedAlready,MergeLeafPathOnBackendAlready}:
    return err(rc.error)
  ok()
# ------------------------------------------------------------------------------
# Public test function
# ------------------------------------------------------------------------------
proc testTxMergeAndDeleteOneByOne*(
    noisy: bool;
    list: openArray[ProofTrieData];
    rdbPath: string;                          # Rocks DB storage directory
      ): bool =
  ## For every sample of `list`, set up a fresh database (persistent if
  ## `rdbPath` is non-empty, in-memory otherwise), merge all leafs of the
  ## sample into the trie with root `VertexID(1)` and delete them again one
  ## by one in a reproducible pseudo-random order. In between, the database
  ## is saved to the backend at intervals, and while only a few leafs remain
  ## the left-over leafs are verified by walking the trie, alternating
  ## between forward and backward traversal.
  ##
  ## The result is `true` if all checks pass; any failing `xCheck` directive
  ## bails out early.
  var
    prng = PrngDesc.init 42
    db = AristoDbRef(nil)
    fwdRevVfyToggle = true
  defer:
    if not db.isNil:
      db.finish(flush=true)

  for n,w in list:
    # Start with brand new persistent database.
    db = block:
      if 0 < rdbPath.len:
        let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init())
        xCheckRc rc.error == 0
        rc.value
      else:
        AristoDbRef.init(MemBackendRef)

    # Start transaction (double frame for testing)
    xCheck db.txTop.isErr
    var tx = db.txBegin().value.to(AristoDbRef).txBegin().value
    xCheck tx.isTop()
    xCheck tx.level == 2

    # Reset database so that the next round has a clean setup
    defer: db.innerCleanUp

    # Merge leaf data into main trie (w/vertex ID 1)
    let kvpLeafs = block:
      var lst = w.kvpLst.mapRootVid VertexID(1)
      # The list might be reduced for isolation of particular properties,
      # e.g. lst.setLen(min(5,lst.len))
      lst
    for leaf in kvpLeafs:
      let rc = db.mergeLeaf leaf
      xCheckRc rc.error == 0

    # List of all leaf entries that should be on the database
    var leafsLeft = kvpLeafs.mapIt(it.leafTie).toHashSet

    # Provide a (reproducible) pseudo-random copy of the leafs list
    let leafVidPairs = block:
      let rc = db.randomisedLeafs(leafsLeft, prng)
      xCheckRc rc.error == (0,0)
      rc.value

    # Trigger subsequent saving tasks in loop below
    #
    # NOTE(review): for 31..59 leafs, `saveMod` is in 6..11 so the condition
    # `(u mod saveMod) == 11` never holds and nothing is saved -- confirm
    # whether this is intended.
    let (saveMod, saveRest, relax) = block:
      if leafVidPairs.len < 17: (7, 3, false)
      elif leafVidPairs.len < 31: (11, 7, false)
      else: (leafVidPairs.len div 5, 11, true)

    # === Loop over leafs ===
    for u,lvp in leafVidPairs:
      let
        runID = n + list.len * u
        tailWalkVerify = 7 # + 999
        doSaveBeOk = ((u mod saveMod) == saveRest)
        (leaf, lid) = lvp

      if doSaveBeOk:
        let saveBeOk = tx.saveToBackend(
          chunkedMpt=false, relax=relax, noisy=noisy, debugID=runID)
        xCheck saveBeOk:
          noisy.say "***", "del(2)",
            " u=", u,
            " n=", n, "/", list.len,
            "\n leaf=", leaf.pp(db),
            "\n db\n ", db.pp(backendOk=true),
            ""

      # Delete leaf
      block:
        let rc = db.delete(leaf, VOID_PATH_ID)
        xCheckRc rc.error == (0,0)

      # Update list of remaining leafs
      leafsLeft.excl leaf

      # The vertex formerly holding the leaf must be gone
      let deletedVtx = tx.db.getVtx lid
      xCheck deletedVtx.isValid == false

      # Walking the database is too slow for large tables. So the hope is that
      # potential errors will not go away and rather pop up later, as well.
      if leafsLeft.len <= tailWalkVerify:
        if u < leafVidPairs.len-1:
          if fwdRevVfyToggle:
            fwdRevVfyToggle = false
            if not db.fwdWalkVerify(leaf.root, leafsLeft, noisy, runID):
              return
          else:
            fwdRevVfyToggle = true
            if not db.revWalkVerify(leaf.root, leafsLeft, noisy, runID):
              return

    # Debugging aid, disabled at compile time
    when true and false:
      noisy.say "***", "del(9) n=", n, "/", list.len, " nLeafs=", kvpLeafs.len

  true
proc testTxMergeAndDeleteSubTree*(
noisy: bool;
list: openArray[ProofTrieData];
rdbPath: string; # Rocks DB storage directory
): bool =
Aristo avoid storage trie update race conditions (#2251) * Update TDD suite logger output format choices why: New format is not practical for TDD as it just dumps data across a wide range (considerably larder than 80 columns.) So the new format can be turned on by function argument. * Update unit tests samples configuration why: Slightly changed the way to find the `era1` directory * Remove compiler warnings (fix deprecated expressions and phrases) * Update `Aristo` debugging tools * Always update the `storageID` field of account leaf vertices why: Storage tries are weekly linked to an account leaf object in that the `storageID` field is updated by the application. Previously, `Aristo` verified that leaf objects make sense when passed to the database. As a consequence * the database was inconsistent for a short while * the burden for correctness was all on the application which led to delayed error handling which is hard to debug. So `Aristo` will internally update the account leaf objects so that there are no race conditions due to the storage trie handling * Aristo: Let `stow()`/`persist()` bail out unless there is a `VertexID(1)` why: The journal and filter logic depends on the hash of the `VertexID(1)` which is commonly known as the state root. This implies that all changes to the database are somehow related to that. * Make sure that a `Ledger` account does not overwrite the storage trie reference why: Due to the abstraction of a sub-trie (now referred to as column with a hash describing its state) there was a weakness in the `Aristo` handler where an account leaf could be overwritten though changing the validity of the database. This has been changed and the database will now reject such changes. This patch fixes the behaviour on the application layer. In particular, the column handle returned by the `CoreDb` needs to be updated by the `Aristo` database state. 
This mitigates the problem that a storage trie might have vanished or re-apperaed with a different vertex ID. * Fix sub-trie deletion test why: Was originally hinged on `VertexID(1)` which cannot be wholesale deleted anymore after the last Aristo update. Also, running with `VertexID(2)` needs an artificial `VertexID(1)` for making `stow()` or `persist()` work. * Cosmetics * Activate `test_generalstate_json` * Temporarily `deactivate test_tracer_json` * Fix copyright header --------- Co-authored-by: jordan <jordan@dry.pudding> Co-authored-by: Jacek Sieka <jacek@status.im>
2024-05-30 18:48:38 +01:00
const
# Need to reconfigure for the test, root ID 1 cannot be deleted as a trie
testRootVid = VertexID(2)
var
prng = PrngDesc.init 42
db = AristoDbRef(nil)
defer:
if not db.isNil:
db.finish(flush=true)
for n,w in list:
# Start with brand new persistent database.
db = block:
if 0 < rdbPath.len:
let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init())
xCheckRc rc.error == 0
rc.value
else:
AristoDbRef.init(MemBackendRef)
Aristo avoid storage trie update race conditions (#2251) * Update TDD suite logger output format choices why: New format is not practical for TDD as it just dumps data across a wide range (considerably larder than 80 columns.) So the new format can be turned on by function argument. * Update unit tests samples configuration why: Slightly changed the way to find the `era1` directory * Remove compiler warnings (fix deprecated expressions and phrases) * Update `Aristo` debugging tools * Always update the `storageID` field of account leaf vertices why: Storage tries are weekly linked to an account leaf object in that the `storageID` field is updated by the application. Previously, `Aristo` verified that leaf objects make sense when passed to the database. As a consequence * the database was inconsistent for a short while * the burden for correctness was all on the application which led to delayed error handling which is hard to debug. So `Aristo` will internally update the account leaf objects so that there are no race conditions due to the storage trie handling * Aristo: Let `stow()`/`persist()` bail out unless there is a `VertexID(1)` why: The journal and filter logic depends on the hash of the `VertexID(1)` which is commonly known as the state root. This implies that all changes to the database are somehow related to that. * Make sure that a `Ledger` account does not overwrite the storage trie reference why: Due to the abstraction of a sub-trie (now referred to as column with a hash describing its state) there was a weakness in the `Aristo` handler where an account leaf could be overwritten though changing the validity of the database. This has been changed and the database will now reject such changes. This patch fixes the behaviour on the application layer. In particular, the column handle returned by the `CoreDb` needs to be updated by the `Aristo` database state. 
This mitigates the problem that a storage trie might have vanished or re-apperaed with a different vertex ID. * Fix sub-trie deletion test why: Was originally hinged on `VertexID(1)` which cannot be wholesale deleted anymore after the last Aristo update. Also, running with `VertexID(2)` needs an artificial `VertexID(1)` for making `stow()` or `persist()` work. * Cosmetics * Activate `test_generalstate_json` * Temporarily `deactivate test_tracer_json` * Fix copyright header --------- Co-authored-by: jordan <jordan@dry.pudding> Co-authored-by: Jacek Sieka <jacek@status.im>
2024-05-30 18:48:38 +01:00
if testRootVid != VertexID(1):
# Add a dummy entry so the journal logic can be triggered
discard db.merge(VertexID(1), @[n.byte], @[42.byte], VOID_PATH_ID)
# Start transaction (double frame for testing)
xCheck db.txTop.isErr
var tx = db.txBegin().value.to(AristoDbRef).txBegin().value
xCheck tx.isTop()
xCheck tx.level == 2
# Reset database so that the next round has a clean setup
defer: db.innerCleanUp
Aristo avoid storage trie update race conditions (#2251) * Update TDD suite logger output format choices why: New format is not practical for TDD as it just dumps data across a wide range (considerably larder than 80 columns.) So the new format can be turned on by function argument. * Update unit tests samples configuration why: Slightly changed the way to find the `era1` directory * Remove compiler warnings (fix deprecated expressions and phrases) * Update `Aristo` debugging tools * Always update the `storageID` field of account leaf vertices why: Storage tries are weekly linked to an account leaf object in that the `storageID` field is updated by the application. Previously, `Aristo` verified that leaf objects make sense when passed to the database. As a consequence * the database was inconsistent for a short while * the burden for correctness was all on the application which led to delayed error handling which is hard to debug. So `Aristo` will internally update the account leaf objects so that there are no race conditions due to the storage trie handling * Aristo: Let `stow()`/`persist()` bail out unless there is a `VertexID(1)` why: The journal and filter logic depends on the hash of the `VertexID(1)` which is commonly known as the state root. This implies that all changes to the database are somehow related to that. * Make sure that a `Ledger` account does not overwrite the storage trie reference why: Due to the abstraction of a sub-trie (now referred to as column with a hash describing its state) there was a weakness in the `Aristo` handler where an account leaf could be overwritten though changing the validity of the database. This has been changed and the database will now reject such changes. This patch fixes the behaviour on the application layer. In particular, the column handle returned by the `CoreDb` needs to be updated by the `Aristo` database state. 
This mitigates the problem that a storage trie might have vanished or re-apperaed with a different vertex ID. * Fix sub-trie deletion test why: Was originally hinged on `VertexID(1)` which cannot be wholesale deleted anymore after the last Aristo update. Also, running with `VertexID(2)` needs an artificial `VertexID(1)` for making `stow()` or `persist()` work. * Cosmetics * Activate `test_generalstate_json` * Temporarily `deactivate test_tracer_json` * Fix copyright header --------- Co-authored-by: jordan <jordan@dry.pudding> Co-authored-by: Jacek Sieka <jacek@status.im>
2024-05-30 18:48:38 +01:00
# Merge leaf data into main trie (w/vertex ID 2)
let kvpLeafs = block:
Aristo avoid storage trie update race conditions (#2251) * Update TDD suite logger output format choices why: New format is not practical for TDD as it just dumps data across a wide range (considerably larder than 80 columns.) So the new format can be turned on by function argument. * Update unit tests samples configuration why: Slightly changed the way to find the `era1` directory * Remove compiler warnings (fix deprecated expressions and phrases) * Update `Aristo` debugging tools * Always update the `storageID` field of account leaf vertices why: Storage tries are weekly linked to an account leaf object in that the `storageID` field is updated by the application. Previously, `Aristo` verified that leaf objects make sense when passed to the database. As a consequence * the database was inconsistent for a short while * the burden for correctness was all on the application which led to delayed error handling which is hard to debug. So `Aristo` will internally update the account leaf objects so that there are no race conditions due to the storage trie handling * Aristo: Let `stow()`/`persist()` bail out unless there is a `VertexID(1)` why: The journal and filter logic depends on the hash of the `VertexID(1)` which is commonly known as the state root. This implies that all changes to the database are somehow related to that. * Make sure that a `Ledger` account does not overwrite the storage trie reference why: Due to the abstraction of a sub-trie (now referred to as column with a hash describing its state) there was a weakness in the `Aristo` handler where an account leaf could be overwritten though changing the validity of the database. This has been changed and the database will now reject such changes. This patch fixes the behaviour on the application layer. In particular, the column handle returned by the `CoreDb` needs to be updated by the `Aristo` database state. 
This mitigates the problem that a storage trie might have vanished or re-apperaed with a different vertex ID. * Fix sub-trie deletion test why: Was originally hinged on `VertexID(1)` which cannot be wholesale deleted anymore after the last Aristo update. Also, running with `VertexID(2)` needs an artificial `VertexID(1)` for making `stow()` or `persist()` work. * Cosmetics * Activate `test_generalstate_json` * Temporarily `deactivate test_tracer_json` * Fix copyright header --------- Co-authored-by: jordan <jordan@dry.pudding> Co-authored-by: Jacek Sieka <jacek@status.im>
2024-05-30 18:48:38 +01:00
var lst = w.kvpLst.mapRootVid testRootVid
# The list might be reduced for isolation of particular properties,
# e.g. lst.setLen(min(5,lst.len))
lst
for i,leaf in kvpLeafs:
let rc = db.mergeLeaf leaf
xCheckRc rc.error == 0
# List of all leaf entries that should be on the database
var leafsLeft = kvpLeafs.mapIt(it.leafTie).toHashSet
# Provide a (reproducible) pseudo-random copy of the leafs list
let leafVidPairs = block:
let rc = db.randomisedLeafs(leafsLeft, prng)
xCheckRc rc.error == (0,0)
rc.value
discard leafVidPairs
# === delete sub-tree ===
block:
let saveBeOk = tx.saveToBackend(
chunkedMpt=false, relax=false, noisy=noisy, 1 + list.len * n)
xCheck saveBeOk:
noisy.say "***", "del(1)",
" n=", n, "/", list.len,
"\n db\n ", db.pp(backendOk=true),
""
# Delete sub-tree
block:
Aristo avoid storage trie update race conditions (#2251) * Update TDD suite logger output format choices why: New format is not practical for TDD as it just dumps data across a wide range (considerably larder than 80 columns.) So the new format can be turned on by function argument. * Update unit tests samples configuration why: Slightly changed the way to find the `era1` directory * Remove compiler warnings (fix deprecated expressions and phrases) * Update `Aristo` debugging tools * Always update the `storageID` field of account leaf vertices why: Storage tries are weekly linked to an account leaf object in that the `storageID` field is updated by the application. Previously, `Aristo` verified that leaf objects make sense when passed to the database. As a consequence * the database was inconsistent for a short while * the burden for correctness was all on the application which led to delayed error handling which is hard to debug. So `Aristo` will internally update the account leaf objects so that there are no race conditions due to the storage trie handling * Aristo: Let `stow()`/`persist()` bail out unless there is a `VertexID(1)` why: The journal and filter logic depends on the hash of the `VertexID(1)` which is commonly known as the state root. This implies that all changes to the database are somehow related to that. * Make sure that a `Ledger` account does not overwrite the storage trie reference why: Due to the abstraction of a sub-trie (now referred to as column with a hash describing its state) there was a weakness in the `Aristo` handler where an account leaf could be overwritten though changing the validity of the database. This has been changed and the database will now reject such changes. This patch fixes the behaviour on the application layer. In particular, the column handle returned by the `CoreDb` needs to be updated by the `Aristo` database state. 
This mitigates the problem that a storage trie might have vanished or re-apperaed with a different vertex ID. * Fix sub-trie deletion test why: Was originally hinged on `VertexID(1)` which cannot be wholesale deleted anymore after the last Aristo update. Also, running with `VertexID(2)` needs an artificial `VertexID(1)` for making `stow()` or `persist()` work. * Cosmetics * Activate `test_generalstate_json` * Temporarily `deactivate test_tracer_json` * Fix copyright header --------- Co-authored-by: jordan <jordan@dry.pudding> Co-authored-by: Jacek Sieka <jacek@status.im>
2024-05-30 18:48:38 +01:00
let rc = db.delTree(testRootVid, VOID_PATH_ID)
xCheckRc rc.error == (0,0):
noisy.say "***", "del(2)",
" n=", n, "/", list.len,
"\n db\n ", db.pp(backendOk=true),
""
Aristo avoid storage trie update race conditions (#2251) * Update TDD suite logger output format choices why: New format is not practical for TDD as it just dumps data across a wide range (considerably larder than 80 columns.) So the new format can be turned on by function argument. * Update unit tests samples configuration why: Slightly changed the way to find the `era1` directory * Remove compiler warnings (fix deprecated expressions and phrases) * Update `Aristo` debugging tools * Always update the `storageID` field of account leaf vertices why: Storage tries are weekly linked to an account leaf object in that the `storageID` field is updated by the application. Previously, `Aristo` verified that leaf objects make sense when passed to the database. As a consequence * the database was inconsistent for a short while * the burden for correctness was all on the application which led to delayed error handling which is hard to debug. So `Aristo` will internally update the account leaf objects so that there are no race conditions due to the storage trie handling * Aristo: Let `stow()`/`persist()` bail out unless there is a `VertexID(1)` why: The journal and filter logic depends on the hash of the `VertexID(1)` which is commonly known as the state root. This implies that all changes to the database are somehow related to that. * Make sure that a `Ledger` account does not overwrite the storage trie reference why: Due to the abstraction of a sub-trie (now referred to as column with a hash describing its state) there was a weakness in the `Aristo` handler where an account leaf could be overwritten though changing the validity of the database. This has been changed and the database will now reject such changes. This patch fixes the behaviour on the application layer. In particular, the column handle returned by the `CoreDb` needs to be updated by the `Aristo` database state. 
This mitigates the problem that a storage trie might have vanished or re-apperaed with a different vertex ID. * Fix sub-trie deletion test why: Was originally hinged on `VertexID(1)` which cannot be wholesale deleted anymore after the last Aristo update. Also, running with `VertexID(2)` needs an artificial `VertexID(1)` for making `stow()` or `persist()` work. * Cosmetics * Activate `test_generalstate_json` * Temporarily `deactivate test_tracer_json` * Fix copyright header --------- Co-authored-by: jordan <jordan@dry.pudding> Co-authored-by: Jacek Sieka <jacek@status.im>
2024-05-30 18:48:38 +01:00
if testRootVid != VertexID(1):
# Update dummy entry so the journal logic can be triggered
discard db.merge(VertexID(1), @[n.byte], @[43.byte], VOID_PATH_ID)
block:
let saveBeOk = tx.saveToBackend(
chunkedMpt=false, relax=false, noisy=noisy, 2 + list.len * n)
xCheck saveBeOk:
noisy.say "***", "del(3)",
" n=", n, "/", list.len,
"\n db\n ", db.pp(backendOk=true),
""
when true and false:
noisy.say "***", "del(9) n=", n, "/", list.len, " nLeafs=", kvpLeafs.len
true
proc testTxMergeProofAndKvpList*(
    noisy: bool;
    list: openArray[ProofTrieData];
    rdbPath: string;                         # Rocks DB storage directory
    resetDb = false;
    idPfx = "";
    oops: KnownHasherFailure = @[];
      ): bool =
  ## Merge the proof nodes and the key-value pairs of every sample in `list`
  ## into an `Aristo` database and save the result to the backend.
  ##
  ## Arguments:
  ## * `noisy`   -- passed on to the save helper (and to the disabled debug
  ##                output at the end of the loop body)
  ## * `list`    -- samples, each providing `root`, `proof`, `kvpLst`, `id`
  ## * `rdbPath` -- if non-empty, a `RdbBackendRef` database is initialised
  ##                on that path, otherwise a `MemBackendRef` one is used
  ## * `resetDb` -- if `true`, a fresh database is started for every sample
  ## * `idPfx`   -- prefix of the test ID used for the `oops` lookup
  ## * `oops`    -- known failures, looked up by `idPfx & "#" & id & "." & n`
  ##
  ## Returns `true` if the loop ran through for all samples. The function
  ## returns `false` if `saveToBackendWithOops()` reports a failure.
  ## NOTE(review): `xCheck`/`xCheckRc` are helpers defined elsewhere;
  ## presumably they also leave the function with `false` when a check
  ## fails -- confirm against their definition.
  let
    oopsTab = oops.toTable
  var
    db = AristoDbRef(nil)
    tx = AristoTxRef(nil)
    rootKey: Hash256
    count = 0
  defer:
    # Close the last database that was still open when leaving the function
    if not db.isNil:
      db.finish(flush=true)

  for n,w in list:

    # Start new database upon request, when the state root changes, or when
    # the sample comes without proof nodes
    if resetDb or w.root != rootKey or w.proof.len == 0:
      # NOTE(review): on the first iteration `db` is still `nil` here, so
      # `innerCleanUp` is expected to cope with that -- confirm.
      db.innerCleanUp
      db = block:
        # New DB with disabled filter slots management
        if 0 < rdbPath.len:
          let rc = AristoDbRef.init(RdbBackendRef, rdbPath, DbOptions.init())
          xCheckRc rc.error == 0
          rc.value
        else:
          AristoDbRef.init(MemBackendRef)

      # Start transaction (double frame for testing)
      tx = db.txBegin().value.to(AristoDbRef).txBegin().value
      xCheck tx.isTop()

      # Update root
      rootKey = w.root
      count = 0
    count.inc

    let
      testId = idPfx & "#" & $w.id & "." & $n
      runID = n
      sTabLen = db.nLayersVtx()
      leafs = w.kvpLst.mapRootVid VertexID(1) # merge into main trie

    if 0 < w.proof.len:
      # Register the state root first, then merge the proof nodes under it
      let root = block:
        let rc = db.mergeProof(rootKey, VertexID(1))
        xCheckRc rc.error == 0
        rc.value

      let nMerged = block:
        let rc = db.mergeProof(w.proof, root)
        xCheckRc rc.error == 0
        rc.value

      xCheck w.proof.len == nMerged
      xCheck db.nLayersVtx() <= nMerged + sTabLen

    let merged = db.mergeList leafs
    xCheck merged.error in {AristoError(0), MergeLeafPathCachedAlready}
    xCheck merged.merged + merged.dups == leafs.len

    block:
      # Look up the failure registered for this test ID; the default entry
      # `(0,AristoError(0))` is used when there is none.
      let knownFailure = oopsTab.getOrDefault(testId,(0,AristoError(0)))
      if not tx.saveToBackendWithOops(
          chunkedMpt=true, noisy=noisy, debugID=runID, knownFailure):
        return false

    when true and false:
      noisy.say "***", "testTxMergeProofAndKvpList (1)",
        " <", n, "/", list.len-1, ">",
        " runID=", runID,
        " groups=", count, " merged=", merged

  true
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------