Aristo db update vertex caching when merging (#1606)

* Added missing deferred cleanup directive to sub-test functions

why:
  Rocksdb keeps the files locked for a short while, leading to errors. This
  was previously solved by using different db sub-directories.
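
  For illustration, a minimal sketch of the pattern (hypothetical sub-test
  body; `AristoDb.init`, `BackendRocksDB`, and `finish` are from this patch):

    proc subTestSketch(dbDir: string): bool =
      ## Hypothetical sub-test: open, exercise, and reliably close the DB.
      var db = block:
        let rc = AristoDb.init(BackendRocksDB, dbDir)
        if rc.isErr:
          return false
        rc.value
      defer:
        # Runs on every exit path, so the next sub-test re-using the same
        # directory never trips over a stale RocksDB file lock.
        db.finish(flush=true)
      # ... exercise the database here ...
      true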

* Provide vertex deep-copy function globally.

why:
  It is just handy.
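
  A hedged usage sketch (`VertexRef`/`PayloadRef` as in this patch, values
  hypothetical): the duplicate shares no state with the original, so it can
  be mutated freely.

    let
      leaf1 = VertexRef(
        vType: Leaf,
        lPfx:  somePfx,               # placeholder path prefix
        lData: PayloadRef(pType: BlobData, blob: @[1.byte, 2, 3]))
      leaf2 = leaf1.dup               # payload is copied as well
    leaf2.lData.blob = @[9.byte]      # leaf1.lData.blob stays @[1, 2, 3]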

* Avoid unnecessary vertex caching when merging proof nodes

also:
  Run all merge tests on the rocksdb backend
  Previously, proof node tests were run without a backend.
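
  The change boils down to the guard sketched here (taken from the
  `mergeNodeImpl` hunk below): an existing vertex is re-cached in the top
  layer only if its stored key differs from the incoming hash key.

    if hasVtx:
      let key = db.getKey vid
      if key != hashKey:
        db.top.sTab[vid] = vtx        # update only if the key changed
    else:
      db.top.sTab[vid] = vtx          # new vertex: always cache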
Jordan Hrycaj authored 2023-06-22 20:21:33 +01:00, committed by GitHub
parent 83dbe87159
commit 15cc9f962e
7 changed files with 154 additions and 78 deletions


@@ -193,25 +193,57 @@ proc ppXMap*(
    indent: int;
      ): string =
-  let dups = pAmk.values.toSeq.toCountTable.pairs.toSeq
-               .filterIt(1 < it[1]).toTable
+  let
+    pfx = indent.toPfx(1)
+    dups = pAmk.values.toSeq.toCountTable.pairs.toSeq
+             .filterIt(1 < it[1]).toTable
+    revOnly = pAmk.pairs.toSeq.filterIt(not kMap.hasKey it[1])
+                .mapIt((it[1],it[0])).toTable

  proc ppNtry(n: uint64): string =
-    var s = "(" & VertexID(n).ppVid
+    var s = VertexID(n).ppVid
    let lbl = kMap.getOrVoid VertexID(n)
    if lbl.isValid:
      let vid = pAmk.getOrVoid lbl
      if not vid.isValid:
-        s &= "," & lbl.ppLabel(db) & ""
+        s = "(" & s & "," & lbl.ppLabel(db) & ""
      elif vid != VertexID(n):
-        s &= "," & lbl.ppLabel(db) & "," & vid.ppVid
+        s = "(" & s & "," & lbl.ppLabel(db) & "," & vid.ppVid
      let count = dups.getOrDefault(VertexID(n), 0)
      if 0 < count:
+        if s[0] != '(':
+          s &= "(" & s
        s &= ",*" & $count
    else:
      s &= "£r(!)"
-    s & "),"
+    if s[0] == '(':
+      s &= ")"
+    s & ","
+
+  result = "{"
+  # Extra reverse lookups
+  let revKeys = revOnly.keys.toSeq.mapIt(it.uint64).sorted.mapIt(it.VertexID)
+  if 0 < revKeys.len:
+    proc ppRevlabel(vid: VertexID): string =
+      "(ø," & revOnly.getOrVoid(vid).ppLabel(db) & ")"
+    var (i, r) = (0, revKeys[0])
+    result &= revKeys[0].ppRevlabel
+    for n in 1 ..< revKeys.len:
+      let vid = revKeys[n]
+      r.inc
+      if r != vid:
+        if i+1 != n:
+          result &= ".. " & revKeys[n-1].ppRevlabel
+        result &= pfx & vid.ppRevlabel
+        (i, r) = (n, vid)
+    if i < revKeys.len - 1:
+      if i+1 != revKeys.len - 1:
+        result &= ".. "
+      else:
+        result &= pfx
+      result &= revKeys[^1].ppRevlabel
+
+  # Forward lookups
  var cache: seq[(uint64,uint64,bool)]
  for vid in kMap.sortedKeys:
    let lbl = kMap.getOrVoid vid
@@ -223,12 +255,10 @@ proc ppXMap*(
    else:
      cache.add (vid.uint64, 0u64, true)

-  result = "{"
  if 0 < cache.len:
-    let
-      pfx = indent.toPfx(1)
-    var
-      (i, r) = (0, cache[0])
+    var (i, r) = (0, cache[0])
+    if 0 < revKeys.len:
+      result &= pfx
    result &= cache[i][0].ppNtry
    for n in 1 ..< cache.len:
      let w = cache[n]


@@ -128,22 +128,40 @@ proc convertTo*(payload: PayloadRef; T: type Blob): T =
  of AccountData:
    result = rlp.encode payload.account

-proc to*(node: NodeRef; T: type VertexRef): T =
-  ## Extract a copy of the `VertexRef` part from a `NodeRef`. For a leaf
-  ## type, the `lData` payload reference will be a shallow copy, i.e. only
-  ## the reference pointer is copied.
-  case node.vType:
+proc dup*(pld: PayloadRef): PayloadRef =
+  ## Duplicate payload.
+  case pld.pType:
+  of BlobData:
+    PayloadRef(
+      pType: BlobData,
+      blob:  pld.blob)
+  of AccountData:
+    PayloadRef(
+      pType:   AccountData,
+      account: pld.account)
+
+proc dup*(vtx: VertexRef): VertexRef =
+  ## Duplicate vertex.
+  # Not using `deepCopy()` here (some `gc` needs `--deepcopy:on`.)
+  case vtx.vType:
  of Leaf:
-    T(vType: Leaf,
-      lPfx:  node.lPfx,
-      lData: node.lData)
+    VertexRef(
+      vType: Leaf,
+      lPfx:  vtx.lPfx,
+      lData: vtx.lData.dup)
  of Extension:
-    T(vType: Extension,
-      ePfx:  node.ePfx,
-      eVid:  node.eVid)
+    VertexRef(
+      vType: Extension,
+      ePfx:  vtx.ePfx,
+      eVid:  vtx.eVid)
  of Branch:
-    T(vType: Branch,
-      bVid:  node.bVid)
+    VertexRef(
+      vType: Branch,
+      bVid:  vtx.bVid)
+
+proc to*(node: NodeRef; T: type VertexRef): T =
+  ## Extract a copy of the `VertexRef` part from a `NodeRef`.
+  node.VertexRef.dup

 # ------------------------------------------------------------------------------
 # End


@@ -77,10 +77,12 @@ proc init*(
 # -----------------

 proc finish*(db: var AristoDb; flush = false) =
-  ## backend destructor. The argument `flush` indicates that a full database
+  ## Backend destructor. The argument `flush` indicates that a full database
   ## deletion is requested. If set or left `false` the outcome might differ
   ## depending on the type of backend (e.g. the `BackendMemory` backend will
   ## always flush on close.)
+  ##
+  ## This destructor may be used on already *destructed* descriptors.
   if not db.backend.isNil:
     db.backend.closeFn flush
     db.backend = AristoBackendRef(nil)


@@ -65,12 +65,6 @@ proc endSession(hdl: PutHdlRef; db: MemBackendRef): MemPutHdlRef =
  hdl.TypedPutHdlRef.finishSession db
  hdl.MemPutHdlRef

-proc cpy(vtx: VertexRef): VertexRef =
-  new result
-  result[] = vtx[]
-  if vtx.vType == Leaf:
-    result.lData[] = vtx.lData[]
-
 # ------------------------------------------------------------------------------
 # Private functions: interface
 # ------------------------------------------------------------------------------
@@ -80,7 +74,7 @@ proc getVtxFn(db: MemBackendRef): GetVtxFn =
    proc(vid: VertexID): Result[VertexRef,AristoError] =
      let vtx = db.sTab.getOrVoid vid
      if vtx.isValid:
-        return ok cpy(vtx)
+        return ok vtx.dup
      err(GetVtxNotFound)

 proc getKeyFn(db: MemBackendRef): GetKeyFn =
@@ -109,7 +103,7 @@ proc putVtxFn(db: MemBackendRef): PutVtxFn =
    proc(hdl: PutHdlRef; vrps: openArray[(VertexID,VertexRef)]) =
      let hdl = hdl.getSession db
      for (vid,vtx) in vrps:
-        hdl.sTab[vid] = cpy(vtx)
+        hdl.sTab[vid] = vtx.dup

 proc putKeyFn(db: MemBackendRef): PutKeyFn =
  result =


@@ -461,7 +461,7 @@ proc mergeNodeImpl(
  # Make sure that the `vid<->hashLbl` reverse mapping has been cached,
  # already. This is provided for if the `nodes` are processed in the right
  # order `root->.. ->leaf`.
-  var
+  let
    hashLbl = HashLabel(root: rootVid, key: hashKey)
    vid = db.top.pAmk.getOrVoid hashLbl
  if not vid.isValid:
@@ -517,7 +517,13 @@ proc mergeNodeImpl(
      vtx.bVid[n] = db.vidAttach bLbl

  db.top.pPrf.incl vid
-  db.top.sTab[vid] = vtx
+  if hasVtx:
+    let key = db.getKey vid
+    if key != hashKey:
+      db.top.sTab[vid] = vtx
+  else:
+    db.top.sTab[vid] = vtx
  ok vid

 # ------------------------------------------------------------------------------


@@ -199,6 +199,7 @@ proc accountsRunner(
    fileInfo = sample.file.splitPath.tail.replace(".txt.gz","")
    listMode = if resetDb: "" else: ", merged data lists"
    baseDir = getTmpDir() / sample.name & "-accounts"
+    dbDir = baseDir / "tmp"

  defer:
    try: baseDir.removeDir except CatchableError: discard
@@ -206,27 +207,21 @@ proc accountsRunner(
  suite &"Aristo: accounts data dump from {fileInfo}{listMode}":

    test &"Merge {accLst.len} account lists to database":
-      # Local sub-directories needed as DB might be kept locked after close
-      let dbDir = baseDir / "tmp1"
      check noisy.test_mergeKvpList(accLst, dbDir, resetDb)

    test &"Merge {accLst.len} proof & account lists to database":
-      let dbDir = baseDir / "tmp2"
-      check noisy.test_mergeProofAndKvpList(accLst, resetDb)
+      check noisy.test_mergeProofAndKvpList(accLst, dbDir, resetDb)

    test &"Compare {accLst.len} account lists on database backends":
      if cmpBackends:
-        let dbDir = baseDir / "tmp3"
        check noisy.test_backendConsistency(accLst, dbDir, resetDb)
      else:
        skip()

    test &"Traverse accounts database w/{accLst.len} account lists":
-      let dbDir = baseDir / "tmp4"
      check noisy.test_nearbyKvpList(accLst, resetDb)

    test &"Delete accounts database, successively {accLst.len} entries":
-      let dbDir = baseDir / "tmp5"
      check noisy.test_delete accLst

@@ -242,6 +237,7 @@ proc storagesRunner(
    fileInfo = sample.file.splitPath.tail.replace(".txt.gz","")
    listMode = if resetDb: "" else: ", merged data lists"
    baseDir = getTmpDir() / sample.name & "-storage"
+    dbDir = baseDir / "tmp"

  defer:
    try: baseDir.removeDir except CatchableError: discard
@@ -249,28 +245,22 @@ proc storagesRunner(
  suite &"Aristo: storages data dump from {fileInfo}{listMode}":

    test &"Merge {stoLst.len} storage slot lists to database":
-      # Local sub-directories needed as DB might be kept locked after close
-      let dbDir = baseDir / "tmp1"
      check noisy.test_mergeKvpList(stoLst, dbDir, resetDb)

    test &"Merge {stoLst.len} proof & slots lists to database":
-      let dbDir = baseDir / "tmp2"
      check noisy.test_mergeProofAndKvpList(
-        stoLst, resetDb, fileInfo, oops)
+        stoLst, dbDir, resetDb, fileInfo, oops)

    test &"Compare {stoLst.len} slot lists on database backends":
-      let dbDir = baseDir / "tmp3"
      if cmpBackends:
        check noisy.test_backendConsistency(stoLst, dbDir, resetDb)
      else:
        skip()

    test &"Traverse storage slots database w/{stoLst.len} account lists":
-      let dbDir = baseDir / "tmp4"
      check noisy.test_nearbyKvpList(stoLst, resetDb)

    test &"Delete storage database, successively {stoLst.len} entries":
-      let dbDir = baseDir / "tmp5"
      check noisy.test_delete stoLst

 # ------------------------------------------------------------------------------
@@ -293,7 +283,7 @@ when isMainModule:
    noisy.miscRunner()

  # Borrowed from `test_sync_snap.nim`
-  when true and false:
+  when true: # and false:
    for n,sam in snapTestList:
      noisy.transcodeRunner(sam)
  for n,sam in snapTestStorageList:


@@ -135,8 +135,10 @@ proc test_mergeKvpList*(
    rdbPath: string;                  # Rocks DB storage directory
    resetDb = false;
      ): bool =
-  var db: AristoDb
+  var
+    db: AristoDb
+  defer:
+    db.finish(flush=true)
  for n,w in list:
    if resetDb or db.top.isNil:
      db.finish(flush=true)
@@ -206,7 +208,6 @@ proc test_mergeKvpList*(
        check rc == Result[void,(VertexID,AristoError)].ok()
        return

-    block:
-      let rdbHist = block:
-        let rc = db.save
-        if rc.isErr:
+    let rdbHist = block:
+      let rc = db.save
+      if rc.isErr:
@@ -230,6 +231,7 @@ proc test_mergeKvpList*(
 proc test_mergeProofAndKvpList*(
    noisy: bool;
    list: openArray[ProofTrieData];
+    rdbPath: string;                  # Rocks DB storage directory
    resetDb = false;
    idPfx = "";
    oops: KnownHasherFailure = @[];
@@ -240,9 +242,17 @@ proc test_mergeProofAndKvpList*(
    db: AristoDb
    rootKey = HashKey.default
    count = 0
+  defer:
+    db.finish(flush=true)
+
  for n,w in list:
    if resetDb or w.root != rootKey or w.proof.len == 0:
-      db.top = AristoLayerRef()
+      db.finish(flush=true)
+      db = block:
+        let rc = AristoDb.init(BackendRocksDB,rdbPath)
+        if rc.isErr:
+          check rc.error == AristoError(0)
+          return
+        rc.value
      rootKey = w.root
      count = 0
    count.inc
@@ -255,29 +265,42 @@ proc test_mergeProofAndKvpList*(
      leafs = w.kvpLst.mapRootVid VertexID(1) # merge into main trie

    when true and false:
-      noisy.say "***", "sample(1) <", n, "/", lstLen-1, ">",
+      noisy.say "***", "proofs(1) <", n, "/", lstLen-1, ">",
        " groups=", count, " nLeafs=", leafs.len,
-        " db-dump\n    ", db.pp
+        "\n   cache\n    ", db.pp,
+        "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+        "\n   --------"

-    var proved: tuple[merged: int, dups: int, error: AristoError]
+    var
+      proved: tuple[merged: int, dups: int, error: AristoError]
+      preDb: string
    if 0 < w.proof.len:
      let rc = db.merge(rootKey, VertexID(1))
      if rc.isErr:
        check rc.error == AristoError(0)
        return

-      proved = db.merge(w.proof, rc.value)
+      preDb = db.pp
+      proved = db.merge(w.proof, rc.value) # , noisy)

      check proved.error in {AristoError(0),MergeHashKeyCachedAlready}
      check w.proof.len == proved.merged + proved.dups
      check db.top.lTab.len == lTabLen
-      check db.top.sTab.len == proved.merged + sTabLen
+      check db.top.sTab.len <= proved.merged + sTabLen
      check proved.merged < db.top.pAmk.len
+      check proved.merged < db.top.kMap.len

    when true and false:
      if 0 < w.proof.len:
-        noisy.say "***", "sample(2) <", n, "/", lstLen-1, ">",
-          " groups=", count, " nLeafs=", leafs.len, " proved=", proved,
-          " db-dump\n    ", db.pp
+        noisy.say "***", "proofs(2) <", n, "/", lstLen-1, ">",
+          " groups=", count,
+          " nLeafs=", leafs.len,
+          " proved=", proved,
+          "\n   pre-DB\n    ", preDb,
+          "\n   --------",
+          "\n   cache\n    ", db.pp,
+          "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+          "\n   --------"
+        return

    let
      merged = db.merge leafs
@@ -293,9 +316,11 @@ proc test_mergeProofAndKvpList*(
      return

    when true and false:
-      noisy.say "***", "sample(3) <", n, "/", lstLen-1, ">",
+      noisy.say "***", "proofs(3) <", n, "/", lstLen-1, ">",
        " groups=", count, " nLeafs=", leafs.len, " merged=", merged,
-        " db-dump\n    ", db.pp
+        "\n   cache\n    ", db.pp,
+        "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+        "\n   --------"

    block:
      let
@@ -314,23 +339,34 @@ proc test_mergeProofAndKvpList*(
      # Otherwise, check for correctness
      elif rc.isErr:
-        noisy.say "***", "<", n, "/", lstLen-1, ">",
+        noisy.say "***", "proofs(4) <", n, "/", lstLen-1, ">",
          " testId=", testId,
          " groups=", count,
          "\n   pre-DB",
          "\n    ", preDb,
          "\n   --------",
-          "\n    ", db.pp
+          "\n   cache\n    ", db.pp,
+          "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+          "\n   --------"
        check rc.error == (VertexID(0),AristoError(0))
        return

-    when true and false:
-      noisy.say "***", "sample(4) <", n, "/", lstLen-1, ">",
-        " groups=", count,
-        " db-dump\n    ", db.pp
+    let rdbHist = block:
+      let rc = db.save
+      if rc.isErr:
+        check rc.error == AristoError(0)
+        return
+      rc.value
+
+    when true and false:
+      noisy.say "***", "proofs(5) <", n, "/", lstLen-1, ">",
+        " groups=", count,
+        "\n   cache\n    ", db.pp,
+        "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+        "\n   --------"

    when true and false:
-      noisy.say "***", "sample(5) <", n, "/", lstLen-1, ">",
+      noisy.say "***", "proofs(6) <", n, "/", lstLen-1, ">",
        " groups=", count, " proved=", proved.pp, " merged=", merged.pp

  true