From 15cc9f962e19ca872469bf4a51bba59699f19b4d Mon Sep 17 00:00:00 2001
From: Jordan Hrycaj
Date: Thu, 22 Jun 2023 20:21:33 +0100
Subject: [PATCH] Aristo db update vertex caching when merging (#1606)

* Added missing deferred cleanup directive to sub-test functions

why:
  Rocksdb keeps the files locked for a short while, leading to errors. This
  was previously solved by using different db sub-directories.

* Provide vertex deep-copy function globally.

why:
  It is just handy.

* Avoid unnecessary vertex caching when merging proof nodes

also:
  Run all merge tests on the rocksdb backend. Previously, proof node tests
  were run without a backend.
---
 nimbus/db/aristo/aristo_debug.nim             | 52 ++++++++---
 .../aristo_desc/aristo_types_structural.nim   | 44 ++++++---
 nimbus/db/aristo/aristo_init.nim              |  4 +-
 .../db/aristo/aristo_init/aristo_memory.nim   | 10 +-
 nimbus/db/aristo/aristo_merge.nim             | 10 +-
 tests/test_aristo.nim                         | 20 +---
 tests/test_aristo/test_merge.nim              | 92 +++++++++++++------
 7 files changed, 154 insertions(+), 78 deletions(-)

diff --git a/nimbus/db/aristo/aristo_debug.nim b/nimbus/db/aristo/aristo_debug.nim
index 9c46b599f..8a33b57c5 100644
--- a/nimbus/db/aristo/aristo_debug.nim
+++ b/nimbus/db/aristo/aristo_debug.nim
@@ -193,25 +193,57 @@ proc ppXMap*(
     indent: int;
       ): string =
-  let dups = pAmk.values.toSeq.toCountTable.pairs.toSeq
-               .filterIt(1 < it[1]).toTable
+  let
+    pfx = indent.toPfx(1)
+    dups = pAmk.values.toSeq.toCountTable.pairs.toSeq
+             .filterIt(1 < it[1]).toTable
+    revOnly = pAmk.pairs.toSeq.filterIt(not kMap.hasKey it[1])
+                .mapIt((it[1],it[0])).toTable
 
   proc ppNtry(n: uint64): string =
-    var s = "(" & VertexID(n).ppVid
+    var s = VertexID(n).ppVid
     let lbl = kMap.getOrVoid VertexID(n)
     if lbl.isValid:
       let vid = pAmk.getOrVoid lbl
       if not vid.isValid:
-        s &= "," & lbl.ppLabel(db) & ",ø"
+        s = "(" & s & "," & lbl.ppLabel(db) & ",ø"
       elif vid != VertexID(n):
-        s &= "," & lbl.ppLabel(db) & "," & vid.ppVid
+        s = "(" & s & "," & lbl.ppLabel(db) & "," & vid.ppVid
       let count = dups.getOrDefault(VertexID(n), 0)
       if 0 < count:
+        if s[0] != '(':
+          s = "(" & s
         s &= ",*" & $count
     else:
       s &= "£r(!)"
-    s & "),"
+    if s[0] == '(':
+      s &= ")"
+    s & ","
 
+  result = "{"
+  # Extra reverse lookups
+  let revKeys = revOnly.keys.toSeq.mapIt(it.uint64).sorted.mapIt(it.VertexID)
+  if 0 < revKeys.len:
+    proc ppRevlabel(vid: VertexID): string =
+      "(ø," & revOnly.getOrVoid(vid).ppLabel(db) & ")"
+    var (i, r) = (0, revKeys[0])
+    result &= revKeys[0].ppRevlabel
+    for n in 1 ..< revKeys.len:
+      let vid = revKeys[n]
+      r.inc
+      if r != vid:
+        if i+1 != n:
+          result &= ".. " & revKeys[n-1].ppRevlabel
+        result &= pfx & vid.ppRevlabel
+        (i, r) = (n, vid)
+    if i < revKeys.len - 1:
+      if i+1 != revKeys.len - 1:
+        result &= ".. "
" + else: + result &= pfx + result &= revKeys[^1].ppRevlabel + + # Forward lookups var cache: seq[(uint64,uint64,bool)] for vid in kMap.sortedKeys: let lbl = kMap.getOrVoid vid @@ -223,12 +255,10 @@ proc ppXMap*( else: cache.add (vid.uint64, 0u64, true) - result = "{" if 0 < cache.len: - let - pfx = indent.toPfx(1) - var - (i, r) = (0, cache[0]) + var (i, r) = (0, cache[0]) + if 0 < revKeys.len: + result &= pfx result &= cache[i][0].ppNtry for n in 1 ..< cache.len: let w = cache[n] diff --git a/nimbus/db/aristo/aristo_desc/aristo_types_structural.nim b/nimbus/db/aristo/aristo_desc/aristo_types_structural.nim index 1bee68f16..57b2e8e27 100644 --- a/nimbus/db/aristo/aristo_desc/aristo_types_structural.nim +++ b/nimbus/db/aristo/aristo_desc/aristo_types_structural.nim @@ -128,22 +128,40 @@ proc convertTo*(payload: PayloadRef; T: type Blob): T = of AccountData: result = rlp.encode payload.account -proc to*(node: NodeRef; T: type VertexRef): T = - ## Extract a copy of the `VertexRef` part from a `NodeRef`. For a leaf - ## type, the `lData` payload reference will be a shallow copy, i.e. only - ## the reference pointer is copied. - case node.vType: +proc dup*(pld: PayloadRef): PayloadRef = + ## Duplicate payload. + case pld.pType: + of BlobData: + PayloadRef( + pType: BlobData, + blob: pld.blob) + of AccountData: + PayloadRef( + pType: AccountData, + account: pld.account) + +proc dup*(vtx: VertexRef): VertexRef = + ## Duplicate vertex. + # Not using `deepCopy()` here (some `gc` needs `--deepcopy:on`.) + case vtx.vType: of Leaf: - T(vType: Leaf, - lPfx: node.lPfx, - lData: node.lData) + VertexRef( + vType: Leaf, + lPfx: vtx.lPfx, + lData: vtx.ldata.dup) of Extension: - T(vType: Extension, - ePfx: node.ePfx, - eVid: node.eVid) + VertexRef( + vType: Extension, + ePfx: vtx.ePfx, + eVid: vtx.eVid) of Branch: - T(vType: Branch, - bVid: node.bVid) + VertexRef( + vType: Branch, + bVid: vtx.bVid) + +proc to*(node: NodeRef; T: type VertexRef): T = + ## Extract a copy of the `VertexRef` part from a `NodeRef`. + node.VertexRef.dup # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_init.nim b/nimbus/db/aristo/aristo_init.nim index 2fcbac221..6fda2dc4e 100644 --- a/nimbus/db/aristo/aristo_init.nim +++ b/nimbus/db/aristo/aristo_init.nim @@ -77,10 +77,12 @@ proc init*( # ----------------- proc finish*(db: var AristoDb; flush = false) = - ## backend destructor. The argument `flush` indicates that a full database + ## Backend destructor. The argument `flush` indicates that a full database ## deletion is requested. If set ot left `false` the outcome might differ ## depending on the type of backend (e.g. the `BackendMemory` backend will ## always flush on close.) + ## + ## This distructor may be used on already *destructed* descriptors. 
   if not db.backend.isNil:
     db.backend.closeFn flush
   db.backend = AristoBackendRef(nil)
diff --git a/nimbus/db/aristo/aristo_init/aristo_memory.nim b/nimbus/db/aristo/aristo_init/aristo_memory.nim
index ee397942b..297fb11da 100644
--- a/nimbus/db/aristo/aristo_init/aristo_memory.nim
+++ b/nimbus/db/aristo/aristo_init/aristo_memory.nim
@@ -65,12 +65,6 @@ proc endSession(hdl: PutHdlRef; db: MemBackendRef): MemPutHdlRef =
   hdl.TypedPutHdlRef.finishSession db
   hdl.MemPutHdlRef
 
-proc cpy(vtx: VertexRef): VertexRef =
-  new result
-  result[] = vtx[]
-  if vtx.vType == Leaf:
-    result.lData[] = vtx.lData[]
-
 # ------------------------------------------------------------------------------
 # Private functions: interface
 # ------------------------------------------------------------------------------
@@ -80,7 +74,7 @@ proc getVtxFn(db: MemBackendRef): GetVtxFn =
     proc(vid: VertexID): Result[VertexRef,AristoError] =
       let vtx = db.sTab.getOrVoid vid
       if vtx.isValid:
-        return ok cpy(vtx)
+        return ok vtx.dup
       err(GetVtxNotFound)
 
 proc getKeyFn(db: MemBackendRef): GetKeyFn =
@@ -109,7 +103,7 @@ proc putVtxFn(db: MemBackendRef): PutVtxFn =
     proc(hdl: PutHdlRef; vrps: openArray[(VertexID,VertexRef)]) =
       let hdl = hdl.getSession db
       for (vid,vtx) in vrps:
-        hdl.sTab[vid] = cpy(vtx)
+        hdl.sTab[vid] = vtx.dup
 
 proc putKeyFn(db: MemBackendRef): PutKeyFn =
   result =
diff --git a/nimbus/db/aristo/aristo_merge.nim b/nimbus/db/aristo/aristo_merge.nim
index c7e11dd42..f82d55325 100644
--- a/nimbus/db/aristo/aristo_merge.nim
+++ b/nimbus/db/aristo/aristo_merge.nim
@@ -461,7 +461,7 @@ proc mergeNodeImpl(
   # Make sure that the `vid<->hashLbl` reverse mapping has been cached,
   # already. This is provided for if the `nodes` are processed in the right
   # order `root->.. ->leaf`.
-  var
+  let
     hashLbl = HashLabel(root: rootVid, key: hashKey)
     vid = db.top.pAmk.getOrVoid hashLbl
   if not vid.isValid:
@@ -517,7 +517,13 @@ proc mergeNodeImpl(
         vtx.bVid[n] = db.vidAttach bLbl
 
   db.top.pPrf.incl vid
-  db.top.sTab[vid] = vtx
+  if hasVtx:
+    let key = db.getKey vid
+    if key != hashKey:
+      db.top.sTab[vid] = vtx
+  else:
+    db.top.sTab[vid] = vtx
+
   ok vid
 
 # ------------------------------------------------------------------------------
diff --git a/tests/test_aristo.nim b/tests/test_aristo.nim
index 68017710e..69b2a003e 100644
--- a/tests/test_aristo.nim
+++ b/tests/test_aristo.nim
@@ -199,6 +199,7 @@ proc accountsRunner(
     fileInfo = sample.file.splitPath.tail.replace(".txt.gz","")
     listMode = if resetDb: "" else: ", merged data lists"
     baseDir = getTmpDir() / sample.name & "-accounts"
+    dbDir = baseDir / "tmp"
 
   defer:
     try: baseDir.removeDir except CatchableError: discard
 
   suite &"Aristo: accounts data dump from {fileInfo}{listMode}":
 
     test &"Merge {accLst.len} account lists to database":
-      # Local sud-directories needed as DB might be kept locked after close
-      let dbDir = baseDir / "tmp1"
       check noisy.test_mergeKvpList(accLst, dbDir, resetDb)
 
     test &"Merge {accLst.len} proof & account lists to database":
-      let dbDir = baseDir / "tmp2"
-      check noisy.test_mergeProofAndKvpList(accLst, resetDb)
+      check noisy.test_mergeProofAndKvpList(accLst, dbDir, resetDb)
 
     test &"Compare {accLst.len} account lists on database backends":
       if cmpBackends:
-        let dbDir = baseDir / "tmp3"
         check noisy.test_backendConsistency(accLst, dbDir, resetDb)
       else:
         skip()
 
     test &"Traverse accounts database w/{accLst.len} account lists":
-      let dbDir = baseDir / "tmp4"
       check noisy.test_nearbyKvpList(accLst, resetDb)
 
     test &"Delete accounts database, successively {accLst.len} entries":
-      let dbDir = baseDir / "tmp5"
       check noisy.test_delete accLst
 
@@ -242,6 +237,7 @@ proc storagesRunner(
     fileInfo = sample.file.splitPath.tail.replace(".txt.gz","")
     listMode = if resetDb: "" else: ", merged data lists"
     baseDir = getTmpDir() / sample.name & "-storage"
+    dbDir = baseDir / "tmp"
 
   defer:
     try: baseDir.removeDir except CatchableError: discard
 
   suite &"Aristo: storages data dump from {fileInfo}{listMode}":
 
     test &"Merge {stoLst.len} storage slot lists to database":
-      # Local sud-directories needed as DB might be kept locked after close
-      let dbDir = baseDir / "tmp1"
       check noisy.test_mergeKvpList(stoLst, dbDir, resetDb)
 
     test &"Merge {stoLst.len} proof & slots lists to database":
-      let dbDir = baseDir / "tmp2"
       check noisy.test_mergeProofAndKvpList(
-        stoLst, resetDb, fileInfo, oops)
+        stoLst, dbDir, resetDb, fileInfo, oops)
 
     test &"Compare {stoLst.len} slot lists on database backends":
-      let dbDir = baseDir / "tmp3"
       if cmpBackends:
         check noisy.test_backendConsistency(stoLst, dbDir, resetDb)
       else:
         skip()
 
     test &"Traverse storage slots database w/{stoLst.len} account lists":
-      let dbDir = baseDir / "tmp4"
       check noisy.test_nearbyKvpList(stoLst, resetDb)
 
     test &"Delete storage database, successively {stoLst.len} entries":
-      let dbDir = baseDir / "tmp5"
       check noisy.test_delete stoLst
 
 # ------------------------------------------------------------------------------
@@ -293,7 +283,7 @@ when isMainModule:
     noisy.miscRunner()
 
   # Borrowed from `test_sync_snap.nim`
-  when true and false:
+  when true: # and false:
     for n,sam in snapTestList:
       noisy.transcodeRunner(sam)
     for n,sam in snapTestStorageList:
diff --git a/tests/test_aristo/test_merge.nim b/tests/test_aristo/test_merge.nim
index 09b7ab0d1..e3dd74cf8 100644
--- a/tests/test_aristo/test_merge.nim
+++ b/tests/test_aristo/test_merge.nim
@@ -135,8 +135,10 @@ proc test_mergeKvpList*(
     rdbPath: string;                  # Rocks DB storage directory
     resetDb = false;
       ): bool =
-
-  var db: AristoDb
+  var
+    db: AristoDb
+  defer:
+    db.finish(flush=true)
   for n,w in list:
     if resetDb or db.top.isNil:
       db.finish(flush=true)
@@ -206,13 +208,12 @@ proc test_mergeKvpList*(
         check rc == Result[void,(VertexID,AristoError)].ok()
         return
 
-    block:
-      let rdbHist = block:
-        let rc = db.save
-        if rc.isErr:
-          check rc.error == AristoError(0)
-          return
-        rc.value
+    let rdbHist = block:
+      let rc = db.save
+      if rc.isErr:
+        check rc.error == AristoError(0)
+        return
+      rc.value
 
     when true and false:
       noisy.say "*** kvp(5)", "<", n, "/", lstLen-1, ">",
@@ -230,6 +231,7 @@
 proc test_mergeProofAndKvpList*(
     noisy: bool;
     list: openArray[ProofTrieData];
+    rdbPath: string;                  # Rocks DB storage directory
     resetDb = false;
     idPfx = "";
     oops: KnownHasherFailure = @[];
@@ -240,9 +242,17 @@ proc test_mergeProofAndKvpList*(
     db: AristoDb
     rootKey = HashKey.default
     count = 0
+  defer:
+    db.finish(flush=true)
   for n,w in list:
     if resetDb or w.root != rootKey or w.proof.len == 0:
-      db.top = AristoLayerRef()
+      db.finish(flush=true)
+      db = block:
+        let rc = AristoDb.init(BackendRocksDB,rdbPath)
+        if rc.isErr:
+          check rc.error == AristoError(0)
+          return
+        rc.value
       rootKey = w.root
       count = 0
     count.inc
@@ -255,29 +265,42 @@ proc test_mergeProofAndKvpList*(
       leafs = w.kvpLst.mapRootVid VertexID(1) # merge into main trie
 
     when true and false:
-      noisy.say "***", "sample(1) <", n, "/", lstLen-1, ">",
+      noisy.say "***", "proofs(1) <", n, "/", lstLen-1, ">",
         " groups=", count, " nLeafs=", leafs.len,
-        " db-dump\n    ", db.pp
+        "\n   cache\n    ", db.pp,
+        "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+        "\n   --------"
 
-    var proved: tuple[merged: int, dups: int, error: AristoError]
+    var
+      proved: tuple[merged: int, dups: int, error: AristoError]
+      preDb: string
     if 0 < w.proof.len:
       let rc = db.merge(rootKey, VertexID(1))
       if rc.isErr:
         check rc.error == AristoError(0)
         return
-      proved = db.merge(w.proof, rc.value)
+
+      preDb = db.pp
+      proved = db.merge(w.proof, rc.value) # , noisy)
+
       check proved.error in {AristoError(0),MergeHashKeyCachedAlready}
       check w.proof.len == proved.merged + proved.dups
       check db.top.lTab.len == lTabLen
-      check db.top.sTab.len == proved.merged + sTabLen
+      check db.top.sTab.len <= proved.merged + sTabLen
       check proved.merged < db.top.pAmk.len
-      check proved.merged < db.top.kMap.len
 
     when true and false:
       if 0 < w.proof.len:
-        noisy.say "***", "sample(2) <", n, "/", lstLen-1, ">",
-          " groups=", count, " nLeafs=", leafs.len, " proved=", proved,
-          " db-dump\n    ", db.pp
+        noisy.say "***", "proofs(2) <", n, "/", lstLen-1, ">",
+          " groups=", count,
+          " nLeafs=", leafs.len,
+          " proved=", proved,
+          "\n   pre-DB\n    ", preDb,
+          "\n   --------",
+          "\n   cache\n    ", db.pp,
+          "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+          "\n   --------"
+        return
 
     let merged = db.merge leafs
@@ -293,9 +316,11 @@ proc test_mergeProofAndKvpList*(
         return
 
     when true and false:
-      noisy.say "***", "sample(3) <", n, "/", lstLen-1, ">",
+      noisy.say "***", "proofs(3) <", n, "/", lstLen-1, ">",
         " groups=", count, " nLeafs=", leafs.len, " merged=", merged,
-        " db-dump\n    ", db.pp
+        "\n   cache\n    ", db.pp,
+        "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+        "\n   --------"
 
     block:
       let
@@ -314,23 +339,34 @@ proc test_mergeProofAndKvpList*(
 
       # Otherwise, check for correctness
       elif rc.isErr:
-        noisy.say "***", "<", n, "/", lstLen-1, ">",
+        noisy.say "***", "proofs(4) <", n, "/", lstLen-1, ">",
           " testId=", testId, " groups=", count,
           "\n   pre-DB",
           "\n    ", preDb,
           "\n   --------",
-          "\n    ", db.pp
+          "\n   cache\n    ", db.pp,
+          "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+          "\n   --------"
         check rc.error == (VertexID(0),AristoError(0))
         return
 
-    when true and false:
-      noisy.say "***", "sample(4) <", n, "/", lstLen-1, ">",
-        " groups=", count,
-        " db-dump\n    ", db.pp
+    let rdbHist = block:
+      let rc = db.save
+      if rc.isErr:
+        check rc.error == AristoError(0)
+        return
+      rc.value
 
     when true and false:
-      noisy.say "***", "sample(5) <", n, "/", lstLen-1, ">",
+      noisy.say "***", "proofs(5) <", n, "/", lstLen-1, ">",
         " groups=", count,
+        "\n   cache\n    ", db.pp,
+        "\n   backend\n    ", db.to(RdbBackendRef).pp(db),
+        "\n   --------"
+
+    when true and false:
+      noisy.say "***", "proofs(6) <", n, "/", lstLen-1, ">",
+        " groups=", count,
         " proved=", proved.pp,
         " merged=", merged.pp
   true
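
Note on the deep-copy change: the reason `dup` is now provided globally is that a
leaf vertex's `lData` payload is itself a `ref`, so the old
`to(NodeRef, VertexRef)` conversion, which copied only the reference pointer,
left the payload shared between original and copy. A minimal standalone sketch
of the idea (the types below are simplified stand-ins for the repo's
`PayloadRef`/`VertexRef`, not the real definitions):

  # Simplified stand-ins for PayloadRef/VertexRef, only to demonstrate
  # why the payload reference must be duplicated explicitly.
  type
    PayloadRef = ref object
      blob: seq[byte]
    VertexRef = ref object
      lData: PayloadRef

  proc dup(pld: PayloadRef): PayloadRef =
    # Assigning a `seq` copies its elements, so the duplicate owns its blob.
    PayloadRef(blob: pld.blob)

  proc dup(vtx: VertexRef): VertexRef =
    # Duplicate the payload ref as well, instead of sharing the pointer.
    VertexRef(lData: vtx.lData.dup)

  when isMainModule:
    let org = VertexRef(lData: PayloadRef(blob: @[1.byte, 2, 3]))
    let cpy = org.dup
    cpy.lData.blob[0] = 9            # mutate the copy only
    doAssert org.lData.blob[0] == 1  # the original is unaffected

With a pointer-sharing copy the final assertion would fail, which is why `to*`
is now implemented on top of `dup` and the memory backend's local `cpy` helper
could be dropped in favour of `vtx.dup` in `getVtxFn` and `putVtxFn`.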
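The deferred-cleanup bullet from the commit message boils down to one pattern,
repeated in both test functions: open the backend, then register
`finish(flush=true)` with `defer` so the RocksDB lock files are released on
every exit path, including the early `return`s in the `check` failure branches.
A sketch of that shape, assuming the `AristoDb.init`/`finish` API used above
(the `runSubTest` wrapper and the import path are illustrative, not repo code):

  import nimbus/db/aristo/aristo_init   # assumed module path for init/finish

  proc runSubTest(rdbPath: string): bool =
    var db = block:
      let rc = AristoDb.init(BackendRocksDB, rdbPath)
      if rc.isErr:
        return false                    # nothing to clean up yet
      rc.value
    # Runs on every exit path, so RocksDB never keeps its lock files
    # alive into the next sub-test.
    defer: db.finish(flush=true)
    # ... exercise `db` here; any early `return` still triggers the defer ...
    true

Since `finish` may be called on already *destructed* descriptors (see the doc
comment added in aristo_init.nim), the `defer` stays safe even when the loop
body re-initializes `db` after an explicit `finish`.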