diff --git a/nimbus/db/aristo/aristo_check.nim b/nimbus/db/aristo/aristo_check.nim index f3afd470e..e6f8ca918 100644 --- a/nimbus/db/aristo/aristo_check.nim +++ b/nimbus/db/aristo/aristo_check.nim @@ -1,5 +1,5 @@ # nimbus-eth1 -# Copyright (c) 2023 Status Research & Development GmbH +# Copyright (c) 2023-2024 Status Research & Development GmbH # Licensed under either of # * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or # http://www.apache.org/licenses/LICENSE-2.0) @@ -19,7 +19,7 @@ import stew/interval_set, results, ./aristo_walk/persistent, - "."/[aristo_desc, aristo_get, aristo_init, aristo_vid, aristo_utils], + "."/[aristo_desc, aristo_get, aristo_init, aristo_utils], ./aristo_check/[check_be, check_top] # ------------------------------------------------------------------------------ @@ -41,9 +41,6 @@ proc checkTop*( ## If `proofMode` is set `false`, the Merkle hashes are recompiled and must ## match. ## - ## * The hash table `kMap[]` and its inverse lookup table `pAmk[]` must - ## correnspond. - ## if proofMode: ? db.checkTopProofMode() else: diff --git a/nimbus/db/aristo/aristo_check/check_be.nim b/nimbus/db/aristo/aristo_check/check_be.nim index ea9e2f3c3..5c90162b9 100644 --- a/nimbus/db/aristo/aristo_check/check_be.nim +++ b/nimbus/db/aristo/aristo_check/check_be.nim @@ -16,7 +16,7 @@ import stew/interval_set, ../../aristo, ../aristo_walk/persistent, - ".."/[aristo_desc, aristo_get, aristo_layers, aristo_vid] + ".."/[aristo_desc, aristo_get, aristo_layers] const Vid2 = @[VertexID(LEAST_FREE_VID)].toHashSet @@ -76,6 +76,17 @@ proc toNodeBE( return ok node return err(vid) +proc vidReorgAlways(vGen: seq[VertexID]): seq[VertexID] = + ## See `vidReorg()`, this one always sorts and optimises + ## + if 1 < vGen.len: + let lst = vGen.mapIt(uint64(it)).sorted(Descending).mapIt(VertexID(it)) + for n in 0 .. lst.len-2: + if lst[n].uint64 != lst[n+1].uint64 + 1: + return lst[n+1 .. 
lst.len-1] & @[lst[n]] + return @[lst[^1]] + vGen + # ------------------------------------------------------------------------------ # Public functions # ------------------------------------------------------------------------------ @@ -134,7 +145,7 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef]( let vGen = block: let rc = db.getIdgBE() if rc.isOk: - rc.value.toHashSet + rc.value.vidReorgAlways.toHashSet elif rc.error == GetIdgNotFound: EmptyVidSeq.toHashSet else: @@ -149,7 +160,7 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef]( # Check top layer cache against backend if cache: - if db.dirty: + if 0 < db.dirty.len: return err((VertexID(0),CheckBeCacheIsDirty)) # Check structural table @@ -202,7 +213,7 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef]( # Check vGen let - vGen = db.vGen.vidReorg.toHashSet + vGen = db.vGen.vidReorgAlways.toHashSet vGenExpected = vids.to(HashSet[VertexID]) delta = vGenExpected -+- vGen # symmetric difference if 0 < delta.len: @@ -214,8 +225,7 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef]( discard else: let delta = delta.toSeq - if delta.len != 1 or - delta[0] != VertexID(1) or VertexID(1) in vGen: + if delta.len != 1 or delta[0] != VertexID(1) or VertexID(1) in vGen: return err((delta.sorted[^1],CheckBeCacheGarbledVGen)) ok() diff --git a/nimbus/db/aristo/aristo_check/check_top.nim b/nimbus/db/aristo/aristo_check/check_top.nim index 7a07362b9..af721cf36 100644 --- a/nimbus/db/aristo/aristo_check/check_top.nim +++ b/nimbus/db/aristo/aristo_check/check_top.nim @@ -42,13 +42,7 @@ proc checkTopStrict*( if key != node.digestTo(HashKey): return err((vid,CheckStkVtxKeyMismatch)) - let revVids = db.layersGetYekOrVoid key - if not revVids.isValid: - return err((vid,CheckStkRevKeyMissing)) - if vid notin revVids: - return err((vid,CheckStkRevKeyMismatch)) - - elif not db.dirty or db.layersGetKey(vid).isErr: + elif db.dirty.len == 0 or db.layersGetKey(vid).isErr: # So `vtx` exists but not `key`, so cache is supposed dirty and the # vertex has a zero entry. 
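
The new `vidReorgAlways()` helper above normalises the recycled vertex ID list so that `checkBE()` can compare the cached and backend copies as sets. A minimal standalone sketch of the same normalisation, assuming the reading that the last entry acts as the "next free" marker; the name `reorg` and the plain `uint64` values are illustrative stand-ins only:

import std/algorithm

proc reorg(vGen: seq[uint64]): seq[uint64] =
  if 1 < vGen.len:
    let lst = vGen.sorted(Descending)
    for n in 0 .. lst.len - 2:
      if lst[n] != lst[n+1] + 1:
        # Gap found: keep the tail as recycled IDs, largest entry last
        return lst[n+1 .. lst.len-1] & @[lst[n]]
    # Fully contiguous: a single "next free" marker suffices
    return @[lst[^1]]
  vGen

when isMainModule:
  doAssert reorg(@[5u64, 6, 7, 8, 9]) == @[5u64]           # collapses
  doAssert reorg(@[5u64, 6, 7, 9]) == @[7u64, 6, 5, 9]     # gap at 8
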
return err((vid,CheckStkVtxKeyMissing)) @@ -61,14 +55,6 @@ proc checkTopStrict*( if not db.getVtx(vid).isValid: return err((vid,CheckStkKeyStrayZeroEntry)) - let - pAmkVtxCount = db.layersWalkYek.toSeq.mapIt(it[1]).foldl(a + b.len, 0) - sTabVtxCount = db.layersWalkVtx.toSeq.mapIt(it[1]).filterIt(it.isValid).len - - # Non-zero values mist sum up the same - if pAmkVtxCount + zeroKeys.len < sTabVtxCount: - return err((VertexID(0),CheckStkVtxCountMismatch)) - ok() @@ -87,12 +73,6 @@ proc checkTopProofMode*( return err((vid,CheckRlxVtxKeyMissing)) if key != node.digestTo(HashKey): return err((vid,CheckRlxVtxKeyMismatch)) - - let revVids = db.layersGetYekOrVoid key - if not revVids.isValid: - return err((vid,CheckRlxRevKeyMissing)) - if vid notin revVids: - return err((vid,CheckRlxRevKeyMismatch)) else: for (vid,key) in db.layersWalkKey: if key.isValid: # Otherwise to be deleted @@ -102,14 +82,9 @@ proc checkTopProofMode*( continue if key != node.digestTo(HashKey): return err((vid,CheckRlxVtxKeyMismatch)) - - let revVids = db.layersGetYekOrVoid key - if not revVids.isValid: - return err((vid,CheckRlxRevKeyMissing)) - if vid notin revVids: - return err((vid,CheckRlxRevKeyMismatch)) ok() + proc checkTopCommon*( db: AristoDbRef; # Database, top layer ): Result[void,(VertexID,AristoError)] = @@ -150,18 +125,6 @@ proc checkTopCommon*( if kMapNilCount != 0 and kMapNilCount < nNilVtx: return err((VertexID(0),CheckAnyVtxEmptyKeyMismatch)) - let pAmkVtxCount = db.layersWalkYek.toSeq.mapIt(it[1]).foldl(a + b.len, 0) - if pAmkVtxCount != kMapCount: - var knownKeys: HashSet[VertexID] - for (key,vids) in db.layersWalkYek: - for vid in vids: - if db.layersGetKey(vid).isErr: - return err((vid,CheckAnyRevVtxMissing)) - if vid in knownKeys: - return err((vid,CheckAnyRevVtxDup)) - knownKeys.incl vid - return err((VertexID(0),CheckAnyRevCountMismatch)) # should not apply(!) 
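
The `db.dirty.len` tests above reflect that `dirty` is no longer a boolean but a set of sub-trie roots that still need `hashify()`. A minimal standalone sketch of that bookkeeping; `MockLayer`, `markUpdated` and `isClean` are invented names for illustration:

import std/sets

type MockLayer = object
  dirty: HashSet[uint64]      # roots of sub-tries that still need hashing

proc markUpdated(layer: var MockLayer; root: uint64) =
  layer.dirty.incl root       # cf. `db.top.final.dirty.incl root` in aristo_layers

proc isClean(layer: MockLayer): bool =
  layer.dirty.len == 0        # replaces the former boolean `db.dirty` test

when isMainModule:
  var top = MockLayer()
  doAssert top.isClean
  top.markUpdated(1u64)       # e.g. the main state trie root
  doAssert not top.isClean
  top.dirty.clear()           # what a successful `hashify()` run does
  doAssert top.isClean
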
- for vid in db.pPrf: if db.layersGetKey(vid).isErr: return err((vid,CheckAnyVtxLockWithoutKey)) diff --git a/nimbus/db/aristo/aristo_constants.nim b/nimbus/db/aristo/aristo_constants.nim index 48903ef78..d143979ad 100644 --- a/nimbus/db/aristo/aristo_constants.nim +++ b/nimbus/db/aristo/aristo_constants.nim @@ -29,7 +29,7 @@ const ## Useful shortcut VOID_CODE_HASH* = EMPTY_CODE_HASH - ## Equivalent of `nil` for `Account` object code hash + ## Equivalent of `nil` for `Account` object code hash field VOID_HASH_KEY* = HashKey() ## Void equivalent for Merkle hash value diff --git a/nimbus/db/aristo/aristo_debug.nim b/nimbus/db/aristo/aristo_debug.nim index f45615ffc..de04591ee 100644 --- a/nimbus/db/aristo/aristo_debug.nim +++ b/nimbus/db/aristo/aristo_debug.nim @@ -14,7 +14,7 @@ import std/[algorithm, sequtils, sets, strutils, tables], eth/[common, trie/nibbles], results, - stew/byteutils, + stew/[byteutils, interval_set], ./aristo_desc/desc_backend, ./aristo_init/[memory_db, memory_only, rocks_db], ./aristo_filter/filter_scheduler, @@ -27,20 +27,11 @@ import proc orDefault(db: AristoDbRef): AristoDbRef = if db.isNil: AristoDbRef(top: LayerRef.init()) else: db -proc del(xMap: var VidsByKeyTab; key: HashKey; vid: VertexID) = - # Update `xMap` - var vidsLen = -1 - xMap.withValue(key,value): - value[].excl vid - vidsLen = value[].len - if vidsLen == 0: - xMap.del key - -proc del(xMap: var VidsByKeyTab; key: HashKey; vids: HashSet[VertexID]) = - for vid in vids: - xMap.del(key, vid) - -proc add(xMap: var VidsByKeyTab; key: HashKey; vid: VertexID) = +proc add( + xMap: var Table[HashKey,HashSet[VertexID]]; + key: HashKey; + vid: VertexID; + ) = xMap.withValue(key,value): value[].incl vid do: # else if not found @@ -54,18 +45,11 @@ proc toHex(w: VertexID): string = proc toHexLsb(w: int8): string = $"0123456789abcdef"[w and 15] -proc sortedKeys(lTab: Table[LeafTie,VertexID]): seq[LeafTie] = - lTab.keys.toSeq.sorted(cmp = proc(a,b: LeafTie): int = cmp(a,b)) - proc sortedKeys[T](tab: Table[VertexID,T]): seq[VertexID] = - tab.keys.toSeq.mapIt(it.uint64).sorted.mapIt(it.VertexID) + tab.keys.toSeq.sorted proc sortedKeys(pPrf: HashSet[VertexID]): seq[VertexID] = - pPrf.toSeq.mapIt(it.uint64).sorted.mapIt(it.VertexID) - -proc sortedKeys[T](pAmk: Table[HashKey,T]): seq[HashKey] = - pAmk.keys.toSeq.sorted cmp - + pPrf.toSeq.sorted proc toPfx(indent: int; offset = 0): string = if 0 < indent+offset: "\n" & " ".repeat(indent+offset) else: "" @@ -106,10 +90,10 @@ proc stripZeros(a: string; toExp = false): string = proc vidCode(key: HashKey, db: AristoDbRef): uint64 = if key.isValid: block: - let vids = db.layersGetYekOrVoid key - if vids.isValid: - db.xMap.del(key, vids) - return vids.sortedKeys[0].uint64 + let vid = db.layerGetProofVidOrVoid key + if vid.isValid: + db.xMap.add(key, vid) + return vid.uint64 block: let vids = db.xMap.getOrVoid key if vids.isValid: @@ -123,13 +107,11 @@ proc ppKeyOk( vid: VertexID; ): string = if key.isValid and vid.isValid: - let - vids = db.layersGetYekOrVoid key - if vids.isValid: - db.xMap.del(key, vids) - if vid notin vids: - result = "(!)" - return + block: + let vid = db.layerGetProofVidOrVoid key + if vid.isValid: + db.xMap.add(key, vid) + return block: let vids = db.xMap.getOrVoid key if vids.isValid: @@ -195,16 +177,13 @@ proc ppQid(qid: QueueID): string = proc ppVidList(vGen: openArray[VertexID]): string = "[" & vGen.mapIt(it.ppVid).join(",") & "]" -#proc ppVidList(vGen: HashSet[VertexID]): string = -# "{" & vGen.sortedKeys.mapIt(it.ppVid).join(",") & "}" - proc 
ppKey(key: HashKey; db: AristoDbRef; pfx = true): string = proc getVids(): tuple[vids: HashSet[VertexID], xMapTag: string] = block: - let vids = db.layersGetYekOrVoid key - if vids.isValid: - db.xMap.del(key, vids) - return (vids, "") + let vid = db.layerGetProofVidOrVoid key + if vid.isValid: + db.xMap.add(key, vid) + return (@[vid].toHashSet, "") block: let vids = db.xMap.getOrVoid key if vids.isValid: @@ -290,23 +269,24 @@ proc ppSTab( .mapIt("(" & it[0].ppVid & "," & it[1].ppVtx(db,it[0]) & ")") .join(indent.toPfx(1)) & "}" -proc ppLTab( - lTab: Table[LeafTie,VertexID]; - db: AristoDbRef; - indent = 4; - ): string = - "{" & lTab.sortedKeys - .mapIt((it, lTab.getOrVoid it)) - .mapIt("(" & it[0].ppLeafTie(db) & "," & it[1].ppVid & ")") - .join(indent.toPfx(1)) & "}" - proc ppPPrf(pPrf: HashSet[VertexID]): string = - "{" & pPrf.sortedKeys.mapIt(it.ppVid).join(",") & "}" + result = "{" + if 0 < pPrf.len: + let isr = IntervalSetRef[VertexID,uint64].init() + for w in pPrf: + doAssert isr.merge(w,w) == 1 + for iv in isr.increasing(): + result &= iv.minPt.ppVid + if 1 < iv.len: + result &= ".. " & iv.maxPt.ppVid + result &= ", " + result.setlen(result.len - 2) + #result &= pPrf.sortedKeys.mapIt(it.ppVid).join(",") + result &= "}" proc ppXMap*( db: AristoDbRef; kMap: Table[VertexID,HashKey]; - pAmk: VidsByKeyTab; indent: int; ): string = @@ -317,7 +297,7 @@ proc ppXMap*( oops: HashSet[VertexID] block: var vids: HashSet[VertexID] - for w in pAmk.values: + for w in db.xMap.values: for v in w: if v in vids: oops.incl v @@ -328,18 +308,17 @@ proc ppXMap*( # Vertex IDs without forward mapping `kMap: VertexID -> HashKey` var revOnly: Table[VertexID,HashKey] - for (key,vids) in pAmk.pairs: + for (key,vids) in db.xMap.pairs: for vid in vids: if not kMap.hasKey vid: revOnly[vid] = key - let revKeys = - revOnly.keys.toSeq.mapIt(it.uint64).sorted.mapIt(it.VertexID) + let revKeys =revOnly.keys.toSeq.sorted proc ppNtry(n: uint64): string = var s = VertexID(n).ppVid let key = kMap.getOrVoid VertexID(n) if key.isValid: - let vids = pAmk.getOrVoid key + let vids = db.xMap.getOrVoid key if VertexID(n) notin vids or 1 < vids.len: s = "(" & s & "," & key.ppKey(db) elif key.len < 32: @@ -382,7 +361,7 @@ proc ppXMap*( let key = kMap.getOrVoid vid if key.isValid: cache.add (vid.uint64, key.vidCode(db), vid in multi) - let vids = pAmk.getOrVoid key + let vids = db.xMap.getOrVoid key if (0 < vids.len and vid notin vids) or key.len < 32: cache[^1][2] = true else: @@ -417,6 +396,16 @@ proc ppXMap*( else: result &= "}" +proc ppFRpp( + fRpp: Table[HashKey,VertexID]; + db: AristoDbRef; + indent = 4; + ): string = + let + xMap = fRpp.pairs.toSeq.mapIt((it[1],it[0])).toTable + xStr = db.ppXMap(xMap, indent) + "<" & xStr[1..^2] & ">" + proc ppFilter( fl: FilterRef; db: AristoDbRef; @@ -479,15 +468,15 @@ proc ppLayer( db: AristoDbRef; vGenOk: bool; sTabOk: bool; - lTabOk: bool; kMapOk: bool; pPrfOk: bool; + fRppOk: bool; indent = 4; ): string = let pfx1 = indent.toPfx(1) pfx2 = indent.toPfx(2) - nOKs = sTabOk.ord + lTabOk.ord + kMapOk.ord + pPrfOk.ord + vGenOk.ord + nOKs = vGenOk.ord + sTabOk.ord + kMapOk.ord + pPrfOk.ord + fRppOk.ord tagOk = 1 < nOKs var pfy = "" @@ -515,27 +504,28 @@ proc ppLayer( tLen = layer.delta.sTab.len info = "sTab(" & $tLen & ")" result &= info.doPrefix(0 < tLen) & layer.delta.sTab.ppSTab(db,indent+2) - if lTabOk: - let - tLen = layer.final.lTab.len - info = "lTab(" & $tLen & ")" - result &= info.doPrefix(0 < tLen) & layer.final.lTab.ppLTab(db,indent+2) if kMapOk: let tLen = layer.delta.kMap.len - 
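
The reworked `ppPPrf()` above renders consecutive locked vertex IDs as ranges via `stew/interval_set`. A stdlib-only sketch of the same range compression, using plain `uint64` values and decimal output instead of the hex `ppVid` format; `ppRanges` is an invented name:

import std/[algorithm, sequtils, sets, strutils]

proc ppRanges(ids: HashSet[uint64]): string =
  let lst = ids.toSeq.sorted
  var parts: seq[string]
  var i = 0
  while i < lst.len:
    var j = i
    while j + 1 < lst.len and lst[j+1] == lst[j] + 1:
      inc j                                   # extend the contiguous run
    if i == j:
      parts.add "$" & $lst[i]
    else:
      parts.add "$" & $lst[i] & ".. $" & $lst[j]
    i = j + 1
  "{" & parts.join(", ") & "}"

when isMainModule:
  doAssert ppRanges([3u64, 4, 5, 9].toHashSet) == "{$3.. $5, $9}"
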
uLen = layer.delta.pAmk.len + uLen = db.xMap.len lInf = if tLen == uLen: $tLen else: $tLen & "," & $uLen info = "kMap(" & lInf & ")" result &= info.doPrefix(0 < tLen + uLen) - result &= db.ppXMap(layer.delta.kMap, layer.delta.pAmk, indent+2) + result &= db.ppXMap(layer.delta.kMap, indent+2) if pPrfOk: let tLen = layer.final.pPrf.len info = "pPrf(" & $tLen & ")" result &= info.doPrefix(0 < tLen) & layer.final.pPrf.ppPPrf + if fRppOk: + let + tLen = layer.final.fRpp.len + info = "fRpp(" & $tLen & ")" + result &= info.doPrefix(0 < tLen) & layer.final.fRpp.ppFRpp(db,indent+2) if 0 < nOKs: let - info = if layer.final.dirty: "dirty" else: "clean" + info = if layer.final.dirty.len == 0: "clean" + else: "dirty{" & layer.final.dirty.ppVids & "}" result &= info.doPrefix(false) # ------------------------------------------------------------------------------ @@ -637,13 +627,6 @@ proc pp*( ): string = sTab.ppSTab(db.orDefault) -proc pp*( - lTab: Table[LeafTie,VertexID]; - db = AristoDbRef(nil); - indent = 4; - ): string = - lTab.ppLTab(db.orDefault, indent) - proc pp*(pPrf: HashSet[VertexID]): string = pPrf.ppPPrf @@ -654,7 +637,7 @@ proc pp*(leg: Leg; db = AristoDbRef(nil)): string = let key = db.layersGetKeyOrVoid leg.wp.vid if not key.isValid: result &= "ø" - elif leg.wp.vid notin db.layersGetYekOrVoid key: + elif leg.wp.vid notin db.xMap.getOrVoid key: result &= key.ppKey(db) result &= "," if 0 <= leg.nibble: @@ -683,25 +666,7 @@ proc pp*(kMap: Table[VertexID,HashKey]; indent = 4): string = .join("," & indent.toPfx(1)) & "}" proc pp*(kMap: Table[VertexID,HashKey]; db: AristoDbRef; indent = 4): string = - db.ppXMap(kMap, db.layersCc.delta.pAmk, indent) - -proc pp*( - pAmk: Table[HashKey,VertexID]; - db = AristoDbRef(nil); - indent = 4; - ): string = - let db = db.orDefault - "{" & pAmk.sortedKeys - .mapIt((it, pAmk.getOrVoid it)) - .mapIt("(" & it[0].ppKey(db) & "," & it[1].ppVid & ")") - .join("," & indent.toPfx(1)) & "}" - -proc pp*(pAmk: VidsByKeyTab; db = AristoDbRef(nil); indent = 4): string = - let db = db.orDefault - "{" & pAmk.sortedKeys - .mapIt((it, pAmk.getOrVoid it)) - .mapIt("(" & it[0].ppKey(db) & "," & it[1].ppVids & ")") - .join("," & indent.toPfx(1)) & "}" + db.ppXMap(kMap, indent) # --------------------- @@ -721,7 +686,7 @@ proc pp*( indent = 4; ): string = layer.ppLayer( - db, vGenOk=true, sTabOk=true, lTabOk=true, kMapOk=true, pPrfOk=true) + db, vGenOk=true, sTabOk=true, kMapOk=true, pPrfOk=true, fRppOk=true) proc pp*( layer: LayerRef; @@ -730,7 +695,7 @@ proc pp*( indent = 4; ): string = layer.ppLayer( - db, vGenOk=true, sTabOk=xTabOk, lTabOk=xTabOk, kMapOk=true, pPrfOk=true) + db, vGenOk=true, sTabOk=xTabOk, kMapOk=true, pPrfOk=true, fRppOk=true) proc pp*( layer: LayerRef; @@ -741,7 +706,7 @@ proc pp*( indent = 4; ): string = layer.ppLayer( - db, vGenOk=other, sTabOk=xTabOk, lTabOk=xTabOk, kMapOk=kMapOk, pPrfOk=other) + db, vGenOk=other, sTabOk=xTabOk, kMapOk=kMapOk, pPrfOk=other, fRppOk=other) proc pp*( @@ -798,13 +763,9 @@ proc pp*( m = layers.len - n - 1 l = db.layersCc m a = w.delta.kMap.values.toSeq.filterIt(not it.isValid).len - b = w.delta.pAmk.values.toSeq.filterIt(not it.isValid).len c = l.delta.kMap.values.toSeq.filterIt(not it.isValid).len - d = l.delta.pAmk.values.toSeq.filterIt(not it.isValid).len result &= " (" & $(w.delta.kMap.len - a) & "," & $a - result &= ";" & $(w.delta.pAmk.len - b) & "," & $b & ")" lStr &= " " & $m & "=(" & $(l.delta.kMap.len - c) & "," & $c - lStr &= ";" & $(l.delta.pAmk.len - d) & "," & $d & ")" result &= " --" & lStr result &= 
indent.toPfx if backendOk: diff --git a/nimbus/db/aristo/aristo_delete.nim b/nimbus/db/aristo/aristo_delete.nim index 7c4e58993..50ba00b59 100644 --- a/nimbus/db/aristo/aristo_delete.nim +++ b/nimbus/db/aristo/aristo_delete.nim @@ -16,7 +16,7 @@ {.push raises: [].} import - std/[sets, tables, typetraits], + std/[sets, typetraits], chronicles, eth/[common, trie/nibbles], results, @@ -62,20 +62,14 @@ proc branchStillNeeded(vtx: VertexRef): Result[int,void] = # ----------- -proc nullifyKey( - db: AristoDbRef; # Database, top layer - vid: VertexID; # Vertex IDs to clear - ) = - # Register for void hash (to be recompiled) - db.layersResKey vid - proc disposeOfVtx( db: AristoDbRef; # Database, top layer + root: VertexID; vid: VertexID; # Vertex IDs to clear ) = # Remove entry - db.layersResVtx vid - db.layersResKey vid + db.layersResVtx(root, vid) + db.layersResKey(root, vid) db.vidDispose vid # Recycle ID # ------------------------------------------------------------------------------ @@ -118,7 +112,7 @@ proc collapseBranch( of Extension: # (2) # Merge `br` into ^3 (update `xt`) - db.disposeOfVtx xt.vid + db.disposeOfVtx(hike.root, xt.vid) xt.vid = par.vid xt.vtx.ePfx = par.vtx.ePfx & xt.vtx.ePfx @@ -129,7 +123,7 @@ proc collapseBranch( # Replace `br` (use `xt` as-is) discard - db.layersPutVtx(xt.vid, xt.vtx) + db.layersPutVtx(hike.root, xt.vid, xt.vtx) ok() @@ -157,7 +151,7 @@ proc collapseExt( vType: Extension, ePfx: @[nibble].initNibbleRange.slice(1) & vtx.ePfx, eVid: vtx.eVid)) - db.disposeOfVtx br.vtx.bVid[nibble] # `vtx` is obsolete now + db.disposeOfVtx(hike.root, br.vtx.bVid[nibble]) # `vtx` is obsolete now if 2 < hike.legs.len: # (1) or (2) let par = hike.legs[^3].wp @@ -168,7 +162,7 @@ proc collapseExt( of Extension: # (2) # Replace ^3 by `^3 & ^2 & vtx` (update `xt`) - db.disposeOfVtx xt.vid + db.disposeOfVtx(hike.root, xt.vid) xt.vid = par.vid xt.vtx.ePfx = par.vtx.ePfx & xt.vtx.ePfx @@ -179,7 +173,7 @@ proc collapseExt( # Replace ^2 by `^2 & vtx` (use `xt` as-is) discard - db.layersPutVtx(xt.vid, xt.vtx) + db.layersPutVtx(hike.root, xt.vid, xt.vtx) ok() @@ -210,30 +204,29 @@ proc collapseLeaf( vType: Leaf, lPfx: @[nibble].initNibbleRange.slice(1) & vtx.lPfx, lData: vtx.lData)) - db.nullifyKey lf.vid # `vtx` was modified + db.layersResKey(hike.root, lf.vid) # `vtx` was modified if 2 < hike.legs.len: # (1), (2), or (3) - db.disposeOfVtx br.vid # `br` is obsolete now + db.disposeOfVtx(hike.root, br.vid) # `br` is obsolete now # Merge `br` into the leaf `vtx` and unlink `br`. let par = hike.legs[^3].wp.dup # Writable vertex case par.vtx.vType: of Branch: # (1) # Replace `vtx` by `^2 & vtx` (use `lf` as-is) par.vtx.bVid[hike.legs[^3].nibble] = lf.vid - db.layersPutVtx(par.vid, par.vtx) - db.layersPutVtx(lf.vid, lf.vtx) + db.layersPutVtx(hike.root, par.vid, par.vtx) + db.layersPutVtx(hike.root, lf.vid, lf.vtx) # Make sure that there is a cache enty in case the leaf was pulled from # the backend. let lfPath = hike.legsTo(hike.legs.len - 2, NibblesSeq) & lf.vtx.lPfx tag = lfPath.pathToTag.valueOr: return err((lf.vid,error)) - db.top.final.lTab[LeafTie(root: hike.root, path: tag)] = lf.vid return ok() of Extension: # (2) or (3) # Merge `^3` into `lf` but keep the leaf vertex ID unchanged. This - # avoids some `lTab[]` registry update. + # can avoid some extra updates. 
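
A toy model of the collapse idea driven by `branchStillNeeded()` above: once a delete leaves a branch with a single remaining child, the intermediate vertex disappears and its path prefix is pushed down into the survivor, roughly what `collapseBranch`/`collapseExt`/`collapseLeaf` do for the real vertex types. `ToyNode` and the string prefix are invented stand-ins:

type ToyNode = object
  prefix: string              # stand-in for the nibble path prefix
  children: seq[ToyNode]      # stand-in for the branch slots

proc collapse(n: ToyNode): ToyNode =
  if n.children.len == 1:
    # Only one child left: drop the intermediate node, merge its prefix down
    result = n.children[0]
    result.prefix = n.prefix & result.prefix
  else:
    result = n

when isMainModule:
  let branch = ToyNode(prefix: "a", children: @[ToyNode(prefix: "bc")])
  doAssert collapse(branch).prefix == "abc"
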
lf.vtx.lPfx = par.vtx.ePfx & lf.vtx.lPfx if 3 < hike.legs.len: # (2) @@ -241,21 +234,20 @@ proc collapseLeaf( let gpr = hike.legs[^4].wp.dup # Writable vertex if gpr.vtx.vType != Branch: return err((gpr.vid,DelBranchExpexted)) - db.disposeOfVtx par.vid # `par` is obsolete now + db.disposeOfVtx(hike.root, par.vid) # `par` is obsolete now gpr.vtx.bVid[hike.legs[^4].nibble] = lf.vid - db.layersPutVtx(gpr.vid, gpr.vtx) - db.layersPutVtx(lf.vid, lf.vtx) + db.layersPutVtx(hike.root, gpr.vid, gpr.vtx) + db.layersPutVtx(hike.root, lf.vid, lf.vtx) # Make sure that there is a cache enty in case the leaf was pulled from # the backend. let lfPath = hike.legsTo(hike.legs.len - 3, NibblesSeq) & lf.vtx.lPfx tag = lfPath.pathToTag.valueOr: return err((lf.vid,error)) - db.top.final.lTab[LeafTie(root: hike.root, path: tag)] = lf.vid return ok() # No grandparent, so ^3 is root vertex # (3) - db.layersPutVtx(par.vid, lf.vtx) + db.layersPutVtx(hike.root, par.vid, lf.vtx) # Continue below of Leaf: @@ -263,39 +255,18 @@ proc collapseLeaf( else: # (4) # Replace ^2 by `^2 & vtx` (use `lf` as-is) # `br` is root vertex - db.nullifyKey br.vid # root was changed - db.layersPutVtx(br.vid, lf.vtx) + db.layersResKey(hike.root, br.vid) # root was changed + db.layersPutVtx(hike.root, br.vid, lf.vtx) # Continue below - # Common part for setting up `lf` as root vertex # Rest of (3) or (4) - let rc = lf.vtx.lPfx.pathToTag - if rc.isErr: - return err((br.vid,rc.error)) - # - # No need to update the cache unless `lf` is present there. The leaf path - # as well as the value associated with the leaf path has not been changed. - let lfTie = LeafTie(root: hike.root, path: rc.value) - if db.lTab.hasKey lfTie: - db.top.final.lTab[lfTie] = lf.vid - # Clean up stale leaf vertex which has moved to root position - db.disposeOfVtx lf.vid - - # If some `Leaf` vertex was installed as root, there must be a an extra - # `LeafTie` lookup entry. - let rootVtx = db.getVtx hike.root - if rootVtx.isValid and - rootVtx != hike.legs[0].wp.vtx and - rootVtx.vType == Leaf: - let tag = rootVtx.lPfx.pathToTag.valueOr: - return err((hike.root,error)) - db.top.final.lTab[LeafTie(root: hike.root, path: tag)] = hike.root + db.disposeOfVtx(hike.root, lf.vid) ok() # ------------------------- -proc delSubTree( +proc delSubTreeImpl( db: AristoDbRef; # Database, top layer root: VertexID; # Root vertex accPath: PathID; # Needed for real storage tries @@ -330,7 +301,10 @@ proc delSubTree( # Mark nodes deleted for vid in dispose: - db.disposeOfVtx vid + db.disposeOfVtx(root, vid) + + # Squeze list of recycled vertex IDs + db.top.final.vGen = db.vGen.vidReorg() ok() @@ -361,7 +335,7 @@ proc deleteImpl( if vid.isValid and db.getVtx(vid).isValid: return err((vid,DelDanglingStoTrie)) - db.disposeOfVtx lf.vid + db.disposeOfVtx(hike.root, lf.vid) if 1 < hike.legs.len: # Get current `Branch` vertex `br` @@ -374,14 +348,14 @@ proc deleteImpl( # Unlink child vertex from structural table br.vtx.bVid[hike.legs[^2].nibble] = VertexID(0) - db.layersPutVtx(br.vid, br.vtx) + db.layersPutVtx(hike.root, br.vid, br.vtx) # Clear all keys up to the root key for n in 0 .. hike.legs.len - 2: let vid = hike.legs[n].wp.vid if vid in db.top.final.pPrf: return err((vid, DelBranchLocked)) - db.nullifyKey vid + db.layersResKey(hike.root, vid) let nibble = block: let rc = br.vtx.branchStillNeeded() @@ -407,9 +381,8 @@ proc deleteImpl( of Leaf: ? db.collapseLeaf(hike, nibble.byte, nxt.vtx) - # Make sure that there is a cache entry so the hasher can label this path - # at a later state. 
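
The `disposeOfVtx()` calls above hand deleted IDs back via `vidDispose`, and the added `vidReorg()` "squeeze" keeps that list compact. A standalone mock of such a recycle list, assuming the reading that the last entry is the never-used counter; `fetch` and `dispose` are invented names, not the `aristo_vid` API:

proc fetch(vGen: var seq[uint64]): uint64 =
  ## Hand out an ID, preferring recycled ones over the top counter.
  if vGen.len == 1:
    result = vGen[0]
    vGen[0] = result + 1       # bump the "next free" marker
  else:
    result = vGen[^2]          # any recycled entry will do
    vGen.del(vGen.len - 2)

proc dispose(vGen: var seq[uint64]; vid: uint64) =
  ## Return a deleted vertex ID to the recycle list.
  vGen.insert(vid, vGen.len - 1)

when isMainModule:
  var vGen = @[5u64]           # next never-used ID is 5
  doAssert vGen.fetch == 5 and vGen == @[6u64]
  vGen.dispose 3               # some vertex 3 was deleted
  doAssert vGen == @[3u64, 6]
  doAssert vGen.fetch == 3     # recycled IDs are reused first
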
- db.top.final.lTab[lty] = VertexID(0) + # Squeze list of recycled vertex IDs + db.top.final.vGen = db.vGen.vidReorg() ok(not db.getVtx(hike.root).isValid) @@ -432,7 +405,7 @@ proc delete*( ## case, an account must exists. If there is payload of type `AccountData`, ## its `storageID` field must be unset or equal to the `hike.root` vertex ID. ## - db.delSubTree(root, accPath) + db.delSubTreeImpl(root, accPath) proc delete*( db: AristoDbRef; # Database, top layer @@ -450,7 +423,6 @@ proc delete*( ## ## The return code is `true` iff the trie has become empty. ## - # Need path in order to remove it from `lTab[]` let lty = LeafTie( root: hike.root, path: ? hike.to(NibblesSeq).pathToTag().mapErr toVae) diff --git a/nimbus/db/aristo/aristo_desc.nim b/nimbus/db/aristo/aristo_desc.nim index 78dfb7b7b..0d2c14e5d 100644 --- a/nimbus/db/aristo/aristo_desc.nim +++ b/nimbus/db/aristo/aristo_desc.nim @@ -78,7 +78,7 @@ type dudes: DudesRef ## Related DB descriptors # Debugging data below, might go away in future - xMap*: VidsByKeyTab ## For pretty printing, extends `pAmk` + xMap*: Table[HashKey,HashSet[VertexID]] ## For pretty printing/debugging AristoDbAction* = proc(db: AristoDbRef) {.gcsafe, raises: [].} ## Generic call back function/closure. @@ -123,10 +123,8 @@ func isValid*(root: Hash256): bool = root != EMPTY_ROOT_HASH func isValid*(key: HashKey): bool = - if key.len == 32: - key.to(Hash256).isValid - else: - 0 < key.len + assert key.len != 32 or key.to(Hash256).isValid + 0 < key.len func isValid*(vid: VertexID): bool = vid != VertexID(0) diff --git a/nimbus/db/aristo/aristo_desc/desc_error.nim b/nimbus/db/aristo/aristo_desc/desc_error.nim index 7285f533a..22edda412 100644 --- a/nimbus/db/aristo/aristo_desc/desc_error.nim +++ b/nimbus/db/aristo/aristo_desc/desc_error.nim @@ -98,15 +98,17 @@ type MergeAssemblyFailed # Ooops, internal error MergeHashKeyInvalid - MergeHashKeyCachedAlready MergeHashKeyDiffersFromCached MergeHashKeyRevLookUpGarbled MergeRootVidInvalid MergeRootKeyInvalid + MergeProofInitMissing MergeRevVidMustHaveBeenCached MergeNodeVtxDiffersFromExisting MergeRootKeyDiffersForVid MergeNodeVtxDuplicates + MergeRootKeyMissing + MergeRootArgsIncomplete # Utils UtilsAccPathMissing @@ -116,16 +118,12 @@ type UtilsStoRootMissing # Update `Merkle` hashes `hashify()` - HashifyExistingHashMismatch - HashifyNodeUnresolved - HashifyRootHashMismatch - HashifyRootNodeUnresolved + HashifyVtxUnresolved + HashifyRootVtxUnresolved + HashifyProofHashMismatch # Cache checker `checkCache()` CheckStkKeyStrayZeroEntry - CheckStkRevKeyMismatch - CheckStkRevKeyMissing - CheckStkVtxCountMismatch CheckStkVtxIncomplete CheckStkVtxKeyMismatch CheckStkVtxKeyMissing @@ -134,8 +132,6 @@ type CheckRlxVtxIncomplete CheckRlxVtxKeyMissing CheckRlxVtxKeyMismatch - CheckRlxRevKeyMissing - CheckRlxRevKeyMismatch CheckAnyVtxEmptyKeyMissing CheckAnyVtxEmptyKeyExpected @@ -143,9 +139,6 @@ type CheckAnyVtxBranchLinksMissing CheckAnyVtxExtPfxMissing CheckAnyVtxLockWithoutKey - CheckAnyRevVtxMissing - CheckAnyRevVtxDup - CheckAnyRevCountMismatch # Backend structural check `checkBE()` CheckBeVtxInvalid diff --git a/nimbus/db/aristo/aristo_desc/desc_identifiers.nim b/nimbus/db/aristo/aristo_desc/desc_identifiers.nim index cd12f5806..fff90a79f 100644 --- a/nimbus/db/aristo/aristo_desc/desc_identifiers.nim +++ b/nimbus/db/aristo/aristo_desc/desc_identifiers.nim @@ -15,7 +15,7 @@ {.push raises: [].} import - std/[sequtils, strutils, hashes], + std/[algorithm, sequtils, sets, strutils, hashes], eth/[common, trie/nibbles], 
stew/byteutils, chronicles, @@ -101,7 +101,10 @@ func `<`*(a, b: VertexID): bool {.borrow.} func `<=`*(a, b: VertexID): bool {.borrow.} func `==`*(a, b: VertexID): bool {.borrow.} func cmp*(a, b: VertexID): int {.borrow.} -func `$`*(a: VertexID): string {.borrow.} + +func `$`*(vid: VertexID): string = + "$" & (if vid == VertexID(0): "ø" + else: vid.uint64.toHex.strip(trailing=false,chars={'0'}).toLowerAscii) func `==`*(a: VertexID; b: static[uint]): bool = (a == VertexID(b)) @@ -323,7 +326,10 @@ func to*(lid: HashKey; T: type Hash256): T = func to*(key: Hash256; T: type HashKey): T = ## This is an efficient version of `HashKey.fromBytes(key.data).value`, not ## to be confused with `digestTo(HashKey)`. - T(isHash: true, key: key) + if key == EMPTY_ROOT_HASH: + T() + else: + T(isHash: true, key: key) func to*(n: SomeUnsignedInt|UInt256; T: type PathID): T = ## Representation of a scalar as `PathID` (preserving full information) @@ -374,6 +380,16 @@ func hash*(a: HashKey): Hash = # Miscellaneous helpers # ------------------------------------------------------------------------------ +func `$`*(vids: seq[VertexID]): string = + "[" & vids.toSeq.mapIt( + "$" & it.uint64.toHex.strip(trailing=false,chars={'0'}) + ).join(",") & "]" + +func `$`*(vids: HashSet[VertexID]): string = + "{" & vids.toSeq.sorted.mapIt( + "$" & it.uint64.toHex.strip(trailing=false,chars={'0'}) + ).join(",") & "}" + func `$`*(key: Hash256): string = let w = UInt256.fromBytesBE key.data if w == high(UInt256): diff --git a/nimbus/db/aristo/aristo_desc/desc_structural.nim b/nimbus/db/aristo/aristo_desc/desc_structural.nim index a161043d4..cb0bf6d65 100644 --- a/nimbus/db/aristo/aristo_desc/desc_structural.nim +++ b/nimbus/db/aristo/aristo_desc/desc_structural.nim @@ -76,9 +76,6 @@ type kMap*: Table[VertexID,HashKey] ## Filter Merkle hash key mapping vGen*: seq[VertexID] ## Filter unique vertex ID generator - VidsByKeyTab* = Table[HashKey,HashSet[VertexID]] - ## Reverse lookup searching `VertexID` by the hash key. - LayerDeltaRef* = ref object ## Delta layers are stacked implying a tables hierarchy. Table entries on ## a higher level take precedence over lower layer table entries. So an @@ -104,7 +101,6 @@ type ## sTab*: Table[VertexID,VertexRef] ## Structural vertex table kMap*: Table[VertexID,HashKey] ## Merkle hash key mapping - pAmk*: VidsByKeyTab ## Reverse `kMap` entries, hash key lookup LayerFinalRef* = ref object ## Final tables fully supersede tables on lower layers when stacked as a @@ -114,10 +110,10 @@ type ## These structures are used for tables which are typically smaller then ## the ones on the `LayerDelta` object. ## - lTab*: Table[LeafTie,VertexID] ## Access path to leaf vertex pPrf*: HashSet[VertexID] ## Locked vertices (proof nodes) - vGen*: seq[VertexID] ## Unique vertex ID generator - dirty*: bool ## Needs to be hashified if `true` + fRpp*: Table[HashKey,VertexID] ## Key lookup for `pPrf[]` (proof nodes) + vGen*: seq[VertexID] ## Recycling state for vertex IDs + dirty*: HashSet[VertexID] ## Start nodes to re-hashiy from LayerRef* = ref LayerObj LayerObj* = object @@ -305,8 +301,8 @@ func dup*(node: NodeRef): NodeRef = func dup*(final: LayerFinalRef): LayerFinalRef = ## Duplicate final layer. 
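
A quick standalone check of what the new `$` operator for `VertexID` above renders: a `$` prefix, lower-case hex without leading zeros, and `ø` for the invalid ID 0. `ppId` and plain `uint64` are used here instead of the borrowed type:

import std/strutils

proc ppId(v: uint64): string =
  "$" & (if v == 0: "ø"
         else: v.toHex.strip(trailing=false, chars={'0'}).toLowerAscii)

when isMainModule:
  doAssert ppId(0) == "$ø"
  doAssert ppId(10) == "$a"
  doAssert ppId(0x1000) == "$1000"
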
LayerFinalRef( - lTab: final.lTab, pPrf: final.pPrf, + fRpp: final.fRpp, vGen: final.vGen, dirty: final.dirty) diff --git a/nimbus/db/aristo/aristo_filter/filter_helpers.nim b/nimbus/db/aristo/aristo_filter/filter_helpers.nim index 8295acc68..eab431534 100644 --- a/nimbus/db/aristo/aristo_filter/filter_helpers.nim +++ b/nimbus/db/aristo/aristo_filter/filter_helpers.nim @@ -9,7 +9,7 @@ # except according to those terms. import - std/[sets, tables], + std/tables, eth/common, results, ".."/[aristo_desc, aristo_desc/desc_backend, aristo_get], @@ -68,13 +68,12 @@ proc getLayerStateRoots*( return ok(spr) if chunkedMpt: - if VertexID(1) in delta.pAmk.getOrVoid sprBeKey: + if sprBeKey == delta.kMap.getOrVoid VertexID(1): spr.fg = spr.be return ok(spr) if delta.sTab.len == 0 and - delta.kMap.len == 0 and - delta.pAmk.len == 0: + delta.kMap.len == 0: return err(FilPrettyPointlessLayer) err(FilStateRootMismatch) diff --git a/nimbus/db/aristo/aristo_get.nim b/nimbus/db/aristo/aristo_get.nim index 27dda0dcb..ec8f5892e 100644 --- a/nimbus/db/aristo/aristo_get.nim +++ b/nimbus/db/aristo/aristo_get.nim @@ -106,36 +106,6 @@ proc getKeyBE*( # ------------------ -proc getLeaf*( - db: AristoDbRef; - lty: LeafTie; - ): Result[VidVtxPair,AristoError] = - ## Get the leaf path from the cache layers and look up the database for a - ## leaf node. - let vid = db.lTab.getOrVoid lty - if not vid.isValid: - return err(GetLeafNotFound) - - block body: - let vtx = db.layersGetVtx(vid).valueOr: - break body - if vtx.isValid: - return ok(VidVtxPair(vid: vid, vtx: vtx)) - - # The leaf node cannot be on the backend. It was produced by a `merge()` - # action. So this is a system problem. - err(GetLeafMissing) - -proc getLeafVtx*(db: AristoDbRef; lty: LeafTie): VertexRef = - ## Variant of `getLeaf()` returning `nil` on error (while ignoring the - ## detailed error type information.) - ## - let rc = db.getLeaf lty - if rc.isOk: - return rc.value.vtx - -# ------------------ - proc getVtxRc*(db: AristoDbRef; vid: VertexID): Result[VertexRef,AristoError] = ## Cascaded attempt to fetch a vertex from the cache layers or the backend. ## diff --git a/nimbus/db/aristo/aristo_hashify.nim b/nimbus/db/aristo/aristo_hashify.nim index 4ea89c0b0..c435037d8 100644 --- a/nimbus/db/aristo/aristo_hashify.nim +++ b/nimbus/db/aristo/aristo_hashify.nim @@ -12,74 +12,42 @@ ## ======================================== ## ## For the current state of the `Patricia Trie`, keys (equivalent to hashes) -## are associated with the vertex IDs. Existing key associations are checked -## (i.e. recalculated and compared) unless the ID is locked. In the latter -## case, the key is assumed to be correct without checking. +## are associated with the vertex IDs. Existing key associations are taken +## as-is/unchecked unless the ID is marked a proof node. In the latter case, +## the key is assumed to be correct after re-calculation. ## -## The folllowing properties are required from the top layer cache. +## The labelling algorithm works roughly as follows: ## -## * All recently (i.e. not saved to backend) added entries must have an -## `lTab[]` entry with `(root-vertex,path,leaf-vertex-ID)`. +## * Given a set of start or root vertices, build the forest (of trees) +## downwards towards leafs vertices so that none of these vertices has a +## Merkle hash label. ## -## * All recently (i.e. not saved to backend) deleted entries must have an -## `lTab[]` entry with `(root-vertex,path,VertexID(0))`. 
+## * Starting at the leaf vertices in width-first fashion, calculate the +## Merkle hashes and label the leaf vertices. Recursively work upwards, +## labelling vertices until the root nodes are reached. ## -## * All vertices where the key (aka Merkle hash) has changed must have a -## top layer cache `kMap[]` entry `(vertex-ID,VOID_HASH_KEY)` indicating -## that there is no key available for this vertex. This also applies for -## backend verices where the key has changed while the structural logic -## did not change. -## -## The association algorithm is an optimised version of: -## -## * For all leaf vertices which have all child links on the top layer cache -## where the node keys (aka hashes) can be compiled, proceed with the parent -## vertex. Note that a top layer cache vertex can only have a key on the top -## top layer cache (whereas a bachend b -## -## Apparently, keys (aka hashes) can be compiled for leaf vertices. The same -## holds for follow up vertices where the child keys were available, alteady. -## This process stops when a vertex has children on the backend or children -## lead to a chain not sorted, yet. -## -## * For the remaining vertex chains (where the process stopped) up to the root -## vertex, set up a width-first schedule starting at the vertex where the -## previous chain broke off and follow up to the root vertex. -## -## * Follow the width-first schedule fo labelling all vertices with a hash key. -## -## Note that there are some tweaks for `proof` nodes with incomplete tries and -## handling of possible stray vertices on the top layer cache left over from -## deletion processes. +## Note that there are some tweaks for `proof` node vertices which lead to +## incomplete trees in a way that the algorithm handles existing Merkle hash +## labels for missing vertices.
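
A minimal, self-contained model of the labelling pass described in the comment above: a vertex can be hashed once all of its children carry a label, and the schedule works level by level up to the root. The types and the string "hash" are invented; the real code drives this via the `base[]`/`pool[]` tables rather than re-scanning a pending set:

import std/[sequtils, sets, tables]

type ToyVtx = object
  children: seq[int]          # child vertex IDs; empty means leaf

proc label(nodes: Table[int, ToyVtx]): Table[int, string] =
  ## Label every vertex, children before parents, level by level.
  var pending: HashSet[int]
  for id in nodes.keys:
    pending.incl id
  while 0 < pending.len:
    var ready: seq[int]
    for id in pending:
      if nodes[id].children.allIt(it in result):   # all children labelled?
        ready.add id
    doAssert 0 < ready.len, "unresolved vertex"    # cf. HashifyVtxUnresolved
    for id in ready:
      result[id] = "h(" & $id & ")"                # stand-in for digestTo(HashKey)
      pending.excl id

when isMainModule:
  let nodes = {1: ToyVtx(children: @[2]),          # 1 is the root
               2: ToyVtx(children: @[3, 4]),
               3: ToyVtx(), 4: ToyVtx()}.toTable
  doAssert label(nodes).len == 4
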
## {.push raises: [].} import - std/[sequtils, sets, tables], + std/[algorithm, sequtils, sets, tables], chronicles, eth/common, results, - "."/[aristo_desc, aristo_get, aristo_hike, aristo_layers, aristo_serialise, - aristo_utils, aristo_vid] + stew/byteutils, + "."/[aristo_desc, aristo_get, aristo_layers, aristo_serialise, aristo_utils] type - FollowUpVid = object - ## Link item: VertexID -> VertexID - root: VertexID ## Root vertex, might be void unless known - toVid: VertexID ## Valid next/follow up vertex - - BackVidTab = - Table[VertexID,FollowUpVid] - WidthFirstForest = object ## Collected width first search trees - completed: HashSet[VertexID] ## Top level, root targets reached - root: HashSet[VertexID] ## Top level, root targets not reached yet - pool: BackVidTab ## Upper links pool - base: BackVidTab ## Width-first leaf level links - -const - SubTreeSearchDepthMax = 64 + root: HashSet[VertexID] ## Top level, root targets + pool: Table[VertexID,VertexID] ## Upper links pool + base: Table[VertexID,VertexID] ## Width-first leaf level links + leaf: HashSet[VertexID] ## Stans-alone leaf to process + rev: Table[VertexID,HashSet[VertexID]] ## Reverse look up table logScope: topics = "aristo-hashify" @@ -88,200 +56,250 @@ logScope: # Private helpers # ------------------------------------------------------------------------------ -when false: - template logTxt(info: static[string]): static[string] = - "Hashify " & info +template logTxt(info: static[string]): static[string] = + "Hashify " & info -func getOrVoid(tab: BackVidTab; vid: VertexID): FollowUpVid = - tab.getOrDefault(vid, FollowUpVid()) - -func isValid(w: FollowUpVid): bool = - w.toVid.isValid +func getOrVoid(tab: Table[VertexID,VertexID]; vid: VertexID): VertexID = + tab.getOrDefault(vid, VertexID(0)) func contains(wff: WidthFirstForest; vid: VertexID): bool = - vid in wff.base or vid in wff.pool or vid in wff.root or vid in wff.completed + vid in wff.base or vid in wff.pool or vid in wff.root # ------------------------------------------------------------------------------ # Private functions # ------------------------------------------------------------------------------ -proc cloudConnect( - cloud: HashSet[VertexID]; # Vertex IDs to start connecting from - db: AristoDbRef; # Database, top layer - target: BackVidTab; # Vertices to arrive to - ): tuple[paths: WidthFirstForest, unresolved: HashSet[VertexID]] = - ## For each vertex ID from argument `cloud` find a chain of `FollowUpVid` - ## type links reaching into argument `target`. The `paths` entry from the - ## `result` tuple contains the connections to the `target` argument and the - ## `unresolved` entries the IDs left over from `cloud`. 
- if 0 < cloud.len: - result.unresolved = cloud - var hold = target - while 0 < hold.len: - # Greedily trace back `bottomUp[]` entries for finding parents of - # unresolved vertices from `cloud` - var redo: BackVidTab - for (vid,val) in hold.pairs: - let vtx = db.getVtx vid - if vtx.isValid: - result.paths.pool[vid] = val - # Grab child links - for sub in vtx.subVids: - let w = FollowUpVid( - root: val.root, - toVid: vid) - if sub notin cloud: - redo[sub] = w - else: - result.paths.base[sub] = w # ok, use this - result.unresolved.excl sub - if result.unresolved.len == 0: - return - redo.swap hold - - -proc setNextLink( - wff: var WidthFirstForest; # Search tree to update - redo: var BackVidTab; # Temporary `base` list - val: FollowUpVid; # Current vertex value to follow up - ) = - ## Given the follow up argument `vid`, update the `redo[]` argument (an - ## optional substitute for the `wff.base[]` list) so that the `redo[]` - ## list contains the next `from->to` vertex pair from the `wff.pool[]` - ## list. - ## - ## Unless the `redo` argument is passed as `wff.base`, this function - ## supports the following construct: +func hasValue( + wffTable: Table[VertexID,VertexID]; + vid: VertexID; + wff: WidthFirstForest; + ): bool = + ## Helper for efficient `value` access: ## :: - ## while 0 < wff.base.len: - ## var redo: BackVidTab - ## for (vid,val) in wff.base.pairs: - ## ... - ## wff.setNextLink(redo, val) - ## wff.base.swap redo + ## wffTable.hasValue(wff, vid) ## - ## Otherwise, one would use the function as in + ## instead of ## :: - ## wff.base.del vid - ## wff.setNextLink(wff.pool, val) + ## vid in wffTable.values.toSeq ## - # Get current `from->to` vertex pair - if val.isValid: - # Find follow up `from->to` vertex pair in `pool` - let nextVal = wff.pool.getOrVoid val.toVid - if nextVal.isValid: - - # Make sure that strict hierachial order is kept. If the successor - # is in the temporary `redo[]` base list, move it to the `pool[]`. - if nextVal.toVid in redo: - wff.pool[nextVal.toVid] = redo.getOrVoid nextVal.toVid - redo.del nextVal.toVid - - elif val.toVid in redo.values.toSeq.mapIt(it.toVid): - # The follow up vertex ID is already a follow up ID for some - # `from->to` vertex pair in the temporary `redo[]` base list. - return - - # Move next `from->to vertex` pair to `redo[]` - wff.pool.del val.toVid - redo[val.toVid] = nextVal + for w in wff.rev.getOrVoid vid: + if w in wffTable: + return true -proc updateSchedule( - wff: var WidthFirstForest; # Search tree to update +proc pedigree( db: AristoDbRef; # Database, top layer - hike: Hike; # Chain of vertices - ) = - ## Use vertices from the `hike` argument and link them leaf-to-root in a way - ## so so that they can be traversed later in a width-first search. + ancestors: HashSet[VertexID]; # Vertex IDs to start connecting from + proofs: HashSet[VertexID]; # Additional proof nodes to start from + ): Result[WidthFirstForest,(VertexID,AristoError)] = + ## For each vertex ID from the argument set `ancestors` find all un-labelled + ## grand child vertices and build a forest (of trees) starting from the + ## grand child vertices. ## - let - root = hike.root var - legInx = 0 # find index of first unresolved vertex - unresolved: seq[VertexID] # vtx links, reason for unresolved vertex - # Find the index `legInx` of the first vertex that could not be compiled as - # node all from the top layer cache keys. 
- block findlegInx: - # Directly set tail vertex key (typically a leaf vertex) - let - leaf = hike.legs[^1].wp - node = leaf.vtx.toNode(db, stopEarly=false, beKeyOk=false).valueOr: - # Oops, depends on unresolved storage trie? - legInx = hike.legs.len - 1 - unresolved = error - if leaf.vtx.vType == Leaf: - let stoRoot = unresolved.toSeq[0] - if stoRoot notin wff.base and - stoRoot notin wff.pool: - wff.root.incl stoRoot - wff.base[stoRoot] = FollowUpVid( - root: root, # Jump to main tree - toVid: leaf.vid) - break findlegInx + wff: WidthFirstForest + leafs: HashSet[VertexID] - # If possible, compute a node from the current vertex with all links - # resolved on the cache layer. If this is not possible, stop here and - # return the list of vertex IDs that could not be resolved (see option - # `stopEarly=false`.) - for n in (hike.legs.len-2).countDown(0): - let vtx = hike.legs[n].wp.vtx - discard vtx.toNode(db, stopEarly=false, beKeyOk=false).valueOr: - legInx = n - unresolved = error - break findlegInx - - # All done this `hike` - if db.layersGetKeyOrVoid(root).isValid: - wff.root.excl root - wff.completed.incl root + proc register(wff: var WidthFirstForest; fromVid, toVid: VertexID) = + if toVid in wff.base: + # * there is `toVid->*` in `base[]` + # * so ``toVid->*` moved to `pool[]` + wff.pool[toVid] = wff.base.getOrVoid toVid + wff.base.del toVid + if wff.base.hasValue(fromVid, wff): + # * there is `*->fromVid` in `base[]` + # * so store `fromVid->toVid` in `pool[]` + wff.pool[fromVid] = toVid else: - wff.root.incl root - return + # store `fromVid->toVid` in `base[]` + wff.base[fromVid] = toVid - # Unresolved root target to reach via width-first search - if root notin wff.completed: - wff.root.incl root + # Register reverse pair for quick table value lookup + wff.rev.withValue(toVid, val): + val[].incl fromVid + do: + wff.rev[toVid] = @[fromVid].toHashSet - # Current situation: + # Remove unnecessarey sup-trie roots (e.g. for a storage root) + wff.root.excl fromVid + + # Initialise greedy search which will keep a set of current leafs in the + # `leafs{}` set and follow up links in the `pool[]` table, leading all the + # way up to the `root{}` set. # - # ..unresolved hash keys.. | ..all set here.. - # | - # | - # hike.legs: (leg[0], leg[1], ..leg[legInx], ..) - # | | | | - # | <---- | <----- | +-------+---- \ - # | | | | - # | wff.pool[] | +---- | vertices from the - # : | `unresoved` set - # | - # +---- / + # Process root nodes if they are unlabelled + var rootWasDeleted = VertexID(0) + for root in ancestors: + let vtx = db.getVtx root + if vtx.isNil: + if VertexID(LEAST_FREE_VID) <= root: + # There must be a another root, as well (e.g. `$1` for a storage + # root). Only the last one of some will be reported with error code. + rootWasDeleted = root + elif not db.getKey(root).isValid: + # Need to process `root` node + let children = vtx.subVids + if children.len == 0: + # This is an isolated leaf node + wff.leaf.incl root + else: + wff.root.incl root + for child in vtx.subVids: + if not db.getKey(child).isValid: + leafs.incl child + wff.register(child, root) + if rootWasDeleted.isValid and + wff.root.len == 0 and + wff.leaf.len == 0: + return err((rootWasDeleted,HashifyRootVtxUnresolved)) - # Add unresolved nodes for top level links - for u in 1 .. legInx: - let vid = hike.legs[u].wp.vid - # Make sure that `base[]` and `pool[]` are disjunkt, possibly moving - # `base[]` entries to the `pool[]`. 
- wff.base.del vid - wff.pool[vid] = FollowUpVid( - root: root, - toVid: hike.legs[u-1].wp.vid) + # Initialisation for `proof` nodes which are sort of similar to `root` nodes. + for proof in proofs: + let vtx = db.getVtx proof + if vtx.isNil or not db.getKey(proof).isValid: + return err((proof,HashifyVtxUnresolved)) + let children = vtx.subVids + if 0 < children.len: + # To be treated as a root node + wff.root.incl proof + for child in vtx.subVids: + if not db.getKey(child).isValid: + leafs.incl child + wff.register(child, proof) - # These ones have been resolved, already - for u in legInx+1 ..< hike.legs.len: - let vid = hike.legs[u].wp.vid - wff.pool.del vid - wff.base.del vid + # Recursively step down and collect unlabelled vertices + while 0 < leafs.len: + var redo: typeof(leafs) - assert 0 < unresolved.len # debugging, only - let vid = hike.legs[legInx].wp.vid - for sub in unresolved: - # Update request for unresolved sub-links by adding a new tail - # entry (unless registered, already.) - if sub notin wff: - wff.base[sub] = FollowUpVid( - root: root, - toVid: vid) + for parent in leafs: + assert parent.isValid + assert not db.getKey(parent).isValid + + let vtx = db.getVtx parent + if not vtx.isNil: + let children = vtx.subVids.filterIt(not db.getKey(it).isValid) + if 0 < children.len: + for child in children: + redo.incl child + wff.register(child, parent) + continue + + if parent notin wff.base: + # The buck stops here: + # move `(parent,granny)` from `pool[]` to `base[]` + let granny = wff.pool.getOrVoid parent + assert granny.isValid + wff.register(parent, granny) + wff.pool.del parent + + redo.swap leafs + + ok wff + +# ------------------------------------------------------------------------------ +# Private functions, tree traversal +# ------------------------------------------------------------------------------ + +proc createSched( + db: AristoDbRef; # Database, top layer + ): Result[WidthFirstForest,(VertexID,AristoError)] = + ## Create width-first search schedule (aka forest) + ## + var wff = ? db.pedigree(db.dirty, db.pPrf) + + if 0 < wff.leaf.len: + for vid in wff.leaf: + let node = db.getVtx(vid).toNode(db, beKeyOk=false).valueOr: + # Make sure that all those nodes are reachable + for needed in error: + if needed notin wff.base and + needed notin wff.pool: + return err((needed,HashifyVtxUnresolved)) + continue + db.layersPutKey(VertexID(1), vid, node.digestTo(HashKey)) + + ok wff + + +proc processSched( + wff: var WidthFirstForest; # Search tree to process + db: AristoDbRef; # Database, top layer + ): Result[void,(VertexID,AristoError)] = + ## Traverse width-first schedule and update vertex hash labels. + ## + while 0 < wff.base.len: + var + accept = false + redo: typeof(wff.base) + + for (vid,toVid) in wff.base.pairs: + let vtx = db.getVtx vid + assert vtx.isValid + + # Try to convert the vertex to a node. This is possible only if all + # link references have Merkle hash keys, already. 
+ let node = vtx.toNode(db, stopEarly=false).valueOr: + # Do this vertex later, again + if wff.pool.hasValue(vid, wff): + wff.pool[vid] = toVid + accept = true # `redo[]` will be fifferent from `base[]` + else: + redo[vid] = toVid + continue + # End `valueOr` terminates error clause + + # Could resolve => update Merkle hash + db.layersPutKey(VertexID(1), vid, node.digestTo HashKey) + + # Set follow up link for next round + let toToVid = wff.pool.getOrVoid toVid + if toToVid.isValid: + if toToVid in redo: + # Got predecessor `(toVid,toToVid)` of `(toToVid,xxx)`, + # so move `(toToVid,xxx)` from `redo[]` to `pool[]` + wff.pool[toToVid] = redo.getOrVoid toToVid + redo.del toToVid + # Move `(toVid,toToVid)` from `pool[]` to `redo[]` + wff.pool.del toVid + redo[toVid] = toToVid + + accept = true # `redo[]` will be fifferent from `base[]` + # End `for (vid,toVid)..` + + # Make sure that `base[]` is different from `redo[]` + if not accept: + let vid = wff.base.keys.toSeq[0] + return err((vid,HashifyVtxUnresolved)) + # Restart `wff.base[]` + wff.base.swap redo + + ok() + + +proc finaliseRoots( + wff: var WidthFirstForest; # Search tree to process + db: AristoDbRef; # Database, top layer + ): Result[void,(VertexID,AristoError)] = + ## Process root vertices after all other vertices are done. + ## + # Make sure that the pool has been exhausted + if 0 < wff.pool.len: + let vid = wff.pool.keys.toSeq.sorted[0] + return err((vid,HashifyVtxUnresolved)) + + # Update or verify root nodes + for vid in wff.root: + # Calculate hash key + let + node = db.getVtx(vid).toNode(db).valueOr: + return err((vid,HashifyRootVtxUnresolved)) + key = node.digestTo(HashKey) + if vid notin db.pPrf: + db.layersPutKey(VertexID(1), vid, key) + elif key != db.getKey vid: + return err((vid,HashifyProofHashMismatch)) + + ok() # ------------------------------------------------------------------------------ # Public functions @@ -289,132 +307,24 @@ proc updateSchedule( proc hashify*( db: AristoDbRef; # Database, top layer - ): Result[HashSet[VertexID],(VertexID,AristoError)] = + ): Result[void,(VertexID,AristoError)] = ## Add keys to the `Patricia Trie` so that it becomes a `Merkle Patricia ## Tree`. If successful, the function returns the keys (aka Merkle hash) of ## the root vertices. - var - deleted = false # Need extra check for orphaned vertices - wff: WidthFirstForest # Leaf-to-root traversal structure + ## + if 0 < db.dirty.len: + # Set up widh-first traversal schedule + var wff = ? db.createSched() - if not db.dirty: - return ok wff.completed + # Traverse tree spanned by `wff` and label remaining vertices. + ? wff.processSched db - for (lky,lfVid) in db.lTab.pairs: - let - rc = lky.hikeUp db - hike = rc.to(Hike) + # Do/complete state root vertices + ? wff.finaliseRoots db - if not lfVid.isValid: - # Remember that there are left overs from a delete proedure which have - # to be eventually found before starting width-first processing. - deleted = true + db.top.final.dirty.clear # Mark top layer clean - if hike.legs.len == 0: - # Ignore left over path from deleted entry. - if not lfVid.isValid: - # FIXME: Is there a case for adding unresolved child-to-root links - # to the `wff` schedule? - continue - doAssert rc.isErr # see implementation of `hikeUp()` - return err((lfVid,rc.error[1])) - - # Compile width-first forest search schedule - wff.updateSchedule(db, hike) - - if deleted: - # Update unresolved keys left over after delete operations when overlay - # vertices have been added and there was no `hike` path to capture them. 
- # - # Considering a list of updated paths to these vertices after deleting - # a `Leaf` vertex is deemed too expensive and more error prone. So it - # is the task to search for unresolved node keys and add glue paths to - # the width-first schedule. - var unresolved: HashSet[VertexID] - for (vid,key) in db.layersWalkKey: - if not key.isValid and - vid notin wff: - let rc = db.layersGetVtx vid - if rc.isErr or rc.value.isValid: - unresolved.incl vid - - let glue = unresolved.cloudConnect(db, wff.base) - if 0 < glue.unresolved.len: - return err((glue.unresolved.toSeq[0],HashifyNodeUnresolved)) - # Add glue items to `wff.base[]` and `wff.pool[]` tables - for (vid,val) in glue.paths.base.pairs: - # Add vid to `wff.base[]` list - wff.base[vid] = val - # Move tail of VertexID chain to `wff.pool[]` - var toVid = val.toVid - while true: - let w = glue.paths.pool.getOrVoid toVid - if not w.isValid: - break - wff.base.del toVid - wff.pool[toVid] = w - toVid = w.toVid - - # Traverse width-first schedule and update remaining hashes. - while 0 < wff.base.len: - var redo: BackVidTab - for (vid,val) in wff.base.pairs: - - let vtx = db.getVtx vid - if not vtx.isValid: - # This might happen when proof nodes (see `snap` protocol) are on - # an incomplete trie where this `vid` has a key but no vertex yet. - # Also, the key (as part of the proof data) must be on the backend. - discard db.getKeyBE(vid).valueOr: - return err((vid,HashifyNodeUnresolved)) - else: - # Try to convert the vertex to a node. This is possible only if all - # link references have Merkle hash keys, already. - let node = vtx.toNode(db, stopEarly=false).valueOr: - # Cannot complete this vertex unless its child node keys are compiled. - for w in error: - if w notin wff.base and - w notin redo and - w notin wff.base.values.toSeq.mapit(it.toVid) and - w notin wff.pool.values.toSeq.mapit(it.toVid): - if db.layersGetVtx(w).isErr: - # Ooops, should have been marked for update - return err((w,HashifyNodeUnresolved)) - # Add the child vertex to `redo[]` for the schedule `base[]` list. - redo[w] = FollowUpVid(root: val.root, toVid: vid) - # Do this vertex later, i.e. add the vertex to the `pool[]`. - wff.pool[vid] = val - continue - # End `valueOr` terminates error clause - - # Could resolve => update Merkle hash - db.layersPutKey(vid, node.digestTo HashKey) - - # Set follow up link for next round - wff.setNextLink(redo, val) - # End `for (vid,val)..` - - # Restart `wff.base[]` - wff.base.swap redo - - # Make sure that all keys exist (actually, that set should be empty anyway) - for vid in wff.pool.keys: - discard db.getKeyRc(vid).valueOr: - return err((vid,HashifyNodeUnresolved)) - - # Update root nodes - for vid in wff.root - db.pPrf: - # Convert root vertex to a node. 
- let node = db.getVtx(vid).toNode(db,stopEarly=false).valueOr: - return err((vid,HashifyRootNodeUnresolved)) - db.layersPutKey(vid, node.digestTo(HashKey)) - wff.completed.incl vid - - db.top.final.dirty = false # Mark top layer clean - db.top.final.lTab.clear # Done with leafs - db.top.final.vGen = db.vGen.vidReorg() # Squeze list of recycled vertex IDs - - ok wff.completed + ok() # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_layers.nim b/nimbus/db/aristo/aristo_layers.nim index 4232a261b..1e91e2c92 100644 --- a/nimbus/db/aristo/aristo_layers.nim +++ b/nimbus/db/aristo/aristo_layers.nim @@ -25,28 +25,12 @@ func dup(sTab: Table[VertexID,VertexRef]): Table[VertexID,VertexRef] = for (k,v) in sTab.pairs: result[k] = v.dup -func getLebalOrVoid(stack: seq[LayerRef]; key: HashKey): HashSet[VertexID] = - # Helper: get next set of vertex IDs from stack. - for w in stack.reversed: - w.delta.pAmk.withValue(key,value): - return value[] - -proc recalcLebal(layer: var LayerObj) = - ## Calculate reverse `kMap[]` for final (aka zero) layer - layer.delta.pAmk.clear - for (vid,key) in layer.delta.kMap.pairs: - if key.isValid: - layer.delta.pAmk.withValue(key, value): - value[].incl vid - do: - layer.delta.pAmk[key] = @[vid].toHashSet - # ------------------------------------------------------------------------------ # Public getters: lazy value lookup for read only versions # ------------------------------------------------------------------------------ -func lTab*(db: AristoDbRef): Table[LeafTie,VertexID] = - db.top.final.lTab +func dirty*(db: AristoDbRef): HashSet[VertexID] = + db.top.final.dirty func pPrf*(db: AristoDbRef): HashSet[VertexID] = db.top.final.pPrf @@ -54,37 +38,28 @@ func pPrf*(db: AristoDbRef): HashSet[VertexID] = func vGen*(db: AristoDbRef): seq[VertexID] = db.top.final.vGen -func dirty*(db: AristoDbRef): bool = - db.top.final.dirty - # ------------------------------------------------------------------------------ # Public getters/helpers # ------------------------------------------------------------------------------ func nLayersVtx*(db: AristoDbRef): int = - ## Number of vertex ID/vertex entries on the cache layers. This is an upper bound - ## for the number of effective vertex ID mappings held on the cache layers as - ## there might be duplicate entries for the same vertex ID on different layers. + ## Number of vertex ID/vertex entries on the cache layers. This is an upper + ## bound for the number of effective vertex ID mappings held on the cache + ## layers as there might be duplicate entries for the same vertex ID on + ## different layers. ## db.stack.mapIt(it.delta.sTab.len).foldl(a + b, db.top.delta.sTab.len) func nLayersKey*(db: AristoDbRef): int = - ## Number of vertex ID/key entries on the cache layers. This is an upper bound - ## for the number of effective vertex ID mappingss held on the cache layers as - ## there might be duplicate entries for the same vertex ID on different layers. + ## Number of vertex ID/key entries on the cache layers. This is an upper + ## bound for the number of effective vertex ID mappingss held on the cache + ## layers as there might be duplicate entries for the same vertex ID on + ## different layers. ## db.stack.mapIt(it.delta.kMap.len).foldl(a + b, db.top.delta.kMap.len) -func nLayersYek*(db: AristoDbRef): int = - ## Number of key/vertex IDs reverse lookup entries on the cache layers. 
This - ## is an upper bound for the number of effective key mappingss held on the - ## cache layers as there might be duplicate entries for the same key on - ## different layers. - ## - db.stack.mapIt(it.delta.pAmk.len).foldl(a + b, db.top.delta.pAmk.len) - # ------------------------------------------------------------------------------ -# Public functions: get variants +# Public functions: getter variants # ------------------------------------------------------------------------------ proc layersGetVtx*(db: AristoDbRef; vid: VertexID): Result[VertexRef,void] = @@ -126,84 +101,85 @@ proc layersGetKeyOrVoid*(db: AristoDbRef; vid: VertexID): HashKey = db.layersGetKey(vid).valueOr: VOID_HASH_KEY -proc layersGetYek*( +proc layerGetProofKeyOrVoid*(db: AristoDbRef; vid: VertexID): HashKey = + ## Get the hash key of a proof node if it was registered as such. + if vid in db.top.final.pPrf: + db.top.delta.kMap.getOrVoid vid + else: + VOID_HASH_KEY + +proc layerGetProofVidOrVoid*(db: AristoDbRef; key: HashKey): VertexID = + ## Reverse look up for a registered proof node or a link key for such a + ## node. The vertex for a returned vertex ID might not exist if the + ## argument `key` refers to a link key of a registered proof node. + db.top.final.fRpp.getOrVoid key + +# ------------------------------------------------------------------------------ +# Public functions: setter variants +# ------------------------------------------------------------------------------ + +proc layersPutVtx*( db: AristoDbRef; - key: HashKey; - ): Result[HashSet[VertexID],void] = - ## Inverse of `layersGetKey()`. For a given argumnt `key`, finds all vertex IDs - ## that have `layersGetKey()` return this very `key` value for the argument - ## vertex IDs. - if db.top.delta.pAmk.hasKey key: - return ok(db.top.delta.pAmk.getOrVoid key) - - for w in db.stack.reversed: - if w.delta.pAmk.hasKey key: - return ok(w.delta.pAmk.getOrVoid key) - - err() - -proc layersGetYekOrVoid*(db: AristoDbRef; key: HashKey): HashSet[VertexID] = - ## Simplified version of `layersGetVidsOrVoid()` - db.layersGetYek(key).valueOr: EmptyVidSet - -# ------------------------------------------------------------------------------ -# Public functions: put variants -# ------------------------------------------------------------------------------ - -proc layersPutVtx*(db: AristoDbRef; vid: VertexID; vtx: VertexRef) = + root: VertexID; + vid: VertexID; + vtx: VertexRef; + ) = ## Store a (potentally empty) vertex on the top layer db.top.delta.sTab[vid] = vtx - db.top.final.dirty = true # Modified top cache layers + db.top.final.dirty.incl root -proc layersResVtx*(db: AristoDbRef; vid: VertexID) = +proc layersResVtx*( + db: AristoDbRef; + root: VertexID; + vid: VertexID; + ) = ## Shortcut for `db.layersPutVtx(vid, VertexRef(nil))`. It is sort of the ## equivalent of a delete function. 
- db.layersPutVtx(vid, VertexRef(nil)) + db.layersPutVtx(root, vid, VertexRef(nil)) -proc layersPutKey*(db: AristoDbRef; vid: VertexID; key: HashKey) = +proc layersPutKey*( + db: AristoDbRef; + root: VertexID; + vid: VertexID; + key: HashKey; + ) = ## Store a (potentally void) hash key on the top layer - - # Get previous key - let prvKey = db.top.delta.kMap.getOrVoid vid - - # Update key on `kMap:key->vid` mapping table db.top.delta.kMap[vid] = key - db.top.final.dirty = true # Modified top cache layers - - # Clear previous value on reverse table if it has changed - if prvKey.isValid and prvKey != key: - var vidsLen = -1 - db.top.delta.pAmk.withValue(prvKey, value): - value[].excl vid - vidsLen = value[].len - do: # provide empty lookup - let vids = db.stack.getLebalOrVoid(prvKey) - if vids.isValid and vid in vids: - # This entry supersedes non-emtpty changed ones from lower levels - db.top.delta.pAmk[prvKey] = vids - @[vid].toHashSet - if vidsLen == 0 and not db.stack.getLebalOrVoid(prvKey).isValid: - # There is no non-emtpty entry on lower levels, so ledete this one - db.top.delta.pAmk.del prvKey - - # Add updated value on reverse table if non-zero - if key.isValid: - db.top.delta.pAmk.withValue(key, value): - value[].incl vid - do: # else if not found: need to merge with value set from lower layer - db.top.delta.pAmk[key] = db.stack.getLebalOrVoid(key) + @[vid].toHashSet + db.top.final.dirty.incl root # Modified top cache layers => hashify -proc layersResKey*(db: AristoDbRef; vid: VertexID) = +proc layersResKey*(db: AristoDbRef; root: VertexID; vid: VertexID) = ## Shortcut for `db.layersPutKey(vid, VOID_HASH_KEY)`. It is sort of the ## equivalent of a delete function. - db.layersPutKey(vid, VOID_HASH_KEY) + db.layersPutKey(root, vid, VOID_HASH_KEY) + + +proc layersPutProof*(db: AristoDbRef; vid: VertexID; key: HashKey) = + ## Register a link key of a proof node. + let lKey = db.layersGetKeyOrVoid vid + if not lKey.isValid or lKey != key: + db.top.delta.kMap[vid] = key + db.top.final.fRpp[key] = vid + +proc layersPutProof*( + db: AristoDbRef; + vid: VertexID; + key: HashKey; + vtx: VertexRef; + ) = + ## Register a full proof node (not only a link key.) + let lVtx = db.layersGetVtxOrVoid vid + if not lVtx.isValid or lVtx != vtx: + db.top.delta.sTab[vid] = vtx + db.top.final.pPrf.incl vid + db.layersPutProof(vid, key) # ------------------------------------------------------------------------------ # Public functions # ------------------------------------------------------------------------------ -proc layersMergeOnto*(src: LayerRef; trg: var LayerObj; stack: seq[LayerRef]) = +proc layersMergeOnto*(src: LayerRef; trg: var LayerObj) = ## Merges the argument `src` into the argument `trg` and returns `trg`. For ## the result layer, the `txUid` value set to `0`. ## @@ -215,16 +191,6 @@ proc layersMergeOnto*(src: LayerRef; trg: var LayerObj; stack: seq[LayerRef]) = for (vid,key) in src.delta.kMap.pairs: trg.delta.kMap[vid] = key - if stack.len == 0: - # Re-calculate `pAmk[]` - trg.recalcLebal() - else: - # Merge reverse `kMap[]` layers. Empty key set images are ignored unless - # they supersede non-empty values on the argument `stack[]`. - for (key,vids) in src.delta.pAmk.pairs: - if 0 < vids.len or stack.getLebalOrVoid(key).isValid: - trg.delta.pAmk[key] = vids - func layersCc*(db: AristoDbRef; level = high(int)): LayerRef = ## Provide a collapsed copy of layers up to a particular transaction level. 
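
[Editorial aside, not part of the patch] A minimal sketch of how the new root-aware setters and the proof-node registration above are meant to work together, assuming this repository's module layout and a live `AristoDbRef` handle. The helper name `exampleProofSetup` and the standalone module are hypothetical, for illustration only; `key` is assumed to be a valid (non-void) hash key.

import std/sets
import ./aristo_desc, ./aristo_layers

proc exampleProofSetup(db: AristoDbRef; root, vid: VertexID;
                       vtx: VertexRef; key: HashKey) =
  ## Hypothetical helper, illustration only.
  db.layersPutVtx(root, vid, vtx)    # store vertex, marks sub-trie `root` dirty
  db.layersResKey(root, vid)         # void the key so `hashify()` recompiles it
  db.layersPutProof(vid, key, vtx)   # register and lock `vid` as a proof node
  doAssert db.layerGetProofVidOrVoid(key) == vid  # reverse lookup via `fRpp[]`
  doAssert vid in db.pPrf            # proof nodes are collected in `pPrf[]`
  doAssert root in db.dirty          # `dirty` is now a set of root vertex IDs

The point illustrated is that `dirty` is no longer a single boolean but records which sub-trie roots need re-hashing, so `hashify()` can start from the registered roots rather than from a global flag.
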
@@ -248,9 +214,6 @@ func layersCc*(db: AristoDbRef; level = high(int)): LayerRef = for (vid,key) in layers[n].delta.kMap.pairs: result.delta.kMap[vid] = key - # Re-calculate `pAmk[]` - result[].recalcLebal() - # ------------------------------------------------------------------------------ # Public iterators # ------------------------------------------------------------------------------ @@ -301,22 +264,6 @@ iterator layersWalkKey*( yield (vid,key) seen.incl vid - -iterator layersWalkYek*( - db: AristoDbRef; - ): tuple[key: HashKey, vids: HashSet[VertexID]] = - ## Walk over `(HashKey,HashSet[VertexID])` pairs. - var seen: HashSet[HashKey] - for (key,vids) in db.top.delta.pAmk.pairs: - yield (key,vids) - seen.incl key - - for w in db.stack.reversed: - for (key,vids) in w.delta.pAmk.pairs: - if key notin seen: - yield (key,vids) - seen.incl key - # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_merge.nim b/nimbus/db/aristo/aristo_merge.nim index 619b2bb4f..fb1a73823 100644 --- a/nimbus/db/aristo/aristo_merge.nim +++ b/nimbus/db/aristo/aristo_merge.nim @@ -31,8 +31,8 @@ import results, stew/keyed_queue, ../../sync/protocol/snap/snap_types, - "."/[aristo_desc, aristo_get, aristo_hike, aristo_layers, aristo_path, - aristo_serialise, aristo_utils, aristo_vid] + "."/[aristo_desc, aristo_get, aristo_hike, aristo_layers, + aristo_path, aristo_serialise, aristo_utils, aristo_vid] logScope: topics = "aristo-merge" @@ -76,28 +76,22 @@ proc to( # ----------- -proc nullifyKey( - db: AristoDbRef; # Database, top layer - vid: VertexID; # Vertex IDs to clear - ) = - # Register for void hash (to be recompiled) - db.layersResKey vid - proc clearMerkleKeys( db: AristoDbRef; # Database, top layer hike: Hike; # Implied vertex IDs to clear hashes for vid: VertexID; # Additionall vertex IDs to clear ) = for w in hike.legs.mapIt(it.wp.vid) & @[vid]: - db.nullifyKey w + db.layersResKey(hike.root, w) proc setVtxAndKey( db: AristoDbRef; # Database, top layer + root: VertexID; vid: VertexID; # Vertex IDs to add/clear vtx: VertexRef; # Vertex to add ) = - db.layersPutVtx(vid, vtx) - db.layersResKey vid + db.layersPutVtx(root, vid, vtx) + db.layersResKey(root, vid) # ----------- @@ -168,8 +162,7 @@ proc insertBranch( local = db.vidFetch(pristine = true) lty = LeafTie(root: hike.root, path: rc.value) - db.top.final.lTab[lty] = local # update leaf path lookup cache - db.setVtxAndKey(local, linkVtx) + db.setVtxAndKey(hike.root, local, linkVtx) linkVtx.lPfx = linkVtx.lPfx.slice(1+n) forkVtx.bVid[linkInx] = local @@ -179,7 +172,7 @@ proc insertBranch( else: let local = db.vidFetch - db.setVtxAndKey(local, linkVtx) + db.setVtxAndKey(hike.root, local, linkVtx) linkVtx.ePfx = linkVtx.ePfx.slice(1+n) forkVtx.bVid[linkInx] = local @@ -191,7 +184,7 @@ proc insertBranch( vType: Leaf, lPfx: hike.tail.slice(1+n), lData: payload) - db.setVtxAndKey(local, leafLeg.wp.vtx) + db.setVtxAndKey(hike.root, local, leafLeg.wp.vtx) # Update branch leg, ready to append more legs var okHike = Hike(root: hike.root, legs: hike.legs) @@ -203,7 +196,7 @@ proc insertBranch( ePfx: hike.tail.slice(0,n), eVid: db.vidFetch) - db.setVtxAndKey(linkID, extVtx) + db.setVtxAndKey(hike.root, linkID, extVtx) okHike.legs.add Leg( nibble: -1, @@ -211,7 +204,7 @@ proc insertBranch( vid: linkID, vtx: extVtx)) - db.setVtxAndKey(extVtx.eVid, forkVtx) + db.setVtxAndKey(hike.root, extVtx.eVid, forkVtx) okHike.legs.add 
Leg( nibble: leafInx.int8, wp: VidVtxPair( @@ -219,7 +212,7 @@ proc insertBranch( vtx: forkVtx)) else: - db.setVtxAndKey(linkID, forkVtx) + db.setVtxAndKey(hike.root, linkID, forkVtx) okHike.legs.add Leg( nibble: leafInx.int8, wp: VidVtxPair( @@ -265,8 +258,8 @@ proc concatBranchAndLeaf( lPfx: hike.tail.slice(1), lData: payload) brVtx.bVid[nibble] = vid - db.setVtxAndKey(brVid, brVtx) - db.setVtxAndKey(vid, vtx) + db.setVtxAndKey(hike.root, brVid, brVtx) + db.setVtxAndKey(hike.root, vid, vtx) okHike.legs.add Leg(wp: VidVtxPair(vtx: vtx, vid: vid), nibble: -1) ok okHike @@ -291,6 +284,7 @@ proc topIsBranchAddLeaf( return err(MergeBranchGarbledNibble) let + parent = hike.legs[^1].wp.vid branch = hike.legs[^1].wp.vtx linkID = branch.bVid[nibble] linkVtx = db.getVtx linkID @@ -311,9 +305,11 @@ proc topIsBranchAddLeaf( vType: Leaf, lPfx: hike.tail, lData: payload) - db.setVtxAndKey(linkID, vtx) + db.setVtxAndKey(hike.root, linkID, vtx) var okHike = Hike(root: hike.root, legs: hike.legs) okHike.legs.add Leg(wp: VidVtxPair(vid: linkID, vtx: vtx), nibble: -1) + if parent notin db.pPrf: + db.layersResKey(hike.root, parent) return ok(okHike) if linkVtx.vType == Branch: @@ -357,7 +353,7 @@ proc topIsExtAddLeaf( vType: Leaf, lPfx: extVtx.ePfx & hike.tail, lData: payload) - db.setVtxAndKey(extVid, vtx) + db.setVtxAndKey(hike.root, extVid, vtx) okHike.legs[^1].wp.vtx = vtx elif brVtx.vType != Branch: @@ -390,8 +386,8 @@ proc topIsExtAddLeaf( lPfx: hike.tail.slice(1), lData: payload) brVtx.bVid[nibble] = vid - db.setVtxAndKey(brVid, brVtx) - db.setVtxAndKey(vid, vtx) + db.setVtxAndKey(hike.root, brVid, brVtx) + db.setVtxAndKey(hike.root, vid, vtx) okHike.legs.add Leg(wp: VidVtxPair(vtx: brVtx, vid: brVid), nibble: nibble) okHike.legs.add Leg(wp: VidVtxPair(vtx: vtx, vid: vid), nibble: -1) @@ -424,8 +420,8 @@ proc topIsEmptyAddLeaf( lPfx: hike.tail.slice(1), lData: payload) rootVtx.bVid[nibble] = leafVid - db.setVtxAndKey(hike.root, rootVtx) - db.setVtxAndKey(leafVid, leafVtx) + db.setVtxAndKey(hike.root, hike.root, rootVtx) + db.setVtxAndKey(hike.root, leafVid, leafVtx) return ok Hike( root: hike.root, legs: @[Leg(wp: VidVtxPair(vtx: rootVtx, vid: hike.root), nibble: nibble), @@ -457,8 +453,7 @@ proc updatePayload( hike.legs[^1].wp.vtx = vtx # Modify top level cache - db.setVtxAndKey(vid, vtx) - db.top.final.lTab[leafTie] = vid + db.setVtxAndKey(hike.root, vid, vtx) db.clearMerkleKeys(hike, vid) ok hike @@ -487,73 +482,38 @@ proc mergeNodeImpl( ## allocated, already. If the node comes straight from the `decode()` RLP ## decoder as expected, these vertex IDs will be all zero. ## - ## This function expects that the parent for the argument node has already - ## been installed, i.e. the top layer cache mapping + ## This function expects that the parent for the argument `node` has already + ## been installed. ## - ## pAmk: {HashKey} -> {{VertexID}} - ## - ## has a result for the argument `node`. Also, the invers top layer cache - ## mapping - ## - ## sTab: {VertexID} -> {VertexRef} - ## - ## has no result for all images of the argument `node` under `pAmk`: + ## Caveat: + ## Proof of concept, not in production yet. ## # Check for error after RLP decoding doAssert node.error == AristoError(0) + + # Verify arguments if not rootVid.isValid: return err(MergeRootKeyInvalid) - - # Verify `hashKey` if not hashKey.isValid: return err(MergeHashKeyInvalid) - # Make sure that the `vid<->hashLbl` reverse mapping has been cached, - # already. 
This is provided for if the `nodes` are processed in the right - # order `root->.. ->leaf`. - let - vids = db.layersGetYekOrVoid(hashKey).toSeq - isRoot = rootVid in vids - if vids.len == 0: + # Make sure that the `vid<->key` reverse mapping is updated. + let vid = db.layerGetProofVidOrVoid hashKey + if not vid.isValid: return err(MergeRevVidMustHaveBeenCached) - if isRoot and 1 < vids.len: - # There can only be one root. - return err(MergeHashKeyRevLookUpGarbled) - # Use the first vertex ID from the `vis` list as representant for all others - let key = db.layersGetKeyOrVoid vids[0] - if key == hashKey: - if db.layersGetVtx(vids[0]).isOk: - for n in 1 ..< vids.len: - if db.layersGetVtx(vids[n]).isErr: - return err(MergeHashKeyRevLookUpGarbled) - # This is tyically considered OK - return err(MergeHashKeyCachedAlready) - # Otherwise proceed - elif key.isValid: - # Different key assigned => error + # Use the vertex ID `vid` to be populated by the argument root node + let key = db.layersGetKeyOrVoid vid + if key.isValid and key != hashKey: return err(MergeHashKeyDiffersFromCached) - # While the vertex referred to by `vids[0]` does not exists in the top layer - # cache it may well be in some lower layers or the backend. This typically - # happens for the root node. - var (vtx, hasVtx) = block: - let vty = db.getVtx vids[0] + # Set up vertex. + let (vtx, newVtxFromNode) = block: + let vty = db.getVtx vid if vty.isValid: - (vty, true) + (vty, false) else: - (node.to(VertexRef), false) - - # Verify that all `vids` entries are similar - for n in 1 ..< vids.len: - let w = vids[n] - if key != db.layersGetKeyOrVoid(w) or db.layersGetVtx(w).isOk: - return err(MergeHashKeyRevLookUpGarbled) - if not hasVtx: - # Prefer existing node which has all links available, already. - let u = db.getVtx w - if u.isValid: - (vtx, hasVtx) = (u, true) + (node.to(VertexRef), true) # The `vertexID <-> hashKey` mappings need to be set up now (if any) case node.vType: @@ -562,30 +522,27 @@ proc mergeNodeImpl( of Extension: if node.key[0].isValid: let eKey = node.key[0] - if not hasVtx: + if newVtxFromNode: # Brand new reverse lookup link for this vertex vtx.eVid = db.vidFetch - db.layersPutKey(vtx.eVid, eKey) + db.layersPutProof(vtx.eVid, eKey) elif not vtx.eVid.isValid: return err(MergeNodeVtxDiffersFromExisting) - db.layersPutKey(vtx.eVid, eKey) + db.layersPutProof(vtx.eVid, eKey) of Branch: for n in 0..15: if node.key[n].isValid: let bKey = node.key[n] - if not hasVtx: + if newVtxFromNode: # Brand new reverse lookup link for this vertex vtx.bVid[n] = db.vidFetch - db.layersPutKey(vtx.bVid[n], bKey) + db.layersPutProof(vtx.bVid[n], bKey) elif not vtx.bVid[n].isValid: return err(MergeNodeVtxDiffersFromExisting) - db.layersPutKey(vtx.bVid[n], bKey) - - for w in vids: - db.top.final.pPrf.incl w - if not hasVtx or db.getKey(w) != hashKey: - db.layersPutVtx(w, vtx.dup) + db.layersPutProof(vtx.bVid[n], bKey) + # Store and lock vertex + db.layersPutProof(vid, key, vtx) ok() # ------------------------------------------------------------------------------ @@ -611,14 +568,6 @@ proc merge*( ## of type `AccountData`, its `storageID` field must be unset or equal to the ## `payload.root` vertex ID. ## - # Check whether the leaf is on the database and payloads match - block: - let vid = db.lTab.getOrVoid leafTie - if vid.isValid: - let vtx = db.getVtx vid - if vtx.isValid and vtx.lData == payload: - return err(MergeLeafPathCachedAlready) - if LEAST_FREE_VID <= leafTie.root.distinctBase: ? 
db.registerAccount(leafTie.root, accPath) elif not leafTie.root.isValid: @@ -651,7 +600,7 @@ proc merge*( vType: Leaf, lPfx: leafTie.path.to(NibblesSeq), lData: payload)) - db.setVtxAndKey(wp.vid, wp.vtx) + db.setVtxAndKey(hike.root, wp.vid, wp.vtx) okHike = Hike(root: wp.vid, legs: @[Leg(wp: wp, nibble: -1)]) # Double check the result until the code is more reliable @@ -660,9 +609,6 @@ proc merge*( if rc.isErr or rc.value != leafTie.path: return err(MergeAssemblyFailed) # Ooops - # Update leaf acccess cache - db.top.final.lTab[leafTie] = okHike.legs[^1].wp.vid - ok okHike @@ -737,12 +683,15 @@ proc merge*( db: AristoDbRef; # Database, top layer proof: openArray[SnapProof]; # RLP encoded node records rootVid: VertexID; # Current sub-trie - ): tuple[merged: int, dups: int, error: AristoError] + ): Result[int, AristoError] {.gcsafe, raises: [RlpError].} = ## The function merges the argument `proof` list of RLP encoded node records ## into the `Aristo Trie` database. This function is intended to be used with ## the proof nodes as returened by `snap/1` messages. ## + ## Caveat: + ## Proof of concept, not in production yet. + ## proc update( seen: var Table[HashKey,NodeRef]; todo: var KeyedQueueNV[NodeRef]; @@ -757,10 +706,13 @@ proc merge*( seen[lid] = node if not rootVid.isValid: - return (0,0,MergeRootVidInvalid) + return err(MergeRootVidInvalid) let rootKey = db.getKey rootVid if not rootKey.isValid: - return (0,0,MergeRootKeyInvalid) + return err(MergeRootKeyInvalid) + # Make sure that the reverse lookup for the root vertex key is available. + if not db.layerGetProofVidOrVoid(rootKey).isValid: + return err(MergeProofInitMissing) # Expand and collect hash keys and nodes var nodeTab: Table[HashKey,NodeRef] @@ -769,7 +721,7 @@ proc merge*( key = w.Blob.digestTo(HashKey) node = rlp.decode(w.Blob,NodeRef) if node.error != AristoError(0): - return (0,0,node.error) + return err(node.error) nodeTab[key] = node # Check for embedded nodes, i.e. fully encoded node instead of a hash @@ -823,81 +775,80 @@ proc merge*( if 0 < chain.len and chain[^1] == rootKey: chains.add chain - # Make sure that the reverse lookup for the root vertex key is available. - block: - let vids = db.layersGetYekOrVoid rootKey - if not vids.isValid: - db.layersPutKey(rootVid, rootKey) - # Process over chains in reverse mode starting with the root node. This # allows the algorithm to find existing nodes on the backend. var seen: HashSet[HashKey] - (merged, dups) = (0, 0) + merged = 0 # Process the root ID which is common to all chains for chain in chains: for key in chain.reversed: if key notin seen: seen.incl key - let rc = db.mergeNodeImpl(key, nodeTab.getOrVoid key, rootVid) - if rc.isOK: - merged.inc - elif rc.error == MergeHashKeyCachedAlready: - dups.inc - else: - return (merged, dups, rc.error) + db.mergeNodeImpl(key, nodeTab.getOrVoid key, rootVid).isOkOr: + return err(error) + merged.inc + + ok merged - (merged, dups, AristoError(0)) proc merge*( db: AristoDbRef; # Database, top layer - rootKey: Hash256; # Merkle hash for root - rootVid = VertexID(0) # Optionally, force root vertex ID + rootHash: Hash256; # Merkle hash for root + rootVid = VertexID(0); # Optionally, force root vertex ID ): Result[VertexID,AristoError] = - ## Set up a `rootKey` associated with a vertex ID. + ## Set up a `rootKey` associated with a vertex ID for use with proof nodes. ## - ## If argument `rootVid` is unset (defaults to `VertexID(0)`) then the main - ## trie is tested for `VertexID(1)`. 
If assigned with a different Merkle key
-  ## already, a new vertex ID is created and the argument root key is assigned
-  ## to this vertex ID.
+  ## If argument `rootVid` is unset then a new dynamic root vertex (i.e.
+  ## the ID will be at least `LEAST_FREE_VID`) will be installed.
   ##
-  ## If the argument `rootVid` is set (to a value different from `VertexID(0)`),
-  ## then a sub-trie with root `rootVid` is checked for. If it exists with a
-  ## diffent root key assigned, then an error is returned. Otherwise a new
-  ## vertex ID is created and the argument root key is assigned.
+  ## Otherwise, if the argument `rootVid` is set then a sub-trie with root
+  ## `rootVid` is checked for. An error is returned if it is set up already
+  ## with a different `rootHash`.
   ##
   ## Upon successful return, the vertex ID assigned to the root key is returned.
   ##
-  if not rootKey.isValid:
-    return err(MergeRootKeyInvalid)
+  ## Caveat:
+  ##  Proof of concept, not in production yet.
+  ##
+  let rootKey = rootHash.to(HashKey)
 
-  let rootLink = rootKey.to(HashKey)
-
-  if rootVid.isValid and rootVid != VertexID(1):
+  if rootVid.isValid:
     let key = db.getKey rootVid
-    if key.to(Hash256) == rootKey:
+    if key.isValid:
+      if rootKey.isValid and key != rootKey:
+        # Cannot use installed root key differing from hash argument
+        return err(MergeRootKeyDiffersForVid)
+      # Confirm root ID and key for proof nodes processing
+      db.layersPutProof(rootVid, key) # note that `rootKey` might be void
       return ok rootVid
 
-    if not key.isValid:
-      db.layersPutKey(rootVid, rootLink)
-      return ok rootVid
-  else:
-    let key = db.getKey VertexID(1)
-    if key.to(Hash256) == rootKey:
-      return ok VertexID(1)
+    if not rootHash.isValid:
+      return err(MergeRootArgsIncomplete)
+    if db.getVtx(rootVid).isValid:
+      # Cannot verify root key for existing root vertex
+      return err(MergeRootKeyMissing)
 
-    # Otherwise assign unless valid
-    if not key.isValid:
-      db.layersPutKey(VertexID(1), rootLink)
-      return ok VertexID(1)
+    # Confirm root ID and hash key for proof nodes processing
+    db.layersPutProof(rootVid, rootKey)
+    return ok rootVid
 
-  # Create and assign a new root key
-  if not rootVid.isValid:
-    let vid = db.vidFetch
-    db.layersPutKey(vid, rootLink)
-    return ok vid
+  if not rootHash.isValid:
+    return err(MergeRootArgsIncomplete)
 
-  err(MergeRootKeyDiffersForVid)
+  # Now there is no root vertex ID, only the hash argument.
+  # So create and assign a new root key.
+ let vid = db.vidFetch + db.layersPutProof(vid, rootKey) + return ok vid + + +proc merge*( + db: AristoDbRef; # Database, top layer + rootVid: VertexID; # Root ID + ): Result[VertexID,AristoError] = + ## Variant of `merge()` for missing `rootHash` + db.merge(EMPTY_ROOT_HASH, rootVid) # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_sign.nim b/nimbus/db/aristo/aristo_sign.nim index c4f7bf0fc..7620d01ff 100644 --- a/nimbus/db/aristo/aristo_sign.nim +++ b/nimbus/db/aristo/aristo_sign.nim @@ -54,7 +54,7 @@ proc merkleSignCommit*( return ok VOID_HASH_KEY if sdb.error != AristoError(0): return err((sdb.errKey, sdb.error)) - discard sdb.db.hashify().valueOr: + sdb.db.hashify().isOkOr: let w = (EmptyBlob, error[1]) return err(w) let hash = sdb.db.getKeyRc(sdb.root).valueOr: diff --git a/nimbus/db/aristo/aristo_tx.nim b/nimbus/db/aristo/aristo_tx.nim index 5ce6daa54..51ef39a08 100644 --- a/nimbus/db/aristo/aristo_tx.nim +++ b/nimbus/db/aristo/aristo_tx.nim @@ -142,7 +142,7 @@ proc forkTx*( level: 1) if not dontHashify: - discard txClone.hashify().valueOr: + txClone.hashify().isOkOr: discard txClone.forget() return err(error[1]) @@ -166,7 +166,7 @@ proc forkTop*( dbClone.backend = db.backend if not dontHashify: - discard dbClone.hashify().valueOr: + dbClone.hashify().isOkOr: discard dbClone.forget() return err(error[1]) return ok(dbClone) @@ -249,14 +249,13 @@ proc commit*( ## previous transaction is returned if there was any. ## let db = ? tx.getDbDescFromTopTx() - discard db.hashify().valueOr: + db.hashify().isOkOr: return err(error[1]) # Pop layer from stack and merge database top layer onto it let merged = block: if db.top.delta.sTab.len == 0 and - db.top.delta.kMap.len == 0 and - db.top.delta.pAmk.len == 0: + db.top.delta.kMap.len == 0: # Avoid `layersMergeOnto()` db.top.delta = db.stack[^1].delta db.stack.setLen(db.stack.len-1) @@ -264,7 +263,7 @@ proc commit*( else: let layer = db.stack[^1] db.stack.setLen(db.stack.len-1) - db.top.layersMergeOnto(layer[], db.stack) + db.top.layersMergeOnto layer[] layer # Install `merged` stack top layer and update stack @@ -293,7 +292,7 @@ proc collapse*( if commit: # For commit, hashify the current layer if requested and install it - discard db.hashify().valueOr: + db.hashify().isOkOr: return err(error[1]) db.top.txUid = 0 @@ -332,7 +331,7 @@ proc stow*( if persistent and not db.canResolveBackendFilter(): return err(TxBackendNotWritable) - discard db.hashify().valueOr: + db.hashify().isOkOr: return err(error[1]) let fwd = db.fwdFilter(db.top, chunkedMpt).valueOr: diff --git a/nimbus/db/aristo/aristo_utils.nim b/nimbus/db/aristo/aristo_utils.nim index 5c4cbe985..a30c6447d 100644 --- a/nimbus/db/aristo/aristo_utils.nim +++ b/nimbus/db/aristo/aristo_utils.nim @@ -14,7 +14,7 @@ {.push raises: [].} import - std/[sequtils, tables, typetraits], + std/[sequtils, sets, typetraits], eth/common, results, "."/[aristo_constants, aristo_desc, aristo_get, aristo_hike, aristo_layers] @@ -200,17 +200,9 @@ proc registerAccount*( if not accPath.isValid: return err(UtilsAccPathMissing) - # Check whether the account is marked for re-hash, already - let lty = LeafTie(root: VertexID(1), path: accPath) - if db.lTab.hasKey lty: - return ok() - # Get account leaf with account data - let rc = lty.hikeUp(db) - let hike = block: - if rc.isErr: - return err(UtilsAccUnaccessible) - rc.value + let hike = LeafTie(root: VertexID(1), path: accPath).hikeUp(db).valueOr: + return err(UtilsAccUnaccessible) let wp = 
hike.legs[^1].wp
 
   if wp.vtx.vType != Leaf:
@@ -223,10 +215,13 @@
   if stoID.isValid and stoID != stoRoot:
     return err(UtilsAccWrongStorageRoot)
 
-  # Clear Merkle keys and store leaf record
+  # Clear Merkle keys so that `hashify()` can calculate the re-hash forest/tree
   for w in hike.legs.mapIt(it.wp.vid):
-    db.layersResKey w
-  db.top.final.lTab[lty] = wp.vid
+    db.layersResKey(hike.root, w)
+
+  # Signal to `hashify()` where to start rebuilding Merkle hashes
+  db.top.final.dirty.incl hike.root
+  db.top.final.dirty.incl wp.vid
 
   ok()
 
diff --git a/nimbus/db/aristo/aristo_vid.nim b/nimbus/db/aristo/aristo_vid.nim
index 432d74eef..d3abbe8bb 100644
--- a/nimbus/db/aristo/aristo_vid.nim
+++ b/nimbus/db/aristo/aristo_vid.nim
@@ -70,12 +70,21 @@ proc vidDispose*(db: AristoDbRef; vid: VertexID) =
     db.top.final.vGen[^1] = vid
     db.top.final.vGen.add topID
 
+
 proc vidReorg*(vGen: seq[VertexID]): seq[VertexID] =
   ## Return a compacted version of the argument vertex ID generator state
   ## `vGen`. The function removes redundant items from the recycle queue and
   ## orders it in a way so that smaller `VertexID` numbers are re-used first.
   ##
-  if 1 < vGen.len:
+  # Apply heuristic test to avoid unnecessary sorting
+  var reOrgOk = false
+  if 2 < vGen.len and vGen[0] < vGen[^2]:
+    if vGen.len < 10:
+      reOrgOk = true
+    elif vGen[0] < vGen[1] and vGen[^3] < vGen[^2]:
+      reOrgOk = true
+
+  if reOrgOk:
     let lst = vGen.mapIt(uint64(it)).sorted(Descending).mapIt(VertexID(it))
     for n in 0 .. lst.len-2:
       if lst[n].uint64 != lst[n+1].uint64 + 1:
diff --git a/tests/test_aristo/test_backend.nim b/tests/test_aristo/test_backend.nim
index e26b6dccb..56470211f 100644
--- a/tests/test_aristo/test_backend.nim
+++ b/tests/test_aristo/test_backend.nim
@@ -22,9 +22,11 @@ import
     aristo_debug,
     aristo_desc,
     aristo_desc/desc_backend,
+    aristo_get,
     aristo_hashify,
     aristo_init/memory_db,
     aristo_init/rocks_db,
+    aristo_layers,
     aristo_persistent,
     aristo_blobify,
     aristo_vid],
@@ -38,6 +40,13 @@ const
 # Private helpers
 # ------------------------------------------------------------------------------
 
+when not declared(aristo_hashify.noisy):
+  proc hashify(
+      db: AristoDbRef;
+      noisy: bool;
+        ): Result[void,(VertexID,AristoError)] =
+    aristo_hashify.hashify(db)
+
 func hash(filter: FilterRef): Hash =
   ## Unique hash/filter -- cannot use de/blobify as the expressions
   ## `filter.blobify` and `filter.blobify.value.deblobify.value.blobify` are
@@ -65,36 +74,6 @@ func hash(filter: FilterRef): Hash =
 # Private functions
 # ------------------------------------------------------------------------------
 
-proc mergeData(
-    db: AristoDbRef;
-    rootKey: Hash256;
-    rootVid: VertexID;
-    proof: openArray[SnapProof];
-    leafs: openArray[LeafTiePayload];
-    noisy: bool;
-      ): bool =
-  ## Simplified loop body of `test_mergeProofAndKvpList()`
-  if 0 < proof.len:
-    let rc = db.merge(rootKey, rootVid)
-    xCheckRc rc.error == 0
-
-    let proved = db.merge(proof, rc.value)
-    xCheck proved.error in {AristoError(0),MergeHashKeyCachedAlready}
-
-  let merged = db.mergeList leafs
-  xCheck merged.error in {AristoError(0), MergeLeafPathCachedAlready}
-
-  block:
-    let rc = db.hashify # (noisy, true)
-    xCheckRc rc.error == (0,0):
-      noisy.say "***", "dataMerge(9)",
-        " nLeafs=", leafs.len,
-        "\n cache dump\n ", db.pp,
-        "\n backend dump\n ", db.backend.pp(db)
-
-  true
-
-
 proc verify(
     ly: LayerRef;        # Database layer
     be: BackendRef;      # Backend
@@ -121,8 +100,13 @@ proc verify(
         " nVtx=", nVtx.pp,
         " mVtx=", mVtx.pp
 
-    xCheck beSTab.len == ly.delta.sTab.len
-    xCheck beKMap.len ==
ly.delta.kMap.len + xCheck beSTab.len == ly.delta.sTab.len + xCheck beKMap.len == ly.delta.kMap.len: + let + a = ly.delta.kMap.keys.toSeq.toHashSet + b = beKMap.keys.toSeq.toHashSet + noisy.say "***", "verify", + " delta=", (a -+- b).pp true @@ -134,28 +118,6 @@ proc verify( else: raiseAssert "Oops, unsupported backend " & $be.kind -# ----------- - -proc collectFilter( - db: AristoDbRef; - filter: FilterRef; - tab: var Table[QueueID,Hash]; - noisy: bool; - ): bool = - ## Store filter on permanent BE and register digest - if not filter.isNil: - let - fid = QueueID(7 * (tab.len + 1)) # just some number - be = db.backend - tx = be.putBegFn() - - be.putFilFn(tx, @[(fid,filter)]) - let rc = be.putEndFn tx - xCheckRc rc.error == 0 - - tab[fid] = filter.hash - - true proc verifyFilters( db: AristoDbRef; @@ -194,6 +156,100 @@ proc verifyFilters( else: raiseAssert "Oops, unsupported backend " & $be.kind + +proc verifyKeys( + db: AristoDbRef; + noisy: bool; + ): bool = + + proc verifyImpl[T](noisy: bool; db: AristoDbRef): bool = + ## Check for zero keys + var zeroKeys: seq[VertexID] + for (vid,vtx) in T.walkPairs(db): + if vtx.isValid and not db.getKey(vid).isValid: + zeroKeys.add vid + + xCheck zeroKeys == EmptyVidSeq: + noisy.say "***", "verifyKeys(1)", + "\n zeroKeys=", zeroKeys.pp, + #"\n db\n ", db.pp(backendOk=true), + "" + true + + ## Wrapper + let be = db.backend + case be.kind: + of BackendVoid: + verifyImpl[VoidBackendRef](noisy, db) + of BackendMemory: + verifyImpl[MemBackendRef](noisy, db) + of BackendRocksDB: + verifyImpl[RdbBackendRef](noisy, db) + +# ----------- + +proc collectFilter( + db: AristoDbRef; + filter: FilterRef; + tab: var Table[QueueID,Hash]; + noisy: bool; + ): bool = + ## Store filter on permanent BE and register digest + if not filter.isNil: + let + fid = QueueID(7 * (tab.len + 1)) # just some number + be = db.backend + tx = be.putBegFn() + + be.putFilFn(tx, @[(fid,filter)]) + let rc = be.putEndFn tx + xCheckRc rc.error == 0 + + tab[fid] = filter.hash + + true + +proc mergeData( + db: AristoDbRef; + rootKey: Hash256; + rootVid: VertexID; + proof: openArray[SnapProof]; + leafs: openArray[LeafTiePayload]; + noisy: bool; + ): bool = + ## Simplified loop body of `test_mergeProofAndKvpList()` + if 0 < proof.len: + let root = block: + let rc = db.merge(rootKey, rootVid) + xCheckRc rc.error == 0 + rc.value + + let nMerged = block: + let rc = db.merge(proof, root) # , noisy=noisy) + xCheckRc rc.error == 0 + rc.value + + let merged = db.mergeList(leafs, noisy=noisy) + xCheck merged.error in {AristoError(0), MergeLeafPathCachedAlready} + + block: + let rc = db.hashify(noisy = noisy) + xCheckRc rc.error == (0,0): + noisy.say "***", "dataMerge (8)", + " nProof=", proof.len, + " nLeafs=", leafs.len, + " error=", rc.error, + #"\n db\n ", db.pp(backendOk=true), + "" + block: + xCheck db.verifyKeys(noisy): + noisy.say "***", "dataMerge (9)", + " nProof=", proof.len, + " nLeafs=", leafs.len, + #"\n db\n ", db.pp(backendOk=true), + "" + true + # ------------------------------------------------------------------------------ # Public test function # ------------------------------------------------------------------------------ @@ -249,8 +305,8 @@ proc testBackendConsistency*( "\n ndb\n ", ndb.pp(backendOk = true), "\n -------------", "\n mdb\n ", mdb.pp(backendOk = true), - "\n -------------", - "\n rdb\n ", rdb.pp(backendOk = true), + #"\n -------------", + #"\n rdb\n ", rdb.pp(backendOk = true), "\n -------------" block: @@ -281,8 +337,8 @@ proc testBackendConsistency*( mdbPreSave = "" 
rdbPreSave = "" when true and false: - mdbPreSave = mdb.pp(backendOk = true) - rdbPreSave = rdb.pp(backendOk = true) + mdbPreSave = mdb.pp() # backendOk = true) + rdbPreSave = rdb.pp() # backendOk = true) # Provide filter, store filter on permanent BE, and register filter digest block: @@ -301,14 +357,18 @@ proc testBackendConsistency*( xCheckRc rc.error == 0 block: + ndb.top.final.pPrf.clear # let it look like mdb/rdb + xCheck mdb.pPrf.len == 0 + xCheck rdb.pPrf.len == 0 + let mdbVerifyOk = ndb.top.verify(mdb.backend, noisy) xCheck mdbVerifyOk: - when true and false: + when true: # and false: noisy.say "***", "beCon(4) <", n, "/", list.len-1, ">", " groups=", count, "\n ndb\n ", ndb.pp(backendOk = true), - #"\n -------------", - #"\n mdb pre-stow\n ", mdbPreSave, + "\n -------------", + "\n mdb pre-stow\n ", mdbPreSave, "\n -------------", "\n mdb\n ", mdb.pp(backendOk = true), "\n -------------" diff --git a/tests/test_aristo/test_filter.nim b/tests/test_aristo/test_filter.nim index 8bd756211..bd6012620 100644 --- a/tests/test_aristo/test_filter.nim +++ b/tests/test_aristo/test_filter.nim @@ -336,7 +336,7 @@ proc checkBeOk( ## .. for n in 0 ..< dx.len: let - cache = if forceCache: true else: not dx[n].dirty + cache = if forceCache: true else: dx[n].dirty.len == 0 rc = dx[n].checkBE(relax=relax, cache=cache) xCheckRc rc.error == (0,0): noisy.say "***", "db check failed", diff --git a/tests/test_aristo/test_helpers.nim b/tests/test_aristo/test_helpers.nim index 0dc0a412c..873fb391d 100644 --- a/tests/test_aristo/test_helpers.nim +++ b/tests/test_aristo/test_helpers.nim @@ -46,6 +46,26 @@ func to(a: NodeKey; T: type UInt256): T = func to(a: NodeKey; T: type PathID): T = a.to(UInt256).to(T) +when not declared(aristo_merge.noisy): + import ../../nimbus/db/aristo/aristo_hike + proc merge( + db: AristoDbRef; + root: VertexID; + path: openArray[byte]; + data: openArray[byte]; + accPath: PathID; + noisy: bool; + ): Result[bool, AristoError] = + aristo_merge.merge(db, root, path, data, accPath) + proc merge( + db: AristoDbRef; + lty: LeafTie; + pyl: PayloadRef; + accPath: PathID; + noisy: bool; + ): Result[Hike, AristoError] = + aristo_merge.merge(db, lty, pyl, accPath) + # ------------------------------------------------------------------------------ # Public pretty printing # ------------------------------------------------------------------------------ @@ -214,11 +234,18 @@ func mapRootVid*( proc mergeList*( db: AristoDbRef; # Database, top layer leafs: openArray[LeafTiePayload]; # Leaf items to add to the database + noisy = false; ): tuple[merged: int, dups: int, error: AristoError] = ## Variant of `merge()` for leaf lists. 
var (merged, dups) = (0, 0) for n,w in leafs: - let rc = db.merge(w.leafTie, w.payload, VOID_PATH_ID) + noisy.say "*** mergeList", + " n=", n, "/", leafs.len + let rc = db.merge(w.leafTie, w.payload, VOID_PATH_ID, noisy=noisy) + noisy.say "*** mergeList", + " n=", n, "/", leafs.len, + " rc=", (if rc.isOk: "ok" else: $rc.error), + "\n -------------\n" if rc.isOk: merged.inc elif rc.error in {MergeLeafPathCachedAlready,MergeLeafPathOnBackendAlready}: diff --git a/tests/test_aristo/test_misc.nim b/tests/test_aristo/test_misc.nim index 31c0c4945..f7bab98af 100644 --- a/tests/test_aristo/test_misc.nim +++ b/tests/test_aristo/test_misc.nim @@ -315,12 +315,18 @@ proc testVidRecycleLists*(noisy = true; seed = 42): bool = # Recycling and re-org tests func toVQ(a: seq[int]): seq[VertexID] = a.mapIt(VertexID(LEAST_FREE_VID+it)) - xCheck @[8, 7, 3, 4, 5, 9] .toVQ.vidReorg == @[5, 4, 3, 7] .toVQ - xCheck @[8, 7, 6, 3, 4, 5, 9] .toVQ.vidReorg == @[3] .toVQ - xCheck @[5, 4, 3, 7] .toVQ.vidReorg == @[5, 4, 3, 7] .toVQ - xCheck @[5] .toVQ.vidReorg == @[5] .toVQ - xCheck @[3, 5] .toVQ.vidReorg == @[3, 5] .toVQ - xCheck @[4, 5] .toVQ.vidReorg == @[4] .toVQ + # Heuristic prevents from re-org + xCheck @[8, 7, 3, 4, 5, 9] .toVQ.vidReorg == @[8, 7, 3, 4, 5, 9] .toVQ + xCheck @[8, 7, 6, 3, 4, 5, 9] .toVQ.vidReorg == @[8, 7, 6, 3, 4, 5, 9].toVQ + xCheck @[5, 4, 3, 7] .toVQ.vidReorg == @[5, 4, 3, 7] .toVQ + xCheck @[5] .toVQ.vidReorg == @[5] .toVQ + xCheck @[3, 5] .toVQ.vidReorg == @[3, 5] .toVQ + xCheck @[4, 5] .toVQ.vidReorg == @[4, 5] .toVQ + + # performing re-org + xCheck @[5, 7, 3, 4, 8, 9] .toVQ.vidReorg == @[5, 4, 3, 7] .toVQ + xCheck @[5, 7, 6, 3, 4, 8, 9] .toVQ.vidReorg == @[3] .toVQ + xCheck @[3, 4, 5, 7] .toVQ.vidReorg == @[5, 4, 3, 7] .toVQ xCheck newSeq[VertexID](0).vidReorg().len == 0 @@ -492,8 +498,6 @@ proc testShortKeys*( "\n k=", k.toHex, " v=", v.toHex, "\n r=", r.pp(sig), "\n ", sig.pp(), - "\n", - "\n pAmk=", sig.db.layersWalkYek.toSeq.toTable.pp(sig.db), "\n" let w = sig.merkleSignCommit().value gossip.say "*** testShortkeys (2)", "n=", n, " inx=", inx, @@ -501,9 +505,6 @@ proc testShortKeys*( "\n r=", r.pp(sig), "\n R=", w.pp(sig), "\n ", sig.pp(), - "\n", - "\n pAmk=", sig.db.layersWalkYek.toSeq.toTable.pp(sig.db), - "\n", "\n ----------------", "\n" let rc = sig.db.check diff --git a/tests/test_aristo/test_tx.nim b/tests/test_aristo/test_tx.nim index 93248b55e..03f5c6e77 100644 --- a/tests/test_aristo/test_tx.nim +++ b/tests/test_aristo/test_tx.nim @@ -18,7 +18,7 @@ import stew/endians2, ../../nimbus/db/aristo/[ aristo_check, aristo_debug, aristo_delete, aristo_desc, aristo_get, - aristo_layers, aristo_merge], + aristo_hike, aristo_layers, aristo_merge], ../../nimbus/db/[aristo, aristo/aristo_init/persistent], ../replay/xcheck, ./test_helpers @@ -83,14 +83,22 @@ proc rand(td: var PrngDesc; top: int): int = proc randomisedLeafs( db: AristoDbRef; + ltys: HashSet[LeafTie]; td: var PrngDesc; - ): seq[(LeafTie,VertexID)] = - result = db.lTab.pairs.toSeq.filterIt(it[1].isValid).sorted( - cmp = proc(a,b: (LeafTie,VertexID)): int = cmp(a[0], b[0])) - if 2 < result.len: - for n in 0 ..< result.len-1: - let r = n + td.rand(result.len - n) - result[n].swap result[r] + ): Result[seq[(LeafTie,VertexID)],(VertexID,AristoError)] = + var lvp: seq[(LeafTie,VertexID)] + for lty in ltys: + let hike = lty.hikeUp(db).valueOr: + return err((error[0],error[1])) + lvp.add (lty,hike.legs[^1].wp.vid) + + var lvp2 = lvp.sorted( + cmp = proc(a,b: (LeafTie,VertexID)): int = cmp(a[0],b[0])) + if 2 < lvp2.len: + for n in 
0 ..< lvp2.len-1: + let r = n + td.rand(lvp2.len - n) + lvp2[n].swap lvp2[r] + ok lvp2 proc innerCleanUp(db: AristoDbRef): bool {.discardable.} = ## Defer action @@ -134,7 +142,7 @@ proc saveToBackend( xCheckRc rc.error == 0 # Make sure MPT hashes are OK - xCheck db.dirty == false + xCheck db.dirty.len == 0 block: let rc = db.txTop() @@ -154,7 +162,7 @@ proc saveToBackend( xCheckRc rc.error == 0 # Make sure MPT hashes are OK - xCheck db.dirty == false + xCheck db.dirty.len == 0 block: let rc = db.txTop() @@ -192,7 +200,7 @@ proc saveToBackendWithOops( xCheckRc rc.error == 0 # Make sure MPT hashes are OK - xCheck db.dirty == false + xCheck db.dirty.len == 0 block: let rc = db.txTop() @@ -208,7 +216,7 @@ proc saveToBackendWithOops( xCheckRc rc.error == 0 # Make sure MPT hashes are OK - xCheck db.dirty == false + xCheck db.dirty.len == 0 block: let rc = db.txTop() @@ -356,8 +364,10 @@ proc testTxMergeAndDeleteOneByOne*( var leafsLeft = kvpLeafs.mapIt(it.leafTie).toHashSet # Provide a (reproducible) peudo-random copy of the leafs list - let leafVidPairs = db.randomisedLeafs prng - xCheck leafVidPairs.len == leafsLeft.len + let leafVidPairs = block: + let rc = db.randomisedLeafs(leafsLeft, prng) + xCheckRc rc.error == (0,0) + rc.value # Trigger subsequent saving tasks in loop below let (saveMod, saveRest, relax) = block: @@ -459,8 +469,10 @@ proc testTxMergeAndDeleteSubTree*( var leafsLeft = kvpLeafs.mapIt(it.leafTie).toHashSet # Provide a (reproducible) peudo-random copy of the leafs list - let leafVidPairs = db.randomisedLeafs prng - xCheck leafVidPairs.len == leafsLeft.len + let leafVidPairs = block: + let rc = db.randomisedLeafs(leafsLeft, prng) + xCheckRc rc.error == (0,0) + rc.value # === delete sub-tree === block: @@ -538,29 +550,25 @@ proc testTxMergeProofAndKvpList*( testId = idPfx & "#" & $w.id & "." 
& $n runID = n sTabLen = db.nLayersVtx() - lTabLen = db.lTab.len leafs = w.kvpLst.mapRootVid VertexID(1) # merge into main trie - var - proved: tuple[merged: int, dups: int, error: AristoError] if 0 < w.proof.len: - let rc = db.merge(rootKey, VertexID(1)) - xCheckRc rc.error == 0 + let root = block: + let rc = db.merge(rootKey, VertexID(1)) + xCheckRc rc.error == 0 + rc.value - proved = db.merge(w.proof, rc.value) + let nMerged = block: + let rc = db.merge(w.proof, root) + xCheckRc rc.error == 0 + rc.value - xCheck proved.error in {AristoError(0),MergeHashKeyCachedAlready} - xCheck w.proof.len == proved.merged + proved.dups - xCheck db.lTab.len == lTabLen - xCheck db.nLayersVtx() <= proved.merged + sTabLen - xCheck proved.merged < db.nLayersYek() + xCheck w.proof.len == nMerged + xCheck db.nLayersVtx() <= nMerged + sTabLen - let - merged = db.mergeList leafs - - xCheck db.lTab.len == lTabLen + merged.merged - xCheck merged.merged + merged.dups == leafs.len + let merged = db.mergeList leafs xCheck merged.error in {AristoError(0), MergeLeafPathCachedAlready} + xCheck merged.merged + merged.dups == leafs.len block: let oops = oopsTab.getOrDefault(testId,(0,AristoError(0))) @@ -571,6 +579,7 @@ proc testTxMergeProofAndKvpList*( when true and false: noisy.say "***", "proofs(9) <", n, "/", list.len-1, ">", " groups=", count, " proved=", proved, " merged=", merged + true # ------------------------------------------------------------------------------ diff --git a/tests/test_coredb.nim b/tests/test_coredb.nim index 71eaabd86..5657e629a 100644 --- a/tests/test_coredb.nim +++ b/tests/test_coredb.nim @@ -11,7 +11,7 @@ ## Testing `CoreDB` wrapper implementation import - std/[os, strformat, strutils], + std/[os, strformat, strutils, times], chronicles, eth/common, results, @@ -210,7 +210,9 @@ proc chainSyncRunner( ## Test backend database and ledger let - fileInfo = capture.files[0].splitFile.name.split(".")[0] + fileInfo = capture.files[0] + .splitFile.name.split(".")[0] + .strip(leading=false, chars={'0'..'9'}) filePaths = capture.files.mapIt(it.findFilePath(baseDir,repoDir).value) baseDir = getTmpDir() / capture.name & "-chain-sync" dbDir = baseDir / "tmp" @@ -238,7 +240,7 @@ proc chainSyncRunner( com = initRunnerDB(dbDir, capture, dbType, ldgType) defer: com.db.finish(flush = finalDiskCleanUpOk) - if profilingOk: noisy.testChainSyncProfilingPrint numBlocks + if profilingOk: noisy.test_chainSyncProfilingPrint numBlocks if persistent and finalDiskCleanUpOk: dbDir.flushDbDir if noisy: @@ -247,7 +249,7 @@ proc chainSyncRunner( com.db.trackLedgerApi = true com.db.localDbOnly = true - check noisy.testChainSync(filePaths, com, numBlocks, + check noisy.test_chainSync(filePaths, com, numBlocks, lastOneExtra=lastOneExtraOk, enaLogging=enaLoggingOk) # ------------------------------------------------------------------------------ @@ -278,18 +280,18 @@ when isMainModule: var state: (Duration, int) for n,capture in sampleList: - noisy.profileSection("@testList #" & $n, state): + noisy.profileSection("@sample #" & $n, state): noisy.chainSyncRunner( capture = capture, #dbType = .., ldgType=LedgerCache, - #profilingOk = .., - finalDiskCleanUpOk = false, + #profilingOk = true, + #finalDiskCleanUpOk = false, #enaLoggingOk = .., #lastOneExtraOk = .., ) - noisy.say "***", "total elapsed: ", state[0].pp, " sections: ", state[1] + noisy.say "***", "total: ", state[0].pp, " sections: ", state[1] # ------------------------------------------------------------------------------ # End diff --git 
a/tests/test_coredb/coredb_test_xx.nim b/tests/test_coredb/coredb_test_xx.nim index 51640a774..63ebf84c0 100644 --- a/tests/test_coredb/coredb_test_xx.nim +++ b/tests/test_coredb/coredb_test_xx.nim @@ -83,6 +83,14 @@ let dbType: AristoDbRocks) ariTest2* = CaptureSpecs( + builtIn: true, + name: bulkTest3.name & "-am", + network: bulkTest3.network, + files: bulkTest3.files, + numBlocks: 500_000, + dbType: AristoDbMemory) + + ariTest3* = CaptureSpecs( builtIn: true, name: bulkTest3.name & "-ar", network: bulkTest3.network, @@ -110,16 +118,25 @@ let legaTest2* = CaptureSpecs( builtIn: true, - name: ariTest2.name.replace("-ar", "-lp"), + name: ariTest2.name.replace("-ar", "-lm"), + network: ariTest2.network, files: ariTest2.files, numBlocks: ariTest2.numBlocks, + dbType: LegacyDbMemory) + + legaTest3* = CaptureSpecs( + builtIn: true, + name: ariTest3.name.replace("-ar", "-lp"), + network: ariTest3.network, + files: ariTest3.files, + numBlocks: ariTest3.numBlocks, dbType: LegacyDbPersistent) # ------------------ allSamples* = [ bulkTest0, bulkTest1, bulkTest2, bulkTest3, - ariTest0, ariTest1, ariTest2, - legaTest0, legaTest1, legaTest2] + ariTest0, ariTest1, ariTest2, ariTest3, + legaTest0, legaTest1, legaTest2, legaTest3] # End
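
[Editorial aside, not part of the patch] A small sketch of the reworked `vidReorg()` recycle-list heuristic, mirroring the expectations added to `tests/test_aristo/test_misc.nim`. The constant `base` is an arbitrary stand-in for `LEAST_FREE_VID`, and the relative import paths assume the file sits next to the aristo sources.

import std/sequtils
import ./aristo_desc, ./aristo_vid

const base = 100                     # arbitrary stand-in for LEAST_FREE_VID

func toVQ(a: seq[int]): seq[VertexID] =
  ## Map small integers to vertex IDs above `base`.
  a.mapIt(VertexID(base + it))

# The cheap pre-check sees vGen[0] >= vGen[^2] and leaves the list alone ...
doAssert @[8, 7, 3, 4, 5, 9].toVQ.vidReorg == @[8, 7, 3, 4, 5, 9].toVQ
# ... but once the head is clearly out of order the list is sorted and compacted.
doAssert @[5, 7, 3, 4, 8, 9].toVQ.vidReorg == @[5, 4, 3, 7].toVQ

The heuristic trades an occasional missed compaction for skipping the sort on recycle lists that already look usable.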