Simplify aristo tree deletion functionality (#2563)

* Cleaning up, removing cruft and debugging statements

* Make `aristo_delta` fluffy compatible

why:
  A sub-module that uses `chronicles` must import all possible
  modules used by a parent module that imports the sub-module.

* update TODO
This commit is contained in:
Jordan Hrycaj 2024-08-14 12:09:30 +00:00 committed by GitHub
parent d148de5b1c
commit cbe5131927
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 118 additions and 510 deletions

View File

@ -1,11 +1,4 @@
#
# For some reason `json[dynamic]` causes problems with subsequent modules from
# `Aristo` when compiling `fluffy`. There might be a `chronicles` import missing
# but it is not obvious where. -- jordan
#
#-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
-d:"chronicles_sinks=textlines[dynamic]"
-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
-d:"chronicles_runtime_filtering=on"
-d:"chronicles_disable_thread_id"

View File

@ -1,6 +1,5 @@
# Use only `secp256k1` public key cryptography as an identity in LibP2P.
-d:"libp2p_pki_schemes=secp256k1"
# See `fluffy.nim.cfg`
#-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
-d:"chronicles_sinks=textlines[dynamic]"

View File

@ -1,3 +1,2 @@
# See `fluffy.nim.cfg`
#-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
-d:"chronicles_sinks=textlines[dynamic],json[dynamic]"
-d:"chronicles_sinks=textlines[dynamic]"

View File

@ -11,3 +11,10 @@
function mentioned above.
* `aristo_nearby` also qualifies for a re-write, now
* Revisit tree deletion. The idea is to finally use ranges of nodes by
exploiting the root ID prefix of a `RootedVertexID`. The `RocksDb` backend
seems to support this kind of operation, see
https://rocksdb.org/blog/2018/11/21/delete-range.html. For the application
part, there are some great ideas floating around which need to be followed
up on at some point.

View File

@ -42,15 +42,6 @@ const
## LRU cache size for accounts that have storage, see `.accLeaves` and
## `.stoLeaves` fields of the main descriptor.
DELETE_SUBTREE_VERTICES_MAX* = 25
## Maximum number of vertices for a tree to be deleted instantly. If the
## tree is larger, only the sub-tree root will be deleted immediately and
## subsequent entries will be deleted not until the cache layers are saved
## to the backend.
##
## Set to zero to disable in which case all sub-trees are deleted
## immediately.
static:
# must stay away from `VertexID(1)` and `VertexID(2)`
doAssert 2 < LEAST_FREE_VID

View File

@ -1,127 +0,0 @@
# nimbus-eth1
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
{.push raises: [].}
import
std/[math, strformat, times],
chronicles,
".."/[aristo_desc, aristo_get, aristo_profile]
export
aristo_profile.toStr
type
  # Figures gathered while walking one particular sub-tree.
  SubTreeStats* = tuple
    nVtxs: int                 ## Vertices found in the sub-tree
    nLeafs: int                ## Leaf vertices found in the sub-tree
    depthMax: int              ## Longest vertex path encountered
    nStoCache: int             ## Number of entries in the storage leafs cache
    elapsed: Duration          ## Time it took to run the analysis

  # Running totals over many `SubTreeStats` samples. The `s*` fields hold
  # plain sums, the `q*` fields sums of squares.
  SubTreeStatsAccu* = tuple
    count: int                 ## Samples accumulated so far
    sVtxs: float               ## Sum of `.nVtxs`
    qVtxs: float               ## Square sum of `.nVtxs`
    sLeafs: float              ## Sum of `.nLeafs`
    qLeafs: float              ## Square sum of `.nLeafs`
    sDepth: float              ## Sum of `.depthMax`
    qDepth: float              ## Square sum of `.depthMax`
    sElapsed: Duration         ## Sum of `.elapsed`

  # Distribution figures derived from a `SubTreeStatsAccu`. The `m*` fields
  # are means, the `d*` fields standard deviations.
  SubTreeDist* = tuple
    count: int                 ## Samples the figures are based on
    mVtxs: float               ## Mean of `.nVtxs`
    dVtxs: float               ## Standard deviation of `.nVtxs`
    mLeafs: float              ## Mean of `.nLeafs`
    dLeafs: float              ## Standard deviation of `.nLeafs`
    mDepth: float              ## Mean of `.depthMax`
    dDepth: float              ## Standard deviation of `.depthMax`
# ------------------------------------------------------------------------------
# Private helper
# ------------------------------------------------------------------------------
proc analyseSubTreeImpl(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
    depth: int;                        # Recursion depth
    stats: var SubTreeStats;           # Statistics
      ) =
  ## Recursive worker: count the vertex `rvid` and everything reachable
  ## from it into `stats`. A vertex that cannot be fetched is skipped
  ## without any notice.
  let (vtx, _) = db.getVtxRc(rvid).valueOr:
    return

  inc stats.nVtxs
  stats.depthMax = max(stats.depthMax, depth)

  if vtx.vType == Leaf:
    inc stats.nLeafs
    return

  # Branch vertex: descend into each populated child slot.
  for idx in 0 .. 15:
    let subVid = vtx.bVid[idx]
    if subVid.isValid:
      db.analyseSubTreeImpl((rvid.root, subVid), depth + 1, stats)
func evalDist(count: int; sum, sqSum: float): tuple[mean, stdDev: float] =
  ## Derive mean and standard deviation of a sample from its accumulated
  ## totals.
  ##
  ## * `count` -- number of samples
  ## * `sum`   -- sum of the sample values
  ## * `sqSum` -- sum of the squared sample values
  ##
  ## For an empty sample (`count <= 0`) both figures are reported as zero.
  ## Without this guard the divisions below would evaluate `0.0 / 0.0` and
  ## hand `NaN` values to the caller.
  if count <= 0:
    return (mean: 0.0, stdDev: 0.0)

  let
    n = count.float
    mean = sum / n
    # Mathematically, `mean * mean <= sqSum / n` but rounding errors may
    # push the difference slightly below zero if both terms are
    # approximately the same, so it is clamped before taking the root.
    variance = max(sqSum / n - mean * mean, 0.0)

  (mean: mean, stdDev: variance.sqrt)
# ------------------------------------------------------------------------------
# Public analysis tools
# ------------------------------------------------------------------------------
proc analyseSubTree*(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
    minVtxs: int;                      # Accumulate if `minVtxs` <= `.nVtxs`
    accu: var SubTreeStatsAccu;        # For accumulated statistics
      ): SubTreeStats =
  ## Walk the sub-tree rooted at `rvid` and return its statistics.
  ##
  ## The sample is added to the sums in `accu` only if the sub-tree has at
  ## least `minVtxs` vertices. The time spent here is added to
  ## `accu.sElapsed` in any case.
  let t0 = getTime()

  var sample: SubTreeStats
  db.analyseSubTreeImpl(rvid, 1, sample)
  sample.nStoCache = db.stoLeaves.len

  if minVtxs <= sample.nVtxs:
    inc accu.count
    accu.sVtxs += float(sample.nVtxs)
    accu.qVtxs += float(sample.nVtxs * sample.nVtxs)
    accu.sLeafs += float(sample.nLeafs)
    accu.qLeafs += float(sample.nLeafs * sample.nLeafs)
    accu.sDepth += float(sample.depthMax)
    accu.qDepth += float(sample.depthMax * sample.depthMax)

  # Timing is collected unconditionally, regardless of `minVtxs`.
  sample.elapsed = getTime() - t0
  accu.sElapsed += sample.elapsed
  sample
func stats*(a: SubTreeStatsAccu): SubTreeDist =
  ## Turn the accumulated sums and square sums of `a` into mean and
  ## standard deviation figures for vertices, leafs and path depth.
  let
    vtxs = evalDist(a.count, a.sVtxs, a.qVtxs)
    leafs = evalDist(a.count, a.sLeafs, a.qLeafs)
    depth = evalDist(a.count, a.sDepth, a.qDepth)
  (count: a.count,
   mVtxs: vtxs.mean, dVtxs: vtxs.stdDev,
   mLeafs: leafs.mean, dLeafs: leafs.stdDev,
   mDepth: depth.mean, dDepth: depth.stdDev)
func strStats*(
    a: SubTreeStatsAccu;
      ): tuple[count, vtxs, leafs, depth, elapsed: string] =
  ## Printable version of `a.stats()`. The distribution entries are
  ## rendered as `mean[deviation]` with one decimal place each, the
  ## accumulated analysis time is rendered via `toStr()`.
  let w = a.stats()
  (count: $w.count,
   vtxs: &"{w.mVtxs:.1f}[{w.dVtxs:.1f}]",
   leafs: &"{w.mLeafs:.1f}[{w.dLeafs:.1f}]",
   depth: &"{w.mDepth:.1f}[{w.dDepth:.1f}]",
   elapsed: a.sElapsed.toStr)
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -8,13 +8,109 @@
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
import ../aristo_constants
{.push raises: [].}
when DELETE_SUBTREE_VERTICES_MAX == 0:
import ./delete_subtree_now as del_sub
else:
import ./delete_subtree_lazy as del_sub
import
eth/common,
results,
".."/[aristo_desc, aristo_get, aristo_layers],
./delete_helpers
export del_sub
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
proc collectStoTreeLazily(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
    accPath: Hash256;                  # Accounts cache designator
    stoPath: NibblesBuf;               # Current storage path
      ): Result[void,AristoError] =
  ## Walk the storage tree below `rvid` and register a `nil` entry with
  ## `layersPutStoLeaf()` for every leaf found. The cache key of a leaf is
  ## made up from `accPath` and the full path of the leaf, which is
  ## assembled in `stoPath` while descending.
  ##
  ## A vertex that does not exist is not an error, any other lookup failure
  ## is passed on to the caller.
  ##
  ## Walking the whole tree is deliberate. The alternative of clearing the
  ## complete storage data access cache instead was seen to degrade
  ## performance significantly in some tests on mainnet when importing
  ## `era1`. There, the cache was filled up to maximum size most of the time
  ## while having no `stoPath` hit at all, so there was nothing to clear.
  let (vtx, _) = db.getVtxRc(rvid).valueOr:
    if error != GetVtxNotFound:
      return err(error)
    return ok()

  if vtx.vType == Leaf:
    let leafKey = Hash256(data: (stoPath & vtx.lPfx).getBytes())
    db.layersPutStoLeaf(AccountKey.mixUp(accPath, leafKey), nil)
    return ok()

  # Branch vertex: descend into each populated child slot.
  for idx in 0 .. 15:
    let subVid = vtx.bVid[idx]
    if subVid.isValid:
      let subPath = stoPath & vtx.ePfx & NibblesBuf.nibble(byte idx)
      ? db.collectStoTreeLazily((rvid.root, subVid), accPath, subPath)

  ok()
proc disposeOfSubTree(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
      ) =
  ## Dispose of the vertex `rvid` right away. If it is a branch, its valid
  ## child vertices are not followed here but appended to `db.top.delTree`
  ## instead.
  ##
  ## The vertex is fetched without any error check, so the caller must have
  ## made sure that the vertex `rvid` can be retrieved.
  let (vtx, _) = db.getVtxRc(rvid).value

  if vtx.vType == Branch:
    for idx in 0 .. 15:
      let subVid = vtx.bVid[idx]
      if subVid.isValid:
        db.top.delTree.add (rvid.root, subVid)

  # The top of the tree goes immediately.
  db.disposeOfVtx(rvid)
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
proc delSubTreeImpl*(
    db: AristoDbRef;                   # Database, top layer
    root: VertexID;                    # Root vertex
      ): Result[void,AristoError] =
  ## Delete the tree with root vertex `(root,root)`. The root vertex is
  ## disposed of immediately while its sub-trees are handed over to
  ## `disposeOfSubTree()` for registration on `db.top.delTree`.
  ##
  ## A missing root vertex is accepted silently, any other lookup error is
  ## returned to the caller.
  let rvid: RootedVertexID = (root, root)

  discard db.getVtxRc(rvid).valueOr:
    if error != GetVtxNotFound:
      return err(error)
    return ok()

  db.disposeOfSubTree(rvid)
  ok()
proc delStoTreeImpl*(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
    accPath: Hash256;
      ): Result[void,AristoError] =
  ## Delete the storage tree rooted at `rvid` which belongs to the account
  ## designated by `accPath`. First the storage leaf cache entries for all
  ## leafs of the tree are reset, then the root vertex is disposed of and
  ## its sub-trees are registered via `disposeOfSubTree()`.
  ##
  ## A missing root vertex is accepted silently, any other lookup error is
  ## returned to the caller.
  discard db.getVtxRc(rvid).valueOr:
    if error != GetVtxNotFound:
      return err(error)
    return ok()

  let emptyPath = NibblesBuf()
  ? db.collectStoTreeLazily(rvid, accPath, emptyPath)

  db.disposeOfSubTree(rvid)
  ok()
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -1,252 +0,0 @@
# nimbus-eth1
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
{.push raises: [].}
import
std/sets,
eth/common,
results,
".."/[aristo_desc, aristo_get, aristo_layers],
./delete_helpers
# Compile time switch, it guards the `when extraDebuggingMessages:` sections
# of this module.
const
extraDebuggingMessages = false # or true
## Enable additional logging noise. Note that this might slow down the
## system performance but will not be too significant. When importing the
## first 5m blocks from `era1` on some Debian system,
## * loading time was ~5h
## * overhead of accumulated analysis times was ~1.2s
# Buffer of limited capacity for vertex IDs which are to be disposed of.
# The capacity is `DELETE_SUBTREE_VERTICES_MAX`. A value of `.top` which
# exceeds `.data.len` is used by `disposeOfSubTree()` as the indicator that
# the buffer could not hold all vertices of a tree.
type
VidCollect = tuple
data: array[DELETE_SUBTREE_VERTICES_MAX,VertexID]
top: int # Next free slot if smaller `.data.len`
# Statistics and logging support, compiled in only if the switch
# `extraDebuggingMessages` is enabled.
when extraDebuggingMessages:
import
std/times,
chronicles,
./delete_debug
const
allStatsFrequency = 20
## Print accumulated statistics every `allStatsFrequency` visits of
## the analysis tool.
minVtxsForLogging = 1000
## Suppress detailed logging for smaller sub-trees
# Module global accumulator, updated by `analyseSubTree()` calls and by the
# statistics printing code of `delSubTreeImpl()` and `delStoTreeImpl()`.
var stats: SubTreeStatsAccu
## Accumulated statistics
# Lets a `Duration` be passed as a logger argument, rendered via `toStr()`.
func `$`(ela: Duration): string =
ela.toStr
template debugLog(info: static[string]; args: varargs[untyped]) =
## Statistics message via `chronicles` logger, makes it easy to
## change priority and format.
notice info, args
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
func capa(T: type VidCollect): int =
  ## Number of slots of the `.data` array of a `VidCollect` buffer.
  len(default(T).data)
proc collectSubTreeLazily(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Root vertex
vids: var VidCollect; # Accumulating vertex IDs for deletion
): Result[void,AristoError] =
## Collect vids for a small sub-tree
##
## The vertex ID of `rvid` is stored in `vids.data` as long as there is a
## free slot left, and the function recurses into the valid children of a
## branch vertex. A vertex that does not exist is not an error, any other
## lookup failure is returned to the caller.
##
## NOTE(review): the block indentation is not visible in this listing.
## Presumably the recursion only takes place while there is a free slot,
## and the `elif` clause belongs to the outer `if` so that `.top` is bumped
## once beyond `.data.len` to flag an overflow -- confirm against the
## original file.
let (vtx, _) = db.getVtxRc(rvid).valueOr:
if error == GetVtxNotFound:
return ok()
return err(error)
if vids.top < vids.data.len:
vids.data[vids.top] = rvid.vid
vids.top.inc # Max value of `.top`: `vid.data.len + 1`
if vtx.vType == Branch:
for n in 0..15:
if vtx.bVid[n].isValid:
? db.collectSubTreeLazily((rvid.root,vtx.bVid[n]), vids)
elif vids.top <= vids.data.len:
vids.top.inc # Terminates here
ok()
proc collectStoTreeLazily(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Root vertex
accPath: Hash256; # Accounts cache designator
stoPath: NibblesBuf; # Current storage path
vids: var VidCollect; # Accumulating vertex IDs for deletion
): Result[void,AristoError] =
## Collect vertex/vid and delete cache entries.
##
## For every leaf found, a `nil` entry is registered with
## `layersPutStoLeaf()`. The key is made up from `accPath` and the full
## leaf path, which is assembled in `stoPath` while descending. A vertex
## that does not exist is not an error, any other lookup failure is
## returned to the caller.
let (vtx, _) = db.getVtxRc(rvid).valueOr:
if error == GetVtxNotFound:
return ok()
return err(error)
case vtx.vType
of Branch:
for i in 0..15:
if vtx.bVid[i].isValid:
? db.collectStoTreeLazily(
(rvid.root, vtx.bVid[i]), accPath,
stoPath & vtx.ePfx & NibblesBuf.nibble(byte i),
vids)
of Leaf:
# The local `stoPath` shadows the parameter, it holds the complete leaf path.
let stoPath = Hash256(data: (stoPath & vtx.lPfx).getBytes())
db.layersPutStoLeaf(AccountKey.mixUp(accPath, stoPath), nil)
# There is no useful approach avoiding to walk the whole tree for updating
# the storage data access cache.
#
# The alternative of stopping here and clearing the whole cache did degrade
# performance significantly in some tests on mainnet when importing `era1`.
#
# When not clearing the cache it was seen
# * filled up to maximum size most of the time
# * at the same time having no `stoPath` hit at all (so there was nothing
# to be cleared.)
#
# NOTE(review): presumably the following applies to branch and leaf vertices
# alike (i.e. it is placed after the `case` statement) -- confirm against the
# original file, the indentation is not visible in this listing.
if vids.top <= vids.data.len:
if vids.top < vids.data.len:
vids.data[vids.top] = rvid.vid
vids.top.inc # Max value of `.top`: `vid.data.len + 1`
ok()
proc disposeOfSubTree(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Root vertex
vids: var VidCollect; # Accumulated vertex IDs for disposal
) =
## Evaluate results from `collectSubTreeLazily()` or from
## `collectStoTreeLazily()`.
##
## If `vids.top` does not exceed the buffer capacity, the collected
## vertices are disposed of one by one. Otherwise the valid children of
## the vertex `rvid` are appended to `db.top.delTree`.
##
## NOTE(review): the overflow case fetches the vertex without error check
## and reads `.bVid` without testing the vertex type. Presumably this relies
## on the callers having verified that `rvid` exists and on an overflow
## implying a branch vertex -- confirm.
if vids.top <= typeof(vids).capa:
# Delete small tree
for n in 0 ..< vids.top:
db.disposeOfVtx((rvid.root, vids.data[n]))
else:
# Mark the sub-trees disabled to be deleted not until the layer is
# finally stored onto the backend.
let vtx = db.getVtxRc(rvid).value[0]
for n in 0..15:
if vtx.bVid[n].isValid:
db.top.delTree.add (rvid.root,vtx.bVid[n])
# Delete top of tree now.
db.disposeOfVtx(rvid)
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
proc delSubTreeImpl*(
db: AristoDbRef; # Database, top layer
root: VertexID; # Root vertex
): Result[void,AristoError] =
## Delete all the `subRoots`if there are a few, only. Otherwise
## mark it for deleting later.
##
## A missing root vertex `(root,root)` is accepted silently, any other
## lookup error is returned to the caller.
discard db.getVtxRc((root,root)).valueOr:
if error == GetVtxNotFound:
return ok()
return err(error)
# Debugging only: analyse the tree before it is modified, start the timer.
when extraDebuggingMessages:
let
ana = db.analyseSubTree((root,root), VidCollect.capa+1, stats)
start = getTime()
# Collect the vertex IDs of the tree, then dispose of what was collected.
var dispose: VidCollect
? db.collectSubTreeLazily((root,root), dispose)
db.disposeOfSubTree((root,root), dispose)
# Debugging only: logging applies if the buffer capacity was exceeded.
# NOTE(review): the nesting of the `if` statements below is not visible in
# this listing -- confirm against the original file.
when extraDebuggingMessages:
if typeof(dispose).capa < dispose.top:
if minVtxsForLogging < ana.nVtxs:
debugLog("Generic sub-tree analysis",
nVtxs = ana.nVtxs,
nLeafs = ana.nLeafs,
depthMax = ana.depthMax,
nDelTree = db.top.delTree.len,
elaCollect = getTime() - start)
if (stats.count mod allStatsFrequency) == 0:
let
start = getTime()
(count, vtxs, leafs, depth, elapsed) = stats.strStats
debugLog("Sub-tree analysis stats", count, vtxs, leafs, depth, elapsed)
# The time spent on printing is added to the accumulated analysis time.
stats.sElapsed += getTime() - start
ok()
proc delStoTreeImpl*(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Root vertex
accPath: Hash256;
): Result[void,AristoError] =
## Collect vertex/vid and cache entry.
##
## A missing vertex `rvid` is accepted silently, any other lookup error is
## returned to the caller.
discard db.getVtxRc(rvid).valueOr:
if error == GetVtxNotFound:
return ok()
return err(error)
# Debugging only: analyse the tree before it is modified, start the timer.
when extraDebuggingMessages:
let
ana = db.analyseSubTree(rvid, VidCollect.capa+1, stats)
start = getTime()
var dispose: VidCollect # Accumulating vertices for deletion
# The walk starts with an empty storage path.
? db.collectStoTreeLazily(rvid, accPath, NibblesBuf(), dispose)
db.disposeOfSubTree(rvid, dispose)
# Debugging only: logging applies if the buffer capacity was exceeded.
# NOTE(review): the nesting of the `if` statements below is not visible in
# this listing -- confirm against the original file.
when extraDebuggingMessages:
if typeof(dispose).capa < dispose.top:
# `ana.nStoCache` is the storage cache size from before the deletion.
if minVtxsForLogging < ana.nVtxs or db.stoLeaves.len < ana.nStoCache:
debugLog("Storage sub-tree analysis",
nVtxs = ana.nVtxs,
nLeafs = ana.nLeafs,
depthMax = ana.depthMax,
nStoCache = ana.nStoCache,
nStoCacheDelta = ana.nStoCache - db.stoLeaves.len,
nDelTree = db.top.delTree.len,
elaCollect = getTime() - start)
if (stats.count mod allStatsFrequency) == 0:
let
start = getTime()
(count, vtxs, leafs, depth, elapsed) = stats.strStats
debugLog("Sub-tree analysis stats", count, vtxs, leafs, depth, elapsed)
# The time spent on printing is added to the accumulated analysis time.
stats.sElapsed += getTime() - start
ok()
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -1,101 +0,0 @@
# nimbus-eth1
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.
{.push raises: [].}
import
eth/common,
".."/[aristo_desc, aristo_get, aristo_layers],
./delete_helpers
# ------------------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------------------
proc delSubTreeNow(
    db: AristoDbRef;
    rvid: RootedVertexID;
      ): Result[void,AristoError] =
  ## Recursively dispose of the vertex `rvid` and of everything below it.
  ## A vertex that does not exist is not an error, any other lookup failure
  ## is returned to the caller.
  let (vtx, _) = db.getVtxRc(rvid).valueOr:
    if error != GetVtxNotFound:
      return err(error)
    return ok()

  # Children go first, the vertex itself last.
  if vtx.vType == Branch:
    for idx in 0 .. 15:
      let subVid = vtx.bVid[idx]
      if subVid.isValid:
        ? db.delSubTreeNow((rvid.root, subVid))

  db.disposeOfVtx(rvid)
  ok()
proc delStoTreeNow(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
    accPath: Hash256;                  # Accounts cache designator
    stoPath: NibblesBuf;               # Current storage path
      ): Result[void,AristoError] =
  ## Recursively dispose of the storage vertex `rvid` and of everything
  ## below it. For every leaf, a `nil` entry is registered with
  ## `layersPutStoLeaf()`. The key is made up from `accPath` and the full
  ## leaf path, which is assembled in `stoPath` while descending.
  ##
  ## A vertex that does not exist is not an error, any other lookup failure
  ## is returned to the caller.
  let (vtx, _) = db.getVtxRc(rvid).valueOr:
    if error != GetVtxNotFound:
      return err(error)
    return ok()

  if vtx.vType == Leaf:
    let leafKey = Hash256(data: (stoPath & vtx.lPfx).getBytes())
    db.layersPutStoLeaf(AccountKey.mixUp(accPath, leafKey), nil)
  else:
    # Branch vertex: children go first.
    for idx in 0 .. 15:
      let subVid = vtx.bVid[idx]
      if subVid.isValid:
        let subPath = stoPath & vtx.ePfx & NibblesBuf.nibble(byte idx)
        ? db.delStoTreeNow((rvid.root, subVid), accPath, subPath)

  db.disposeOfVtx(rvid)
  ok()
# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------
proc delSubTreeImpl*(
    db: AristoDbRef;
    root: VertexID;
      ): Result[void,AristoError] =
  ## Delete the complete tree with root vertex `(root,root)` immediately.
  ## A missing root vertex is accepted silently, any other lookup error is
  ## returned to the caller.
  let rvid: RootedVertexID = (root, root)

  discard db.getVtxRc(rvid).valueOr:
    if error != GetVtxNotFound:
      return err(error)
    return ok()

  db.delSubTreeNow(rvid)
proc delStoTreeImpl*(
    db: AristoDbRef;                   # Database, top layer
    rvid: RootedVertexID;              # Root vertex
    accPath: Hash256;
      ): Result[void,AristoError] =
  ## Delete the complete storage tree rooted at `rvid` immediately, starting
  ## the walk with an empty storage path. A missing root vertex is accepted
  ## silently, any other lookup error is returned to the caller.
  discard db.getVtxRc(rvid).valueOr:
    if error != GetVtxNotFound:
      return err(error)
    return ok()

  let emptyPath = NibblesBuf()
  db.delStoTreeNow(rvid, accPath, emptyPath)
# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

View File

@ -13,7 +13,7 @@
##
import
std/tables,
std/[strutils, tables],
chronicles,
eth/common,
results,
@ -28,16 +28,19 @@ logScope:
# Private functions
# ------------------------------------------------------------------------------
proc toStr(rvid: RootedVertexID): string =
  ## Render `rvid` for logging as `$<root>:<vid>` with both IDs given as
  ## hexadecimal numbers.
  let
    rootHex = toHex(uint64(rvid.root))
    vidHex = toHex(uint64(rvid.vid))
  "$" & rootHex & ":" & vidHex
proc delSubTree(db: AristoDbRef; writer: PutHdlRef; rvid: RootedVertexID) =
## Collect subtrees marked for deletion
let (vtx,_) = db.getVtxRc(rvid).valueOr:
notice "Descending for deletion stopped", rvid, error
notice "Descending for deletion stopped", rvid=(rvid.toStr), error
return
for vid in vtx.subVids:
db.delSubTree(writer, (rvid.root, vid))
db.backend.putVtxFn(writer, rvid, VertexRef(nil))
db.backend.putKeyFn(writer, rvid, VOID_HASH_KEY)
# Make sure the `rvid` is not mentioned here, anymore for furter update.
# Make sure the `rvid` is not mentioned here, anymore for further update.
db.balancer.sTab.del rvid
db.balancer.kMap.del rvid