Add branch cache (#2923)

Now that branches are small, we can add a branch cache that fits more vertices in memory by storing only the branch portion (16 bytes) of the VertexRef (136 bytes). Where the original vertex cache hovers around a hit rate of roughly 60%, this branch cache reaches a hit rate of >90% around block 20M, which gives a nice boost to processing. A downside of this approach is that a new VertexRef must be allocated for every cache hit instead of reusing an existing instance - this causes some GC overhead that needs to be addressed. Nice 15% improvement nonetheless, can't complain!

```
blocks: 19630784, baseline: 161h18m38s, contender: 136h23m23s
Time (total): -24h55m14s, -15.45%
```
2024-12-11 11:53:26 +01:00 · 2024-12-11 11:53:26 +01:00 · 7b88bb3b30
parent 29decdf265
commit 7b88bb3b30
6 changed files with 140 additions and 49 deletions
--- a/nimbus/config.nim
+++ b/nimbus/config.nim
@ -386,17 +386,23 @@ type
      defaultValueDesc: $defaultBlockCacheSize
      name: "debug-rocksdb-block-cache-size".}: int

+    rdbVtxCacheSize {.
+      hidden
+      defaultValue: defaultRdbVtxCacheSize
+      defaultValueDesc: $defaultRdbVtxCacheSize
+      name: "debug-rdb-vtx-cache-size".}: int
+
    rdbKeyCacheSize {.
      hidden
      defaultValue: defaultRdbKeyCacheSize
      defaultValueDesc: $defaultRdbKeyCacheSize
      name: "debug-rdb-key-cache-size".}: int

-    rdbVtxCacheSize {.
+    rdbBranchCacheSize {.
      hidden
-      defaultValue: defaultRdbVtxCacheSize
-      defaultValueDesc: $defaultRdbVtxCacheSize
-      name: "debug-rdb-vtx-cache-size".}: int
+      defaultValue: defaultRdbBranchCacheSize
+      defaultValueDesc: $defaultRdbBranchCacheSize
+      name: "debug-rdb-branch-cache-size".}: int

    rdbPrintStats {.
      hidden
@ -773,11 +779,13 @@ func dbOptions*(conf: NimbusConf, noKeyCache = false): DbOptions =
    rowCacheSize = conf.rocksdbRowCacheSize,
    blockCacheSize = conf.rocksdbBlockCacheSize,
    rdbKeyCacheSize =
-      if noKeyCache: 0 else: conf.rdbKeyCacheSize ,
-    rdbVtxCacheSize =
-      # The import command does not use the key cache - better give it to vtx
-      if noKeyCache: conf.rdbKeyCacheSize + conf.rdbVtxCacheSize
-      else: conf.rdbVtxCacheSize,
+      if noKeyCache: 0 else: conf.rdbKeyCacheSize,
+    rdbVtxCacheSize = conf.rdbVtxCacheSize,
+    rdbBranchCacheSize =
+      # The import command does not use the key cache - better give it to branch
+      if noKeyCache: conf.rdbKeyCacheSize + conf.rdbBranchCacheSize
+      else: conf.rdbBranchCacheSize,
+
    rdbPrintStats = conf.rdbPrintStats,
  )

--- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_desc.nim
+++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_desc.nim
@ -60,6 +60,9 @@ type
    rdVtxLru*: LruCache[VertexID,VertexRef] ## Read cache
    rdVtxSize*: int

+    rdBranchLru*: LruCache[VertexID, (VertexID, uint16)]
+    rdBranchSize*: int
+
    basePath*: string                  ## Database directory
    trgWriteEvent*: RdbWriteEventCb    ## Database piggiback call back handler

@ -84,6 +87,7 @@ var
  # happens from a separate thread.
  # TODO maybe turn this into more general framework for LRU reporting since
  #      we have lots of caches of this sort
+  rdbBranchLruStats*: array[RdbStateType, RdbLruCounter]
  rdbVtxLruStats*: array[RdbStateType, array[VertexType, RdbLruCounter]]
  rdbKeyLruStats*: array[RdbStateType, RdbLruCounter]

--- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim
+++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim
@ -36,6 +36,7 @@ when defined(metrics):
  type
    RdbVtxLruCounter = ref object of Counter
    RdbKeyLruCounter = ref object of Counter
+    RdbBranchLruCounter = ref object of Counter

  var
    rdbVtxLruStatsMetric {.used.} = RdbVtxLruCounter.newCollector(
@ -46,6 +47,9 @@ when defined(metrics):
    rdbKeyLruStatsMetric {.used.} = RdbKeyLruCounter.newCollector(
      "aristo_rdb_key_lru_total", "HashKey LRU lookup", labels = ["state", "hit"]
    )
+    rdbBranchLruStatsMetric {.used.} = RdbBranchLruCounter.newCollector(
+      "aristo_rdb_branch_lru_total", "Branch LRU lookup", labels = ["state", "hit"]
+    )

  method collect*(collector: RdbVtxLruCounter, output: MetricHandler) =
    let timestamp = collector.now()
@ -76,6 +80,19 @@ when defined(metrics):
          timestamp = timestamp,
        )

+  method collect*(collector: RdbBranchLruCounter, output: MetricHandler) =
+    let timestamp = collector.now()
+
+    for state in RdbStateType:
+      for hit in [false, true]:
+        output(
+          name = "aristo_rdb_branch_lru_total",
+          value = float64(rdbBranchLruStats[state].get(hit)),
+          labels = ["state", "hit"],
+          labelValues = [$state, $ord(hit)],
+          timestamp = timestamp,
+        )
+
 # ------------------------------------------------------------------------------
 # Public functions
 # ------------------------------------------------------------------------------
@ -156,15 +173,26 @@ proc getVtx*(
    rdb: var RdbInst, rvid: RootedVertexID, flags: set[GetVtxFlag]
 ): Result[VertexRef, (AristoError, string)] =
  # Try LRU cache first
-  var rc =
-    if GetVtxFlag.PeekCache in flags:
-      rdb.rdVtxLru.peek(rvid.vid)
-    else:
-      rdb.rdVtxLru.get(rvid.vid)
+  block:
+    let rc =
+      if GetVtxFlag.PeekCache in flags:
+        rdb.rdBranchLru.peek(rvid.vid)
+      else:
+        rdb.rdBranchLru.get(rvid.vid)
+    if rc.isOk():
+      rdbBranchLruStats[rvid.to(RdbStateType)].inc(true)
+      return ok(VertexRef(vType: Branch, startVid: rc[][0], used: rc[][1]))

-  if rc.isOk:
-    rdbVtxLruStats[rvid.to(RdbStateType)][rc.value().vType].inc(true)
-    return ok(move(rc.value))
+  block:
+    var rc =
+      if GetVtxFlag.PeekCache in flags:
+        rdb.rdVtxLru.peek(rvid.vid)
+      else:
+        rdb.rdVtxLru.get(rvid.vid)
+
+    if rc.isOk:
+      rdbVtxLruStats[rvid.to(RdbStateType)][rc.value().vType].inc(true)
+      return ok(move(rc.value))

  # Otherwise fetch from backend database
  # A threadvar is used to avoid allocating an environment for onData
@ -186,11 +214,17 @@ proc getVtx*(
  if res.isErr():
    return err((res.error(), "Parsing failed")) # Parsing failed

-  rdbVtxLruStats[rvid.to(RdbStateType)][res.value().vType].inc(false)
+  if res.value.vType == Branch and res.value.pfx.len == 0:
+    rdbBranchLruStats[rvid.to(RdbStateType)].inc(false)
+  else:
+    rdbVtxLruStats[rvid.to(RdbStateType)][res.value().vType].inc(false)

  # Update cache and return - in peek mode, avoid evicting cache items
-  if GetVtxFlag.PeekCache notin flags or rdb.rdVtxLru.len < rdb.rdVtxLru.capacity:
-    rdb.rdVtxLru.put(rvid.vid, res.value())
+  if GetVtxFlag.PeekCache notin flags:
+    if res.value.vType == Branch and res.value.pfx.len == 0:
+      rdb.rdBranchLru.put(rvid.vid, (res.value().startVid, res.value.used))
+    else:
+      rdb.rdVtxLru.put(rvid.vid, res.value())

  ok res.value()

--- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_init.nim
+++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_init.nim
@ -25,11 +25,9 @@ import
 # Private constructor
 # ------------------------------------------------------------------------------

-const
-  lruOverhead = 20
-    # Approximate LRU cache overhead per entry based on minilru sizes
+const lruOverhead = 20 # Approximate LRU cache overhead per entry based on minilru sizes

-proc dumpCacheStats(keySize, vtxSize: int) =
+proc dumpCacheStats(keySize, vtxSize, branchSize: int) =
  block vtx:
    var misses, hits: uint64
    echo "vtxLru(", vtxSize, ")"
@ -67,14 +65,33 @@ proc dumpCacheStats(keySize, vtxSize: int) =
    let hitRate = float64(hits * 100) / (float64(hits + misses))
    echo &"     all {misses:>10} {hits:>10} {misses+hits:>10} {hitRate:>5.2f}%"

+  block key:
+    var misses, hits: uint64
+    echo "branchLru(", branchSize, ") "
+
+    echo "   state       miss        hit      total hitrate"
+
+    for state in RdbStateType:
+      let
+        (miss, hit) =
+          (rdbBranchLruStats[state].get(false), rdbBranchLruStats[state].get(true))
+        hitRate = float64(hit * 100) / (float64(hit + miss))
+      misses += miss
+      hits += hit
+
+      echo &"{state:>8} {miss:>10} {hit:>10} {miss+hit:>10} {hitRate:>5.2f}%"
+
+    let hitRate = float64(hits * 100) / (float64(hits + misses))
+    echo &"     all {misses:>10} {hits:>10} {misses+hits:>10} {hitRate:>5.2f}%"
+
 proc initImpl(
-    rdb: var RdbInst;
-    basePath: string;
-    opts: DbOptions;
+    rdb: var RdbInst,
+    basePath: string,
+    opts: DbOptions,
    dbOpts: DbOptionsRef,
-    cfOpts: ColFamilyOptionsRef;
-    guestCFs: openArray[ColFamilyDescriptor] = [];
-      ): Result[seq[ColFamilyReadWrite],(AristoError,string)] =
+    cfOpts: ColFamilyOptionsRef,
+    guestCFs: openArray[ColFamilyDescriptor] = [],
+): Result[seq[ColFamilyReadWrite], (AristoError, string)] =
  ## Database backend constructor
  const initFailed = "RocksDB/init() failed"

@ -84,23 +101,30 @@ proc initImpl(
  rdb.rdKeySize =
    opts.rdbKeyCacheSize div (sizeof(VertexID) + sizeof(HashKey) + lruOverhead)
  rdb.rdVtxSize =
-    opts.rdbVtxCacheSize div (sizeof(VertexID) + sizeof(default(VertexRef)[]) + lruOverhead)
+    opts.rdbVtxCacheSize div
+    (sizeof(VertexID) + sizeof(default(VertexRef)[]) + lruOverhead)
+
+  rdb.rdBranchSize =
+    opts.rdbBranchCacheSize div (sizeof(typeof(rdb.rdBranchLru).V) + lruOverhead)

  rdb.rdKeyLru = typeof(rdb.rdKeyLru).init(rdb.rdKeySize)
  rdb.rdVtxLru = typeof(rdb.rdVtxLru).init(rdb.rdVtxSize)
+  rdb.rdBranchLru = typeof(rdb.rdBranchLru).init(rdb.rdBranchSize)

  if opts.rdbPrintStats:
    let
      ks = rdb.rdKeySize
      vs = rdb.rdVtxSize
+      bs = rdb.rdBranchSize
    # TODO instead of dumping at exit, these stats could be logged or written
    #      to a file for better tracking over time - that said, this is mainly
    #      a debug utility at this point
-    addExitProc(proc() =
-      dumpCacheStats(ks, vs))
+    addExitProc(
+      proc() =
+        dumpCacheStats(ks, vs, bs)
+    )

-  let
-    dataDir = rdb.dataDir
+  let dataDir = rdb.dataDir
  try:
    dataDir.createDir
  except OSError, IOError:
@ -132,7 +156,7 @@ proc initImpl(
  let cfs = useCFs.toSeq.mapIt(it.initColFamilyDescriptor cfOpts) & guestCFq

  # Open database for the extended family :)
-  let baseDb = openRocksDb(dataDir, dbOpts, columnFamilies=cfs).valueOr:
+  let baseDb = openRocksDb(dataDir, dbOpts, columnFamilies = cfs).valueOr:
    raiseAssert initFailed & " cannot create base descriptor: " & error

  # Initialise column handlers (this implicitly stores `baseDb`)
@ -148,18 +172,17 @@ proc initImpl(
 # ------------------------------------------------------------------------------

 proc init*(
-    rdb: var RdbInst;
-    basePath: string;
-    opts: DbOptions;
-    dbOpts: DbOptionsRef;
-    cfOpts: ColFamilyOptionsRef;
-    guestCFs: openArray[ColFamilyDescriptor];
-      ): Result[seq[ColFamilyReadWrite],(AristoError,string)] =
+    rdb: var RdbInst,
+    basePath: string,
+    opts: DbOptions,
+    dbOpts: DbOptionsRef,
+    cfOpts: ColFamilyOptionsRef,
+    guestCFs: openArray[ColFamilyDescriptor],
+): Result[seq[ColFamilyReadWrite], (AristoError, string)] =
  ## Temporarily define a guest CF list here.
  rdb.initImpl(basePath, opts, dbOpts, cfOpts, guestCFs)

-
-proc destroy*(rdb: var RdbInst; eradicate: bool) =
+proc destroy*(rdb: var RdbInst, eradicate: bool) =
  ## Destructor
  rdb.baseDb.close()

@ -174,7 +197,6 @@ proc destroy*(rdb: var RdbInst; eradicate: bool) =
          if 0 < w.len and w[^1] != '~':
            break done
        rdb.baseDir.removeDir
-
    except CatchableError:
      discard

--- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_put.nim
+++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_put.nim
@ -51,6 +51,7 @@ proc rollback*(rdb: var RdbInst) =
  if not rdb.session.isClosed():
    rdb.rdKeyLru = typeof(rdb.rdKeyLru).init(rdb.rdKeySize)
    rdb.rdVtxLru = typeof(rdb.rdVtxLru).init(rdb.rdVtxSize)
+    rdb.rdBranchLru = typeof(rdb.rdBranchLru).init(rdb.rdBranchSize)
    rdb.disposeSession()

 proc commit*(rdb: var RdbInst): Result[void,(AristoError,string)] =
@ -100,9 +101,25 @@ proc putVtx*(
    # Update existing cached items but don't add new ones since doing so is
    # likely to evict more useful items (when putting many items, we might even
    # evict those that were just added)
-    discard rdb.rdVtxLru.update(rvid.vid, vtx)
+
+    if vtx.vType == Branch and vtx.pfx.len == 0:
+      rdb.rdVtxLru.del(rvid.vid)
+      if rdb.rdBranchLru.len < rdb.rdBranchLru.capacity:
+        rdb.rdBranchLru.put(rvid.vid, (vtx.startVid, vtx.used))
+      else:
+        discard rdb.rdBranchLru.update(rvid.vid, (vtx.startVid, vtx.used))
+    else:
+      rdb.rdBranchLru.del(rvid.vid)
+      if rdb.rdVtxLru.len < rdb.rdVtxLru.capacity:
+        rdb.rdVtxLru.put(rvid.vid, vtx)
+      else:
+        discard rdb.rdVtxLru.update(rvid.vid, vtx)
+
    if key.isValid:
-      discard rdb.rdKeyLru.update(rvid.vid, key)
+      if rdb.rdKeyLru.len < rdb.rdKeyLru.capacity:
+        rdb.rdKeyLru.put(rvid.vid, key)
+      else:
+        discard rdb.rdKeyLru.update(rvid.vid, key)
    else:
      rdb.rdKeyLru.del rvid.vid

@ -115,6 +132,7 @@ proc putVtx*(
      return err((rvid.vid,errSym,error))

    # Update cache, vertex will most probably never be visited anymore
+    rdb.rdBranchLru.del rvid.vid
    rdb.rdVtxLru.del rvid.vid
    rdb.rdKeyLru.del rvid.vid

--- a/nimbus/db/opts.nim
+++ b/nimbus/db/opts.nim
@ -33,10 +33,12 @@ const
    ## re-reads from file.
    ##
    ## A bit of space on top of the filter is left for data block caching
-  defaultRdbVtxCacheSize* = 768 * 1024 * 1024
+  defaultRdbVtxCacheSize* = 512 * 1024 * 1024
    ## Cache of branches and leaves in the state MPTs (world and account)
  defaultRdbKeyCacheSize* = 256 * 1024 * 1024
    ## Hashes of the above
+  defaultRdbBranchCacheSize* = 1024 * 1024 * 1024
+    ## Cache of prefix-less branch vertices (only `startVid` and `used` are stored)


 type DbOptions* = object # Options that are transported to the database layer
@ -46,6 +48,7 @@ type DbOptions* = object # Options that are transported to the database layer
  blockCacheSize*: int
  rdbVtxCacheSize*: int
  rdbKeyCacheSize*: int
+  rdbBranchCacheSize*: int
  rdbPrintStats*: bool

 func init*(
@ -56,6 +59,7 @@ func init*(
    blockCacheSize = defaultBlockCacheSize,
    rdbVtxCacheSize = defaultRdbVtxCacheSize,
    rdbKeyCacheSize = defaultRdbKeyCacheSize,
+    rdbBranchCacheSize = defaultRdbBranchCacheSize,
    rdbPrintStats = false,
 ): T =
  T(
@ -65,5 +69,6 @@ func init*(
    blockCacheSize: blockCacheSize,
    rdbVtxCacheSize: rdbVtxCacheSize,
    rdbKeyCacheSize: rdbKeyCacheSize,
+    rdbBranchCacheSize: rdbBranchCacheSize,
    rdbPrintStats: rdbPrintStats,
  )