speed up key computation (#2642)

* batch database key writes during `computeKey` calls * log progress when there are many keys to update * avoid evicting the vertex cache when traversing the trie for key computation purposes * avoid storing trivial leaf hashes that directly can be loaded from the vertex
2024-09-20 07:43:53 +02:00 · 2024-09-20 07:43:53 +02:00 · b4b4d16729
parent c77c6239b0
commit b4b4d16729
10 changed files with 139 additions and 50 deletions
--- a/nimbus/db/aristo/aristo_api.nim
+++ b/nimbus/db/aristo/aristo_api.nim
@ -1036,9 +1036,9 @@ func init*(

  else:
    beDup.getVtxFn =
-      proc(a: RootedVertexID): auto =
+      proc(a: RootedVertexID, flags: set[GetVtxFlag]): auto =
        AristoApiProfBeGetVtxFn.profileRunner:
-          result = be.getVtxFn(a)
+          result = be.getVtxFn(a, flags)
    data.list[AristoApiProfBeGetVtxFn.ord].masked = true

    beDup.getKeyFn =
--- a/nimbus/db/aristo/aristo_compute.nim
+++ b/nimbus/db/aristo/aristo_compute.nim
@ -11,24 +11,71 @@
 {.push raises: [].}

 import
+  std/strformat,
+  chronicles,
  eth/common,
  results,
-  "."/[aristo_desc, aristo_get, aristo_serialise]
+  "."/[aristo_desc, aristo_get, aristo_serialise],
+  ./aristo_desc/desc_backend
+
+type WriteBatch = tuple[writer: PutHdlRef, count: int, depth: int, prefix: uint64]
+
+# Keep write batch size _around_ 1mb, give or take some overhead - this is a
+# tradeoff between efficiency and memory usage with diminishing returns the
+# larger it is..
+const batchSize = 1024 * 1024 div (sizeof(RootedVertexID) + sizeof(HashKey))
+
+func progress(batch: WriteBatch): string =
+  # Return an approximation on how much of the keyspace has been covered by
+  # looking at the path prefix that we're currently processing
+  &"{(float(batch.prefix) / float(uint64.high)) * 100:02.2f}%"
+
+func enter(batch: var WriteBatch, nibble: int) =
+  batch.depth += 1
+  if batch.depth <= 16:
+    batch.prefix += uint64(nibble) shl ((16 - batch.depth) * 4)
+
+func leave(batch: var WriteBatch, nibble: int) =
+  if batch.depth <= 16:
+    batch.prefix -= uint64(nibble) shl ((16 - batch.depth) * 4)
+  batch.depth -= 1

 proc putKeyAtLevel(
-    db: AristoDbRef, rvid: RootedVertexID, key: HashKey, level: int
+    db: AristoDbRef,
+    rvid: RootedVertexID,
+    key: HashKey,
+    level: int,
+    batch: var WriteBatch,
 ): Result[void, AristoError] =
  ## Store a hash key in the given layer or directly to the underlying database
  ## which helps ensure that memory usage is proportional to the pending change
  ## set (vertex data may have been committed to disk without computing the
  ## corresponding hash!)
+
+  # Only put computed keys in the database which keeps churn down by focusing on
+  # the ones that do not change - the ones that don't require hashing might as
+  # well be loaded from the vertex!
  if level == -2:
-    let be = db.backend
-    doAssert be != nil, "source data is from the backend"
-    # TODO long-running batch here?
-    let writeBatch = ?be.putBegFn()
-    be.putKeyFn(writeBatch, rvid, key)
-    ?be.putEndFn writeBatch
+    if key.len == 32:
+      let be = db.backend
+      if batch.writer == nil:
+        doAssert be != nil, "source data is from the backend"
+        # TODO long-running batch here?
+        batch.writer = ?be.putBegFn()
+
+      be.putKeyFn(batch.writer, rvid, key)
+      batch.count += 1
+
+      if batch.count mod batchSize == 0:
+        if batch.count mod (batchSize * 100) == 0:
+          info "Writing computeKey cache",
+            count = batch.count, accounts = batch.progress
+        else:
+          debug "Writing computeKey cache",
+            count = batch.count, accounts = batch.progress
+        ?be.putEndFn batch.writer
+        batch.writer = nil
+
    ok()
  else:
    db.deltaAtLevel(level).kMap[rvid] = key
@ -45,9 +92,10 @@ func maxLevel(cur, other: int): int =
    min(cur, other) # Here the order is reversed and 0 is the top layer

 proc computeKeyImpl(
-    db: AristoDbRef;                  # Database, top layer
-    rvid: RootedVertexID;             # Vertex to convert
-      ): Result[(HashKey, int), AristoError] =
+    db: AristoDbRef, # Database, top layer
+    rvid: RootedVertexID, # Vertex to convert
+    batch: var WriteBatch,
+): Result[(HashKey, int), AristoError] =
  ## Compute the key for an arbitrary vertex ID. If successful, the length of
  ## the resulting key might be smaller than 32. If it is used as a root vertex
  ## state/hash, it must be converted to a `Hash256` (using (`.to(Hash256)`) as
@ -57,7 +105,8 @@ proc computeKeyImpl(
  db.getKeyRc(rvid).isErrOr:
    # Value cached either in layers or database
    return ok value
-  let (vtx, vl) = ? db.getVtxRc rvid
+
+  let (vtx, vl) = ?db.getVtxRc(rvid, {GetVtxFlag.PeekCache})

  # Top-most level of all the verticies this hash compution depends on
  var level = vl
@ -65,7 +114,7 @@ proc computeKeyImpl(
  # TODO this is the same code as when serializing NodeRef, without the NodeRef
  var writer = initRlpWriter()

-  case vtx.vType:
+  case vtx.vType
  of Leaf:
    writer.startList(2)
    writer.append(vtx.pfx.toHexPrefix(isLeaf = true).data())
@ -76,36 +125,41 @@ proc computeKeyImpl(
        stoID = vtx.lData.stoID
        skey =
          if stoID.isValid:
-            let (skey, sl) = ?db.computeKeyImpl((stoID.vid, stoID.vid))
+            let (skey, sl) = ?db.computeKeyImpl((stoID.vid, stoID.vid), batch)
            level = maxLevel(level, sl)
            skey
          else:
            VOID_HASH_KEY

-      writer.append(encode Account(
-        nonce:       vtx.lData.account.nonce,
-        balance:     vtx.lData.account.balance,
-        storageRoot: skey.to(Hash256),
-        codeHash:    vtx.lData.account.codeHash)
+      writer.append(
+        encode Account(
+          nonce: vtx.lData.account.nonce,
+          balance: vtx.lData.account.balance,
+          storageRoot: skey.to(Hash256),
+          codeHash: vtx.lData.account.codeHash,
+        )
      )
    of RawData:
      writer.append(vtx.lData.rawBlob)
    of StoData:
      # TODO avoid memory allocation when encoding storage data
      writer.append(rlp.encode(vtx.lData.stoData))
-
  of Branch:
    template writeBranch(w: var RlpWriter) =
      w.startList(17)
-      for n in 0..15:
+      for n in 0 .. 15:
        let vid = vtx.bVid[n]
        if vid.isValid:
-          let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid))
+          batch.enter(n)
+          let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid), batch)
+          batch.leave(n)
+
          level = maxLevel(level, bl)
          w.append(bkey)
        else:
          w.append(VOID_HASH_KEY)
      w.append EmptyBlob
+
    if vtx.pfx.len > 0: # Extension node
      var bwriter = initRlpWriter()
      writeBranch(bwriter)
@ -124,15 +178,25 @@ proc computeKeyImpl(
  # likely to live in an in-memory layer since any leaf change will lead to the
  # root key also changing while leaves that have never been hashed will see
  # their hash being saved directly to the backend.
-  ? db.putKeyAtLevel(rvid, h, level)
+  ?db.putKeyAtLevel(rvid, h, level, batch)

  ok (h, level)

 proc computeKey*(
-    db: AristoDbRef;                  # Database, top layer
-    rvid: RootedVertexID;             # Vertex to convert
-      ): Result[HashKey, AristoError] =
-  ok (?computeKeyImpl(db, rvid))[0]
+    db: AristoDbRef, # Database, top layer
+    rvid: RootedVertexID, # Vertex to convert
+): Result[HashKey, AristoError] =
+  var batch: WriteBatch
+  let res = computeKeyImpl(db, rvid, batch)
+  if res.isOk:
+    if batch.writer != nil:
+      if batch.count >= batchSize * 100:
+        info "Writing computeKey cache", count = batch.count, progress = "100.00%"
+      else:
+        debug "Writing computeKey cache", count = batch.count, progress = "100.00%"
+      ?db.backend.putEndFn batch.writer
+      batch.writer = nil
+  ok (?res)[0]

 # ------------------------------------------------------------------------------
 # End
--- a/nimbus/db/aristo/aristo_desc/desc_backend.nim
+++ b/nimbus/db/aristo/aristo_desc/desc_backend.nim
@ -20,7 +20,7 @@ import

 type
  GetVtxFn* =
-    proc(rvid: RootedVertexID): Result[VertexRef,AristoError] {.gcsafe, raises: [].}
+    proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[VertexRef,AristoError] {.gcsafe, raises: [].}
      ## Generic backend database retrieval function for a single structural
      ## `Aristo DB` data record.

--- a/nimbus/db/aristo/aristo_desc/desc_structural.nim
+++ b/nimbus/db/aristo/aristo_desc/desc_structural.nim
@ -130,6 +130,11 @@ type

    txUid*: uint                           ## Transaction identifier if positive

+  GetVtxFlag* = enum
+    PeekCache
+      ## Peek into, but don't update cache - useful on work loads that are
+      ## unfriendly to caches
+
 # ------------------------------------------------------------------------------
 # Public helpers (misc)
 # ------------------------------------------------------------------------------
--- a/nimbus/db/aristo/aristo_get.nim
+++ b/nimbus/db/aristo/aristo_get.nim
@ -43,11 +43,12 @@ proc getLstUbe*(
 proc getVtxUbe*(
    db: AristoDbRef;
    rvid: RootedVertexID;
+    flags: set[GetVtxFlag] = {};
      ): Result[VertexRef,AristoError] =
  ## Get the vertex from the unfiltered backened if available.
  let be = db.backend
  if not be.isNil:
-    return be.getVtxFn rvid
+    return be.getVtxFn(rvid, flags)
  err GetVtxNotFound

 proc getKeyUbe*(
@ -73,6 +74,7 @@ proc getTuvBE*(
 proc getVtxBE*(
    db: AristoDbRef;
    rvid: RootedVertexID;
+    flags: set[GetVtxFlag] = {};
      ): Result[(VertexRef, int),AristoError] =
  ## Get the vertex from the (filtered) backened if available.
  if not db.balancer.isNil:
@ -80,7 +82,7 @@ proc getVtxBE*(
      if w[].isValid:
        return ok (w[], -1)
      return err(GetVtxNotFound)
-  ok (? db.getVtxUbe rvid, -2)
+  ok (? db.getVtxUbe(rvid, flags), -2)

 proc getKeyBE*(
    db: AristoDbRef;
@ -98,7 +100,8 @@ proc getKeyBE*(

 proc getVtxRc*(
    db: AristoDbRef;
-    rvid: RootedVertexID
+    rvid: RootedVertexID;
+    flags: set[GetVtxFlag] = {};
      ): Result[(VertexRef, int),AristoError] =
  ## Cascaded attempt to fetch a vertex from the cache layers or the backend.
  ##
@ -113,15 +116,14 @@ proc getVtxRc*(
    else:
      return err(GetVtxNotFound)

-  db.getVtxBE rvid
+  db.getVtxBE(rvid, flags)

-proc getVtx*(db: AristoDbRef; rvid: RootedVertexID): VertexRef =
+proc getVtx*(db: AristoDbRef; rvid: RootedVertexID, flags: set[GetVtxFlag] = {}): VertexRef =
  ## Cascaded attempt to fetch a vertex from the cache layers or the backend.
  ## The function returns `nil` on error or failure.
  ##
  db.getVtxRc(rvid).valueOr((VertexRef(nil), 0))[0]

-
 proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),AristoError] =
  ## Cascaded attempt to fetch a Merkle hash from the cache layers or the
  ## backend. This function will never return a `VOID_HASH_KEY` but rather
--- a/nimbus/db/aristo/aristo_init/memory_db.nim
+++ b/nimbus/db/aristo/aristo_init/memory_db.nim
@ -86,7 +86,7 @@ proc endSession(hdl: PutHdlRef; db: MemBackendRef): MemPutHdlRef =

 proc getVtxFn(db: MemBackendRef): GetVtxFn =
  result =
-    proc(rvid: RootedVertexID): Result[VertexRef,AristoError] =
+    proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[VertexRef,AristoError] =
      # Fetch serialised data record
      let data = db.mdb.sTab.getOrDefault(rvid, EmptyBlob)
      if 0 < data.len:
--- a/nimbus/db/aristo/aristo_init/rocks_db.nim
+++ b/nimbus/db/aristo/aristo_init/rocks_db.nim
@ -75,10 +75,10 @@ proc endSession(hdl: PutHdlRef; db: RdbBackendRef): RdbPutHdlRef =

 proc getVtxFn(db: RdbBackendRef): GetVtxFn =
  result =
-    proc(rvid: RootedVertexID): Result[VertexRef,AristoError] =
+    proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[VertexRef,AristoError] =

      # Fetch serialised data record
-      let vtx = db.rdb.getVtx(rvid).valueOr:
+      let vtx = db.rdb.getVtx(rvid, flags).valueOr:
        when extraTraceMessages:
          trace logTxt "getVtxFn() failed", rvid, error=error[0], info=error[1]
        return err(error[0])
--- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim
+++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim
@ -135,9 +135,15 @@ proc getKey*(
 proc getVtx*(
    rdb: var RdbInst;
    rvid: RootedVertexID;
+    flags: set[GetVtxFlag];
      ): Result[VertexRef,(AristoError,string)] =
  # Try LRU cache first
-  var rc = rdb.rdVtxLru.get(rvid.vid)
+  var rc =
+    if GetVtxFlag.PeekCache in flags:
+      rdb.rdVtxLru.peek(rvid.vid)
+    else:
+      rdb.rdVtxLru.get(rvid.vid)
+
  if rc.isOK:
    rdbVtxLruStats[rvid.to(RdbStateType)][rc.value().vType].inc(true)
    return ok(move(rc.value))
@ -164,8 +170,9 @@ proc getVtx*(

  rdbVtxLruStats[rvid.to(RdbStateType)][res.value().vType].inc(false)

-  # Update cache and return
-  rdb.rdVtxLru.put(rvid.vid, res.value())
+  # Update cache and return - in peek mode, avoid evicting cache items
+  if GetVtxFlag.PeekCache notin flags or rdb.rdVtxLru.len < rdb.rdVtxLru.capacity:
+    rdb.rdVtxLru.put(rvid.vid, res.value())

  ok res.value()

--- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_put.nim
+++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_put.nim
@ -89,7 +89,13 @@ proc putKey*(
    rvid: RootedVertexID, key: HashKey;
      ): Result[void,(VertexID,AristoError,string)] =
  let dsc = rdb.session
-  if key.isValid:
+  # We only write keys whose value has to be hashed - the trivial ones can be
+  # loaded from the corresponding vertex directly!
+  # TODO move this logic to a higher layer
+  # TODO skip the delete for trivial keys - it's here to support databases that
+  #      were written at a time when trivial keys were also cached - it should
+  #      be cleaned up when revising the key cache in general.
+  if key.isValid and key.len == 32:
    dsc.put(rvid.blobify().data(), key.data, rdb.keyCol.handle()).isOkOr:
      # Caller must `rollback()` which will flush the `rdKeyLru` cache
      const errSym = RdbBeDriverPutKeyError
--- a/scripts/make_states.sh
+++ b/scripts/make_states.sh
@ -1,6 +1,6 @@
 #!/bin/bash

-# Create a set of states, each advanced by 100k blocks
+# Create a set of states, each advanced by 1M blocks

 set -e

@ -18,18 +18,23 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 DATE="$(date -u +%Y%m%d_%H%M)"
 REV=$(git rev-parse --short=8 HEAD)
 DATA_DIR="$1/${DATE}-${REV}"
+ERA_DIR="$2"
+ERA1_DIR="$3"
+STATS_DIR="$4"
+
+shift 4

 mkdir -p "$DATA_DIR"
-[ "$5" ] && cp -ar "$5"/* "$DATA_DIR"
+[ -d "$1" ] && { cp -ar "$1"/* "$DATA_DIR" ; shift ; }

 while true;
 do
  "$SCRIPT_DIR/../build/nimbus" import \
    --data-dir:"${DATA_DIR}" \
-    --era1-dir:"$2" \
-    --era-dir:"$3" \
-    --debug-csv-stats:"$4/stats-${DATE}-${REV}.csv" \
-    --max-blocks:1000000
-  cp -ar "$1/${DATE}-${REV}" "$1/${DATE}-${REV}"-$(printf "%04d" $counter)
+    --era1-dir:"${ERA_DIR}" \
+    --era-dir:"${ERA1_DIR}" \
+    --debug-csv-stats:"${STATS_DIR}/stats-${DATE}-${REV}.csv" \
+    --max-blocks:1000000 "$@"
+  cp -ar "${DATA_DIR}" "${DATA_DIR}-$(printf "%04d" $counter)"
  counter=$((counter+1))
 done