# nimbus-eth1/nimbus/db/aristo/aristo_compute.nim

# nimbus-eth1
# Copyright (c) 2023-2024 Status Research & Development GmbH
# Licensed under either of
#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
#    http://www.apache.org/licenses/LICENSE-2.0)
#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
#    http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
  std/strformat,
  chronicles,
  eth/common,
  results,
  "."/[aristo_desc, aristo_get, aristo_serialise],
  ./aristo_desc/desc_backend

type
  WriteBatch = tuple
    # State threaded through one `computeKey` run while hash keys are written
    # straight to the backend.
    writer: PutHdlRef # open backend write session, `nil` when none is active
    count: int # number of keys handed to the backend so far
    depth: int # current branch nesting depth, maintained by `enter`/`leave`
    prefix: uint64 # first 16 nibbles of the path currently being processed

# Number of keys per backend write batch, chosen so that one batch holds
# _around_ 1mb of key data (give or take some overhead). Larger batches are
# more efficient but cost more memory, with diminishing returns as they grow.
const batchSize =
  (1024 * 1024) div (sizeof(RootedVertexID) + sizeof(HashKey))

func progress(batch: WriteBatch): string =
  ## Approximate share of the keyspace covered so far, formatted as a
  ## percentage string. The estimate is the ratio of the path prefix that is
  ## currently being processed to the largest possible prefix value.
  let percent = float(batch.prefix) / float(uint64.high) * 100
  &"{percent:02.2f}%"

func enter(batch: var WriteBatch, nibble: int) =
  ## Descend one level into the branch slot `nibble`. Only the first 16
  ## levels fit into the 64-bit prefix; deeper levels just bump the depth.
  inc batch.depth
  if batch.depth > 16:
    return

  # The nibble for depth 1 occupies the top 4 bits, depth 16 the lowest 4
  let shift = (16 - batch.depth) * 4
  batch.prefix = batch.prefix + (uint64(nibble) shl shift)

func leave(batch: var WriteBatch, nibble: int) =
  ## Undo a matching `enter` call: take `nibble` back out of the prefix (when
  ## the current depth is within the 16 tracked levels) and step up one level.
  if batch.depth <= 16:
    let shift = (16 - batch.depth) * 4
    batch.prefix = batch.prefix - (uint64(nibble) shl shift)
  dec batch.depth

proc putKeyAtLevel(
    db: AristoDbRef,
    rvid: RootedVertexID,
    key: HashKey,
    level: int,
    batch: var WriteBatch,
): Result[void, AristoError] =
  ## Store a hash key in the given layer or directly to the underlying database
  ## which helps ensure that memory usage is proportional to the pending change
  ## set (vertex data may have been committed to disk without computing the
  ## corresponding hash!)
  ##
  ## A `level` of `-2` selects the backend; any other level is stored in the
  ## delta returned by `db.deltaAtLevel(level)`. Backend writes are grouped via
  ## `batch`, which is ended and reset every `batchSize` keys.

  if level != -2:
    # In-memory layer: record the key in that layer's key map
    db.deltaAtLevel(level).kMap[rvid] = key
    return ok()

  # Only put computed keys in the database which keeps churn down by focusing on
  # the ones that do not change - the ones that don't require hashing might as
  # well be loaded from the vertex!
  if key.len != 32:
    return ok()

  let be = db.backend
  if batch.writer == nil:
    doAssert be != nil, "source data is from the backend"
    # TODO long-running batch here?
    batch.writer = ?be.putBegFn()

  be.putKeyFn(batch.writer, rvid, key)
  batch.count += 1

  if batch.count mod batchSize != 0:
    # Batch not full yet - keep the write session open
    return ok()

  # Every 100th full batch is reported at `info` level, the rest at `debug`
  if batch.count mod (batchSize * 100) == 0:
    info "Writing computeKey cache",
      count = batch.count, accounts = batch.progress
  else:
    debug "Writing computeKey cache",
      count = batch.count, accounts = batch.progress

  ?be.putEndFn batch.writer
  batch.writer = nil
  ok()

func maxLevel(cur, other: int): int =
  ## Return whichever of the two levels is topmost in the layer stack.
  ##
  ## The ordering is reversed around zero: among non-negative levels the
  ## smaller one wins (0 is the top layer), any non-negative level beats a
  ## negative one, and among negative levels the larger one wins.
  if cur >= 0 and other >= 0:
    min(cur, other) # both non-negative: 0 is the top layer
  elif cur >= 0:
    cur # `other` is negative and therefore further down
  else:
    max(cur, other) # `cur` is negative: any larger value is more topmost

proc computeKeyImpl(
    db: AristoDbRef, # Database, top layer
    rvid: RootedVertexID, # Vertex to convert
    batch: var WriteBatch,
): Result[(HashKey, int), AristoError] =
  ## Compute the key for an arbitrary vertex ID. If successful, the length of
  ## the resulting key might be smaller than 32. If it is used as a root vertex
  ## state/hash, it must be converted to a `Hash32` (using `.to(Hash32)`) as
  ## in `db.computeKey(rvid).value.to(Hash32)` which always results in a
  ## 32 byte value.
  ##
  ## The result pairs the key with a level. For a key that is already cached
  ## the pair from `getKeyRc` is returned unchanged; otherwise the level is the
  ## topmost one (see `maxLevel`) among the vertex itself and every sub-vertex
  ## visited while hashing it.
  ##
  ## `batch` collects keys written to the backend and tracks the path prefix
  ## used for progress reporting. Any error from a lookup, a recursive call or
  ## from storing the key is returned immediately.

  db.getKeyRc(rvid).isErrOr:
    # Value cached either in layers or database
    return ok value

  let (vtx, vl) = ?db.getVtxRc(rvid, {GetVtxFlag.PeekCache})

  # Top-most level of all the vertices this hash computation depends on
  var level = vl

  # TODO this is the same code as when serializing NodeRef, without the NodeRef
  var writer = initRlpWriter()

  case vtx.vType
  of Leaf:
    writer.startList(2)
    writer.append(vtx.pfx.toHexPrefix(isLeaf = true).data())

    case vtx.lData.pType
    of AccountData:
      let
        stoID = vtx.lData.stoID
        skey =
          if stoID.isValid:
            # The storage trie is rooted at its own vertex ID, so root and
            # vertex ID are the same here
            let (skey, sl) = ?db.computeKeyImpl((stoID.vid, stoID.vid), batch)
            level = maxLevel(level, sl)
            skey
          else:
            VOID_HASH_KEY

      writer.append(
        encode Account(
          nonce: vtx.lData.account.nonce,
          balance: vtx.lData.account.balance,
          storageRoot: skey.to(Hash32),
          codeHash: vtx.lData.account.codeHash,
        )
      )
    of RawData:
      writer.append(vtx.lData.rawBlob)
    of StoData:
      # TODO avoid memory allocation when encoding storage data
      writer.append(rlp.encode(vtx.lData.stoData))
  of Branch:
    # Emits the 17-item branch list into `w`: one entry per nibble (the key of
    # the sub-vertex or `VOID_HASH_KEY`) followed by an empty value slot.
    # Being a template, the `?` below returns from `computeKeyImpl` itself and
    # `level` refers to the variable declared above.
    template writeBranch(w: var RlpWriter) =
      w.startList(17)
      for n in 0 .. 15:
        let vid = vtx.bVid[n]
        if vid.isValid:
          # Track the nibble in the batch prefix while descending so that
          # progress logging reflects the path being processed
          batch.enter(n)
          let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid), batch)
          batch.leave(n)

          level = maxLevel(level, bl)
          w.append(bkey)
        else:
          w.append(VOID_HASH_KEY)
      w.append EmptyBlob

    if vtx.pfx.len > 0: # Extension node
      # The branch is encoded and hashed separately, then referenced from a
      # two-item list that also carries the path prefix
      var bwriter = initRlpWriter()
      writeBranch(bwriter)

      writer.startList(2)
      writer.append(vtx.pfx.toHexPrefix(isLeaf = false).data())
      writer.append(bwriter.finish().digestTo(HashKey))
    else:
      writeBranch(writer)

  let h = writer.finish().digestTo(HashKey)

  # Cache the hash in the same storage layer as the top-most value that it
  # depends on (recursively) - this could be an ephemeral in-memory layer or the
  # underlying database backend - typically, values closer to the root are more
  # likely to live in an in-memory layer since any leaf change will lead to the
  # root key also changing while leaves that have never been hashed will see
  # their hash being saved directly to the backend.
  ?db.putKeyAtLevel(rvid, h, level, batch)

  ok (h, level)

proc computeKey*(
    db: AristoDbRef, # Database, top layer
    rvid: RootedVertexID, # Vertex to convert
): Result[HashKey, AristoError] =
  ## Compute the hash key of the vertex `rvid` and return it, dropping the
  ## level that `computeKeyImpl` reports alongside it.
  ##
  ## If a backend write session is still open once the computation has
  ## succeeded, it is ended here and an error from ending it is returned. When
  ## the computation itself fails the error is returned right away and any
  ## open write session is left as it is.
  var batch: WriteBatch
  let (key, _) = ?computeKeyImpl(db, rvid, batch)

  if batch.writer != nil:
    # Final, possibly partial batch - large runs are reported at `info` level
    if batch.count >= batchSize * 100:
      info "Writing computeKey cache", count = batch.count, progress = "100.00%"
    else:
      debug "Writing computeKey cache", count = batch.count, progress = "100.00%"
    ?db.backend.putEndFn batch.writer
    batch.writer = nil

  ok key

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------