avoid allocations for slot storage (#2455)

Introduce a new `StoData` payload type similar to `AccountData`

* slightly more efficient storage format
* typed api
* fewer seqs
* fix encoding docs - it wasn't rlp after all :)
This commit is contained in:
Jacek Sieka 2024-07-05 01:48:45 +02:00 committed by GitHub
parent 1f40b710ee
commit 7d78fd97d5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 89 additions and 111 deletions

View File

@ -81,7 +81,7 @@ type
AristoApiDeleteStorageDataFn* =
proc(db: AristoDbRef;
accPath: Hash256;
stoPath: openArray[byte];
stoPath: Hash256;
): Result[bool,AristoError]
{.noRaise.}
## For a given account argument `accPath`, this function deletes the
@ -143,8 +143,8 @@ type
AristoApiFetchStorageDataFn* =
proc(db: AristoDbRef;
accPath: Hash256;
stoPath: openArray[byte];
): Result[Blob,AristoError]
stoPath: Hash256;
): Result[Uint256,AristoError]
{.noRaise.}
## For a storage tree related to account `accPath`, fetch the data
## record from the database indexed by `stoPath`.
@ -257,7 +257,7 @@ type
AristoApiHasPathStorageFn* =
proc(db: AristoDbRef;
accPath: Hash256;
stoPath: openArray[byte];
stoPath: Hash256;
): Result[bool,AristoError]
{.noRaise.}
## For a storage tree related to account `accPath`, query whether the
@ -315,8 +315,8 @@ type
AristoApiMergeStorageDataFn* =
proc(db: AristoDbRef;
accPath: Hash256;
stoPath: openArray[byte];
stoData: openArray[byte];
stoPath: Hash256;
stoData: UInt256;
): Result[void,AristoError]
{.noRaise.}
## Store the `stoData` data argument on the storage area addressed by
@ -768,9 +768,9 @@ func init*(
result = api.fetchGenericState(a, b, c)
profApi.fetchStorageData =
proc(a: AristoDbRef; accPath: Hash256, c: openArray[byte]): auto =
proc(a: AristoDbRef; accPath, stoPath: Hash256): auto =
AristoApiProfFetchStorageDataFn.profileRunner:
result = api.fetchStorageData(a, accPath, c)
result = api.fetchStorageData(a, accPath, stoPath)
profApi.fetchStorageState =
proc(a: AristoDbRef; accPath: Hash256; c: bool): auto =

View File

@ -16,16 +16,15 @@ import
stew/[arrayops, endians2],
./aristo_desc
# Allocation-free version of the RLP integer encoding, returning the shortest
# big-endian representation - to decode, the length must be known / stored
# elsewhere
# Allocation-free short big-endian encoding that skips the
# leading zeroes
type
RlpBuf*[I] = object
SbeBuf*[I] = object
buf*: array[sizeof(I), byte]
len*: byte
RVidBuf* = object
buf*: array[sizeof(RlpBuf[VertexID]) * 2, byte]
buf*: array[sizeof(SbeBuf[VertexID]) * 2, byte]
len*: byte
func significantBytesBE(val: openArray[byte]): byte =
@ -34,15 +33,15 @@ func significantBytesBE(val: openArray[byte]): byte =
return byte(val.len - i)
return 1
func blobify*(v: VertexID|uint64): RlpBuf[typeof(v)] =
func blobify*(v: VertexID|uint64): SbeBuf[typeof(v)] =
let b = v.uint64.toBytesBE()
RlpBuf[typeof(v)](buf: b, len: significantBytesBE(b))
SbeBuf[typeof(v)](buf: b, len: significantBytesBE(b))
func blobify*(v: StUint): RlpBuf[typeof(v)] =
func blobify*(v: StUint): SbeBuf[typeof(v)] =
let b = v.toBytesBE()
RlpBuf[typeof(v)](buf: b, len: significantBytesBE(b))
SbeBuf[typeof(v)](buf: b, len: significantBytesBE(b))
template data*(v: RlpBuf): openArray[byte] =
template data*(v: SbeBuf): openArray[byte] =
let vv = v
vv.buf.toOpenArray(vv.buf.len - int(vv.len), vv.buf.high)
@ -159,6 +158,9 @@ proc blobifyTo*(pyl: PayloadRef, data: var Blob) =
data &= lens.toBytesBE()
data &= [mask]
of StoData:
data &= pyl.stoData.blobify().data
data &= [0x20.byte]
proc blobifyTo*(vtx: VertexRef; data: var Blob): Result[void,AristoError] =
## This function serialises the vertex argument to a database record.
@ -255,6 +257,11 @@ proc deblobify(
if (mask and 0x10) > 0: # unstructured payload
return ok PayloadRef(pType: RawData, rawBlob: data[0 .. ^2])
if (mask and 0x20) > 0: # Slot storage data
return ok PayloadRef(
pType: StoData,
stoData: ?deblobify(data.toOpenArray(0, data.len - 2), UInt256))
var
pAcc = PayloadRef(pType: AccountData)
start = 0

View File

@ -15,7 +15,6 @@ import
results,
"."/[aristo_desc, aristo_get, aristo_layers, aristo_serialise]
proc computeKey*(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Vertex to convert
@ -44,12 +43,12 @@ proc computeKey*(
let vtx = ? db.getVtxRc rvid
# TODO this is the same code as when serializing NodeRef, without the NodeRef
var rlp = initRlpWriter()
var writer = initRlpWriter()
case vtx.vType:
of Leaf:
rlp.startList(2)
rlp.append(vtx.lPfx.toHexPrefix(isLeaf = true))
writer.startList(2)
writer.append(vtx.lPfx.toHexPrefix(isLeaf = true))
# Need to resolve storage root for account leaf
case vtx.lData.pType
of AccountData:
@ -60,31 +59,34 @@ proc computeKey*(
else:
VOID_HASH_KEY
rlp.append(encode Account(
writer.append(encode Account(
nonce: vtx.lData.account.nonce,
balance: vtx.lData.account.balance,
storageRoot: key.to(Hash256),
codeHash: vtx.lData.account.codeHash)
)
of RawData:
rlp.append(vtx.lData.rawBlob)
writer.append(vtx.lData.rawBlob)
of StoData:
# TODO avoid memory allocation when encoding storage data
writer.append(rlp.encode(vtx.lData.stoData))
of Branch:
rlp.startList(17)
writer.startList(17)
for n in 0..15:
let vid = vtx.bVid[n]
if vid.isValid:
rlp.append(?db.computeKey((rvid.root, vid)))
writer.append(?db.computeKey((rvid.root, vid)))
else:
rlp.append(VOID_HASH_KEY)
rlp.append EmptyBlob
writer.append(VOID_HASH_KEY)
writer.append EmptyBlob
of Extension:
rlp.startList(2)
rlp.append(vtx.ePfx.toHexPrefix(isleaf = false))
rlp.append(?db.computeKey((rvid.root, vtx.eVid)))
writer.startList(2)
writer.append(vtx.ePfx.toHexPrefix(isleaf = false))
writer.append(?db.computeKey((rvid.root, vtx.eVid)))
let h = rlp.finish().digestTo(HashKey)
let h = writer.finish().digestTo(HashKey)
# TODO This shouldn't necessarily go into the database if we're just computing
# a key ephemerally - it should however be cached for some time since
# deep hash computations are expensive

View File

@ -193,6 +193,8 @@ proc ppPayload(p: PayloadRef, db: AristoDbRef): string =
result &= ($p.account.balance).stripZeros(toExp=true) & ","
result &= p.stoID.ppVid & ","
result &= p.account.codeHash.ppCodeHash & ")"
of StoData:
result = $p.stoData
proc ppVtx(nd: VertexRef, db: AristoDbRef, rvid: RootedVertexID): string =
if not nd.isValid:
@ -497,38 +499,6 @@ proc pp*(p: PayloadRef, db = AristoDbRef(nil)): string =
proc pp*(nd: VertexRef, db = AristoDbRef(nil)): string =
nd.ppVtx(db.orDefault, default(RootedVertexID))
# proc pp*(nd: NodeRef; db: AristoDbRef): string =
# if not nd.isValid:
# result = "n/a"
# elif nd.error != AristoError(0):
# result = "(!" & $nd.error
# else:
# result = ["L(", "X(", "B("][nd.vType.ord]
# case nd.vType:
# of Leaf:
# result &= $nd.lPfx.ppPathPfx & "," & nd.lData.pp(db)
# of Extension:
# result &= $nd.ePfx.ppPathPfx & "," & nd.eVid.ppVid & ","
# result &= nd.key[0].ppKey(db)
# result &= db.ppKeyOk(nd.key[0], nd.eVid)
# of Branch:
# result &= "["
# for n in 0..15:
# if nd.bVid[n].isValid or nd.key[n].isValid:
# result &= nd.bVid[n].ppVid
# result &= db.ppKeyOk(nd.key[n], nd.bVid[n]) & ","
# result[^1] = ']'
# result &= ",["
# for n in 0..15:
# if nd.bVid[n].isValid or nd.key[n].isValid:
# result &= nd.key[n].ppKey(db)
# result &= ","
# result[^1] = ']'
# result &= ")"
proc pp*[T](rc: Result[T,(VertexID,AristoError)]): string =
if rc.isOk:
result = "ok("
@ -538,9 +508,6 @@ proc pp*[T](rc: Result[T,(VertexID,AristoError)]): string =
else:
result = "err((" & rc.error[0].pp & "," & $rc.error[1] & "))"
proc pp*(nd: NodeRef): string =
nd.pp(AristoDbRef(nil).orDefault)
proc pp*(
sTab: Table[RootedVertexID,VertexRef];
db = AristoDbRef(nil);

View File

@ -399,7 +399,7 @@ proc deleteGenericTree*(
proc deleteStorageData*(
db: AristoDbRef;
accPath: Hash256; # Implies storage data tree
stoPath: openArray[byte];
stoPath: Hash256;
): Result[bool,AristoError] =
## For a given account argument `accPath`, this function deletes the
## argument `stoPath` from the associated storage tree (if any, at all.) If

View File

@ -41,9 +41,6 @@ proc deltaMerge*(
if upper.isNil:
return ok(lower)
# Verify stackability
let lowerTrg = lower.kMap.getOrVoid (VertexID(1), VertexID(1))
# There is no need to deep copy table vertices as they will not be modified.
let newFilter = LayerDeltaRef(
sTab: lower.sTab,

View File

@ -43,6 +43,7 @@ type
## Type of leaf data.
RawData ## Generic data
AccountData ## `Aristo account` with vertex IDs links
StoData ## Slot storage data
PayloadRef* = ref object of RootRef
## The payload type depends on the sub-tree used. The `VertesID(1)` rooted
@ -54,6 +55,8 @@ type
of AccountData:
account*: AristoAccount
stoID*: VertexID ## Storage vertex ID (if any)
of StoData:
stoData*: UInt256
VertexRef* = ref object of RootRef
## Vertex for building a hexary Patricia or Merkle Patricia Trie
@ -154,6 +157,9 @@ proc `==`*(a, b: PayloadRef): bool =
if a.account != b.account or
a.stoID != b.stoID:
return false
of StoData:
if a.stoData != b.stoData:
return false
true
proc `==`*(a, b: VertexRef): bool =
@ -210,6 +216,11 @@ func dup*(pld: PayloadRef): PayloadRef =
pType: AccountData,
account: pld.account,
stoID: pld.stoID)
of StoData:
PayloadRef(
pType: StoData,
stoData: pld.stoData
)
func dup*(vtx: VertexRef): VertexRef =
## Duplicate vertex.

View File

@ -223,14 +223,14 @@ proc hasPathGeneric*(
proc fetchStorageData*(
db: AristoDbRef;
accPath: Hash256;
stoPath: openArray[byte];
): Result[Blob,AristoError] =
stoPath: Hash256;
): Result[UInt256,AristoError] =
## For a storage tree related to account `accPath`, fetch the data record
## from the database indexed by `path`.
##
let pyl = ? db.retrievePayload(? db.fetchStorageID accPath, stoPath)
assert pyl.pType == RawData # debugging only
ok pyl.rawBlob
let pyl = ? db.retrievePayload(? db.fetchStorageID accPath, stoPath.data)
assert pyl.pType == StoData # debugging only
ok pyl.stoData
proc fetchStorageState*(
db: AristoDbRef;
@ -247,12 +247,12 @@ proc fetchStorageState*(
proc hasPathStorage*(
db: AristoDbRef;
accPath: Hash256;
stoPath: openArray[byte];
stoPath: Hash256;
): Result[bool,AristoError] =
## For a storage tree related to account `accPath`, query whether the data
## record indexed by `path` exists on the database.
##
db.hasPayload(? db.fetchStorageID accPath, stoPath)
db.hasPayload(? db.fetchStorageID accPath, stoPath.data)
proc hasStorageData*(
db: AristoDbRef;

View File

@ -100,8 +100,8 @@ proc mergeGenericData*(
proc mergeStorageData*(
db: AristoDbRef; # Database, top layer
accPath: Hash256; # Needed for accounts payload
stoPath: openArray[byte]; # Storage data path (aka key)
stoData: openArray[byte]; # Storage data payload value
stoPath: Hash256; # Storage data path (aka key)
stoData: UInt256; # Storage data payload value
): Result[void,AristoError] =
## Store the `stoData` data argument on the storage area addressed by
## `(accPath,stoPath)` where `accPath` is the account key (into the MPT)
@ -119,8 +119,8 @@ proc mergeStorageData*(
useID = if stoID.isValid: stoID else: db.vidFetch()
# Call merge
pyl = PayloadRef(pType: RawData, rawBlob: @stoData)
rc = db.mergePayloadImpl(useID, stoPath, pyl)
pyl = PayloadRef(pType: StoData, stoData: stoData)
rc = db.mergePayloadImpl(useID, stoPath.data, pyl)
if rc.isOk:
# Mark account path Merkle keys for update

View File

@ -57,6 +57,8 @@ proc serialise*(
balance: pyl.account.balance,
storageRoot: key.to(Hash256),
codeHash: pyl.account.codeHash)
of StoData:
ok rlp.encode pyl.stoData
# ------------------------------------------------------------------------------
# Public RLP transcoder mixins

View File

@ -660,12 +660,12 @@ proc state*(acc: CoreDbAccRef; updateOk = false): CoreDbRc[Hash256] =
proc slotFetch*(
acc: CoreDbAccRef;
accPath: Hash256;
slot: openArray[byte];
): CoreDbRc[Blob] =
stoPath: Hash256;
): CoreDbRc[UInt256] =
## Like `fetch()` but with cascaded index `(accPath,stoPath)`.
acc.setTrackNewApi AccSlotFetchFn
result = block:
let rc = acc.call(fetchStorageData, acc.mpt, accPath, slot)
let rc = acc.call(fetchStorageData, acc.mpt, accPath, stoPath)
if rc.isOk:
ok(rc.value)
elif rc.error == FetchPathNotFound:
@ -679,12 +679,12 @@ proc slotFetch*(
proc slotDelete*(
acc: CoreDbAccRef;
accPath: Hash256;
slot: openArray[byte];
stoPath: Hash256;
): CoreDbRc[void] =
## Like `delete()` but with cascaded index `(accPath,stoPath)`.
acc.setTrackNewApi AccSlotDeleteFn
result = block:
let rc = acc.call(deleteStorageData, acc.mpt, accPath, slot)
let rc = acc.call(deleteStorageData, acc.mpt, accPath, stoPath)
if rc.isOk or rc.error == DelStoRootMissing:
# The second `if` clause is insane but legit: A storage column was
# announced for an account but no data have been added, yet.
@ -700,12 +700,12 @@ proc slotDelete*(
proc slotHasPath*(
acc: CoreDbAccRef;
accPath: Hash256;
slot: openArray[byte];
stoPath: Hash256;
): CoreDbRc[bool] =
## Like `hasPath()` but with cascaded index `(accPath,stoPath)`.
acc.setTrackNewApi AccSlotHasPathFn
result = block:
let rc = acc.call(hasPathStorage, acc.mpt, accPath, slot)
let rc = acc.call(hasPathStorage, acc.mpt, accPath, stoPath)
if rc.isOk:
ok(rc.value)
else:
@ -717,13 +717,13 @@ proc slotHasPath*(
proc slotMerge*(
acc: CoreDbAccRef;
accPath: Hash256;
slot: openArray[byte];
data: openArray[byte];
stoPath: Hash256;
stoData: UInt256;
): CoreDbRc[void] =
## Like `merge()` but with cascaded index `(accPath,stoPath)`.
acc.setTrackNewApi AccSlotMergeFn
result = block:
let rc = acc.call(mergeStorageData, acc.mpt, accPath, slot, data)
let rc = acc.call(mergeStorageData, acc.mpt, accPath, stoPath, stoData)
if rc.isOk:
ok()
else:

View File

@ -115,7 +115,6 @@ iterator getBlockTransactionData*(
db: CoreDbRef;
txRoot: Hash256;
): Blob =
const info = "getBlockTransactionData()"
block body:
if txRoot == EMPTY_ROOT_HASH:
break body
@ -174,7 +173,6 @@ iterator getReceipts*(
receiptsRoot: Hash256;
): Receipt
{.gcsafe, raises: [RlpError].} =
const info = "getReceipts()"
block body:
if receiptsRoot == EMPTY_ROOT_HASH:
break body

View File

@ -151,7 +151,6 @@ template noRlpException(info: static[string]; code: untyped) =
# The AccountsLedgerRef is modeled after TrieDatabase for it's transaction style
proc init*(x: typedesc[AccountsLedgerRef], db: CoreDbRef,
root: KeccakHash): AccountsLedgerRef =
const info = "AccountsLedgerRef.init(): "
new result
result.ledger = db.ctx.getAccounts()
result.kvt = db.newKvt() # save manually in `persist()`
@ -308,11 +307,10 @@ proc originalStorageValue(
# Not in the original values cache - go to the DB.
let
slotKey = slot.toBytesBE.keccakHash.data
slotKey = slot.toBytesBE.keccakHash
rc = ac.ledger.slotFetch(acc.toAccountKey, slotKey)
if rc.isOk and 0 < rc.value.len:
noRlpException "originalStorageValue()":
result = rlp.decode(rc.value, UInt256)
if rc.isOk:
result = rc.value
acc.originalStorage[slot] = result
@ -375,10 +373,9 @@ proc persistStorage(acc: AccountRef, ac: AccountsLedgerRef) =
# Save `overlayStorage[]` on database
for slot, value in acc.overlayStorage:
let slotKey = slot.toBytesBE.keccakHash.data
let slotKey = slot.toBytesBE.keccakHash
if value > 0:
let encodedValue = rlp.encode(value)
ac.ledger.slotMerge(acc.toAccountKey, slotKey, encodedValue).isOkOr:
ac.ledger.slotMerge(acc.toAccountKey, slotKey, value).isOkOr:
raiseAssert info & $$error
else:
ac.ledger.slotDelete(acc.toAccountKey, slotKey).isOkOr:

View File

@ -98,7 +98,7 @@ func toStr*(w: Hash256): string =
w.data.oaToStr
func toStr*(w: CodeBytesRef): string =
if w.CodeBytesRef.isNil: "nil"
if w.isNil: "nil"
else: "[" & $w.bytes.len & "]"
func toStr*(w: Blob): string =

View File

@ -13,7 +13,6 @@ import
../db/ledger,
./computation,
./interpreter_dispatch,
./interpreter/gas_costs,
./message,
./state,
./types

View File

@ -334,13 +334,11 @@ proc verifyAsmResult(vmState: BaseVMState, boa: Assembler, asmResult: CallResult
for kv in boa.storage:
let key = kv[0].toHex()
let val = kv[1].toHex()
let slotKey = UInt256.fromBytesBE(kv[0]).toBytesBE.keccakHash.data
let data = al.slotFetch(accPath, slotKey).valueOr: EmptyBlob
let actual = data.toHex
let zerosLen = 64 - (actual.len)
let value = repeat('0', zerosLen) & actual
if val != value:
error "storage has different value", key=key, expected=val, actual=value
let slotKey = UInt256.fromBytesBE(kv[0]).toBytesBE.keccakHash
let data = al.slotFetch(accPath, slotKey).valueOr: default(UInt256)
let actual = data.toBytesBE().toHex
if val != actual:
error "storage has different value", key=key, expected=val, actual
return false
let logs = vmState.getAndClearLogEntries()