Hash keys and hash256 revisited (#2497)

* Remove cruft left-over from PR #2494

* TODO

* Update comments on `HashKey` type values

* Remove obsolete hash key conversion flag `forceRoot`

why:
  Is treated implicitly by having vertex keys as `HashKey` type and
  root vertex states converted to `Hash256`
This commit is contained in:
Jordan Hrycaj 2024-07-17 13:48:21 +00:00 committed by GitHub
parent 916f88a373
commit 17391b58d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 47 additions and 133 deletions

View File

@ -1,11 +1,10 @@
* Check whether `HashKey` can be reduced to a simple 32 byte array (see
*desc_identifiers.nim*)
* Re-visit `delTree()`. Suggestion is deleting small trees on the memory later,
otherwise only deleting the root vertex (so it becomes inaccessible) and
remember the follow up vertices which can travel through the tx-layers
to be picked up by the backend store.
* Mental note: For *proof-mode* with pre-allocated locked vertices and Merkle
keys, verification of a partial tree must be done by computing sub-tree keys
at the relative roots and comparing them with the pre-allocated Merkle keys.
* Note that the *proof-mode* code was removed with PR #2445. An idea for a
re-implementation would be to pre-load vertices and keep the perimeter
hashes of the pre-loaded vertices externally in a vid-hash table. That way,
the vid hashes can be verified should they appear in the partial MPT at a
later stage.

View File

@ -40,7 +40,7 @@ proc checkTopStrict*(
let node = vtx.toNode(rvid.root, db).valueOr:
# not all sub-keys might be ready due to lazy hashing
continue
if key != node.digestTo(HashKey, rvid.root==rvid.vid):
if key != node.digestTo(HashKey):
return err((rvid.vid,CheckStkVtxKeyMismatch))
else: # Empty key flags key is for update
@ -63,7 +63,7 @@ proc checkTopProofMode*(
if vtx.isValid:
let node = vtx.toNode(rvid.root, db).valueOr:
continue
if key != node.digestTo(HashKey, rvid.root == rvid.vid):
if key != node.digestTo(HashKey):
return err((rvid.vid,CheckRlxVtxKeyMismatch))
ok()

View File

@ -19,6 +19,12 @@ proc computeKey*(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Vertex to convert
): Result[HashKey, AristoError] =
## Compute the key for an arbitrary vertex ID. If successful, the length of
## the resulting key might be smaller than 32. If it is used as a root vertex
## state/hash, it must be converted to a `Hash256` (using `.to(Hash256)`) as
## in `db.computeKey(rvid).value.to(Hash256)` which always results in a
## 32 byte value.
##
# This is a variation on getKeyRc which computes the key instead of returning
# an error
# TODO it should not always write the key to the persistent storage
@ -87,11 +93,12 @@ proc computeKey*(
writer.startList(2)
writer.append(vtx.ePfx.toHexPrefix(isleaf = false))
writer.append(bwriter.finish().digestTo(HashKey, forceRoot=false))
writer.append(bwriter.finish().digestTo(HashKey))
else:
writeBranch(writer)
let h = writer.finish().digestTo(HashKey, rvid.root == rvid.vid)
var h = writer.finish().digestTo(HashKey)
# TODO This shouldn't necessarily go into the database if we're just computing
# a key ephemerally - it should however be cached for some time since
# deep hash computations are expensive

View File

@ -159,7 +159,7 @@ proc ppKey(key: HashKey; db: AristoDbRef; pfx = true): string =
let vtx = db.getVtx rv
if vtx.isValid:
let rc = vtx.toNode(rv.root, db)
if rc.isOk and key == rc.value.digestTo(HashKey, rv.root==rv.vid):
if rc.isOk and key == rc.value.digestTo(HashKey):
rvid = rv
break
# Ok, assemble key representation
@ -257,7 +257,7 @@ proc ppXMap*(
let rc = vtx.toNode(w.root, db)
if rc.isErr:
2
elif key != rc.value.digestTo(HashKey, root==w.vid):
elif key != rc.value.digestTo(HashKey):
3
else:
4

View File

@ -57,24 +57,29 @@ type
## To reference the root itself, use (root, root).
HashKey* = object
## Ethereum MPTs use Keccak hashes as node links if the size of an RLP
## encoded node is of size at least 32 bytes. Otherwise, the RLP encoded
## node value is used as a pseudo node link (rather than a hash.) Such a
## node is nor stored on key-value database. Rather the RLP encoded node
## value is stored instead of a lode link in a parent node instead. Only
## for the root hash, the top level node is always referred to by the
## hash.
## Ethereum reference MPTs use Keccak hashes as node links if the size of
## an RLP encoded node is at least 32 bytes. Otherwise, the RLP encoded
## node value is used as a pseudo node link (rather than a hash.) This is
## specified in the yellow paper, appendix D. Only for the root hash, the
## top level node is always referred to by the Keccak hash.
##
## This compaction feature needed an abstraction of the `HashKey` object
## On the `Aristo` database node links are called keys which are of this
## very type `HashKey`. For key-value tables (which assign a key to a
## vertex), the keys are always stored as such with length probably
## smaller than 32, including for root vertex keys. Only when used as a
## root state, the key of the latter is digested to a Keccak hash
## on-the-fly.
##
## This compaction feature needs an abstraction of the hash link object
## which is either a `Hash256` or a `Blob` of length at most 31 bytes.
## This leaves two ways of representing an empty/void `HashKey` type.
## It may be available as an empty `Blob` of zero length, or the
## `Hash256` type of the Keccak hash of an empty `Blob` (see constant
## `EMPTY_ROOT_HASH`.)
##
## For performance, we avoid storing blobs as `seq`, instead storing their
## length and sharing the data "space".
## TODO can we skip one byte of hash and reduce this type to 32 bytes?
## For performance, storing blobs as `seq` is avoided, instead storing
## their length and sharing the data "space".
##
buf: array[32, byte] # Either Hash256 or blob data, depending on `len`
len: int8 # length in the case of blobs, or 32 when it's a hash
@ -328,20 +333,22 @@ func to*(n: UInt256; T: type PathID): T =
# Public helpers: Miscellaneous mappings
# ------------------------------------------------------------------------------
func digestTo*(data: openArray[byte]; T: type HashKey; forceRoot = false): T =
func digestTo*(data: openArray[byte]; T: type HashKey): T =
## For argument `data` with length smaller than 32, import them as-is into
## the result. Otherwise import the Keccak hash of the argument `data`.
##
## If the argument `forceRoot` is set `true`, the `data` argument is always
## hashed.
## The `data` argument is only hashed if the `data` length is at least
## 32 bytes. Otherwise it is converted as-is to a `HashKey` type result.
##
## Otherwise it is only hashed if the `data` length is at least 32 bytes.
##
## Otherwise it is converted as-is to a `HashKey` type result.
## Note that for calculating a root state (when `data` is a serialised
## vertex), one would use the expression `data.digestTo(HashKey).to(Hash256)`
## which would always hash the `data` argument regardless of its length
## (and might result in an `EMPTY_ROOT_HASH`.) See the comment at the
## definition of the `HashKey` type for an explanation of its usage.
##
if data.len == 0:
result.len = 0
elif data.len < 32 and not forceRoot:
elif data.len < 32:
result.len = int8 data.len
(addr result.data[0]).copyMem(unsafeAddr data[0], data.len)
else:

View File

@ -106,15 +106,6 @@ proc complete(
uHike.legs.add leg
return ok(uHike) # done
# of Extension:
# vid = vtx.eVid
# if vid.isValid:
# vtx = db.getVtx (hike.root, vid)
# if vtx.isValid:
# uHike.legs.add leg
# continue
# return err((vid,NearbyExtensionError)) # Oops, no way
of Branch:
when doLeast:
leg.nibble = vtx.branchNibbleMin 0
@ -181,16 +172,6 @@ proc zeroAdjust(
return err((hike.root,NearbyBeyondRange))
pfx = root.ePfx & NibblesBuf.nibble(n.byte)
# of Extension:
# let ePfx = root.ePfx
# # Must be followed by a branch vertex
# if not hike.accept ePfx:
# break fail
# let vtx = db.getVtx (hike.root, root.eVid)
# if not vtx.isValid:
# break fail
# pfx = ePfx
of Leaf:
pfx = root.lPfx
if not hike.accept pfx:
@ -302,10 +283,6 @@ proc nearbyNext(
of Branch:
if top.nibble < 0 or uHike.tail.len == 0:
return err((top.wp.vid,NearbyUnexpectedVtx))
# of Extension:
# uHike.tail = top.wp.vtx.ePfx & uHike.tail
# uHike.legs.setLen(uHike.legs.len - 1)
# continue
var
step = top
@ -327,9 +304,6 @@ proc nearbyNext(
of Leaf:
if uHike.accept vtx.lPfx:
return uHike.complete(vid, db, hikeLenMax, doLeast=moveRight)
# of Extension:
# if uHike.accept vtx.ePfx:
# return uHike.complete(vid, db, hikeLenMax, doLeast=moveRight)
of Branch:
let nibble = uHike.tail[0].int8
if start and accept nibble:
@ -588,8 +562,6 @@ proc rightMissing*(
case vtx.vType
of Leaf:
return ok(vtx.lPfx < hike.tail)
# of Extension:
# return ok(vtx.ePfx < hike.tail)
of Branch:
return ok(vtx.branchNibbleMin(hike.tail[0].int8) < 0)

View File

@ -59,74 +59,6 @@ proc serialise(
# Public RLP transcoder mixins
# ------------------------------------------------------------------------------
when false: # free parking (not yet cruft)
proc read*(rlp: var Rlp; T: type NodeRef): T {.gcsafe, raises: [RlpError].} =
## Mixin for RLP writer, a decoder with error return code in a `Dummy`
## node if needed.
proc aristoError(error: AristoError): NodeRef =
## Allows returning de
NodeRef(vType: Leaf, error: error)
if not rlp.isList:
# Otherwise `rlp.items` would raise a `Defect`
return aristoError(Rlp2Or17ListEntries)
var
blobs = newSeq[Blob](2) # temporary, cache
links: array[16,HashKey] # reconstruct branch node
top = 0 # count entries and positions
# Collect lists of either 2 or 17 blob entries.
for w in rlp.items:
case top
of 0, 1:
if not w.isBlob:
return aristoError(RlpBlobExpected)
blobs[top] = rlp.read(Blob)
of 2 .. 15:
let blob = rlp.read(Blob)
links[top] = HashKey.fromBytes(blob).valueOr:
return aristoError(RlpBranchHashKeyExpected)
of 16:
if not w.isBlob or 0 < rlp.read(Blob).len:
return aristoError(RlpEmptyBlobExpected)
else:
return aristoError(Rlp2Or17ListEntries)
top.inc
# Verify extension data
case top
of 2:
if blobs[0].len == 0:
return aristoError(RlpNonEmptyBlobExpected)
let (isLeaf, pathSegment) = NibblesBuf.fromHexPrefix blobs[0]
if isLeaf:
return NodeRef(
vType: Leaf,
lPfx: pathSegment,
lData: LeafPayload(
pType: RawData,
rawBlob: blobs[1]))
else:
raiseAssert "TODO"
# var node = NodeRef(
# vType: Extension,
# ePfx: pathSegment)
# node.key[0] = HashKey.fromBytes(blobs[1]).valueOr:
# return aristoError(RlpExtHashKeyExpected)
# return node
of 17:
for n in [0,1]:
links[n] = HashKey.fromBytes(blobs[n]).valueOr:
return aristoError(RlpBranchHashKeyExpected)
return NodeRef(
vType: Branch,
key: links)
else:
discard
aristoError(Rlp2Or17ListEntries)
func append*(w: var RlpWriter; key: HashKey) =
if 1 < key.len and key.len < 32:
w.appendRawBytes key.data
@ -150,7 +82,7 @@ proc to*(w: tuple[key: HashKey, node: NodeRef]; T: type seq[(Blob,Blob)]): T =
if 0 < w.node.ePfx.len:
# Do for embedded extension node
let brHash = wr.finish().digestTo(HashKey, forceRoot=false)
let brHash = wr.finish().digestTo(HashKey)
result.add (@(brHash.data), wr.finish())
wr = initRlpWriter()
@ -174,13 +106,10 @@ proc to*(w: tuple[key: HashKey, node: NodeRef]; T: type seq[(Blob,Blob)]): T =
result.add (@(w.key.data), wr.finish())
proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
proc digestTo*(node: NodeRef; T: type HashKey): T =
## Convert the argument `node` to the corresponding Merkle hash key. Note
## that a `Dummy` node is encoded as as a `Leaf`.
##
## The argument `forceRoot` is passed on to the function
## `desc_identifiers.digestTo()`.
##
var wr = initRlpWriter()
case node.vType:
of Branch:
@ -192,7 +121,7 @@ proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
# Do for embedded extension node
if 0 < node.ePfx.len:
let brHash = wr.finish().digestTo(HashKey, forceRoot=false)
let brHash = wr.finish().digestTo(HashKey)
wr= initRlpWriter()
wr.startList(2)
wr.append node.ePfx.toHexPrefix(isleaf = false)
@ -209,7 +138,7 @@ proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
wr.append node.lPfx.toHexPrefix(isleaf = true)
wr.append node.lData.serialise(getKey0).value
wr.finish().digestTo(HashKey, forceRoot)
wr.finish().digestTo(HashKey)
proc serialise*(
db: AristoDbRef;