Hash keys and hash256 revisited (#2497)

* Remove cruft left-over from PR #2494

* TODO

* Update comments on `HashKey` type values

* Remove obsolete hash key conversion flag `forceRoot`

why:
  The flag is treated implicitly by having vertex keys as `HashKey` type
  and root vertex states converted to `Hash256`.
Jordan Hrycaj 2024-07-17 13:48:21 +00:00 committed by GitHub
parent 916f88a373
commit 17391b58d0
7 changed files with 47 additions and 133 deletions
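
The gist of the change, as a hedged model (a self-contained sketch assuming
`nimcrypto` is available; `MiniKey`, `digestToMiniKey` and `toRootHash` are
illustrative stand-ins, not the actual Aristo API): short RLP encodings stay
embedded in the key, and forcing a full 32 byte hash is now a property of the
root-state conversion rather than a flag on the digest function.

  import nimcrypto

  type
    MiniKey = object         # models `HashKey`
      buf: array[32, byte]   # either a Keccak digest or raw RLP bytes
      len: int8              # blob length, or 32 when it is a hash

  proc digestToMiniKey(data: openArray[byte]): MiniKey =
    ## Models `digestTo(HashKey)` after this PR: no `forceRoot` flag.
    if data.len == 0:
      result.len = 0
    elif data.len < 32:
      # short RLP encodings are embedded verbatim (yellow paper, appendix D)
      result.len = int8(data.len)
      for i in 0 ..< data.len:
        result.buf[i] = data[i]
    else:
      result.buf = keccak256.digest(data).data
      result.len = 32

  proc toRootHash(k: MiniKey): array[32, byte] =
    ## Models `HashKey.to(Hash256)`: a root state is always a full Keccak
    ## hash, so an embedded short blob is digested on-the-fly.
    ## (The empty-key case, see `EMPTY_ROOT_HASH`, is glossed over here.)
    if k.len == 32:
      k.buf
    else:
      keccak256.digest(k.buf.toOpenArray(0, k.len.int - 1)).data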

View File

@@ -1,11 +1,10 @@
 * Check whether `HashKey` can be reduced to a simple 32 byte array (see
   *desc_identifiers.nim*)
 * Re-visit `delTree()`. Suggestion is deleting small trees on the memory layer,
   otherwise only deleting the root vertex (so it becomes inaccessible) and
   remember the follow up vertices which can travel through the tx-layers
   to be picked up by the backend store.
-* Mental note: For *proof-mode* with pre-allocated locked vertices and Merkle
-  keys, verification of a partial tree must be done by computing sub-tree keys
-  at the relative roots and comparing them with the pre-allocated Merkle keys.
+* Note that the *proof-mode* code was removed with PR #2445. An idea for a
+  re-implementation would be to pre-load vertices and keep the perimeter
+  hashes of the pre-loaded vertices externally in a vid-hash table. That way,
+  the vid hashes can be verified should they appear in the partial MPT at a
+  later stage.
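
The re-implementation idea in the new TODO entry could look roughly like the
following self-contained sketch (all names are hypothetical: `VertexID`,
`PerimeterHashes`, `verifyIfKnown`; the real proof-mode code was removed with
PR #2445):

  import std/tables

  type
    VertexID = uint64
    PerimeterHashes = Table[VertexID, array[32, byte]]

  proc verifyIfKnown(perimeter: PerimeterHashes;
                     vid: VertexID; computed: array[32, byte]): bool =
    ## Vids outside the pre-loaded perimeter pass unchecked; a recorded vid
    ## must match the hash remembered when the partial MPT was loaded.
    if vid in perimeter:
      perimeter[vid] == computed
    else:
      true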

View File

@@ -40,7 +40,7 @@ proc checkTopStrict*(
       let node = vtx.toNode(rvid.root, db).valueOr:
         # not all sub-keys might be ready due to lazy hashing
         continue
-      if key != node.digestTo(HashKey, rvid.root==rvid.vid):
+      if key != node.digestTo(HashKey):
         return err((rvid.vid,CheckStkVtxKeyMismatch))
     else: # Empty key flags key is for update

@@ -63,7 +63,7 @@ proc checkTopProofMode*(
     if vtx.isValid:
       let node = vtx.toNode(rvid.root, db).valueOr:
         continue
-      if key != node.digestTo(HashKey, rvid.root == rvid.vid):
+      if key != node.digestTo(HashKey):
         return err((rvid.vid,CheckRlxVtxKeyMismatch))
   ok()

View File

@@ -19,6 +19,12 @@ proc computeKey*(
     db: AristoDbRef;                   # Database, top layer
     rvid: RootedVertexID;              # Vertex to convert
       ): Result[HashKey, AristoError] =
+  ## Compute the key for an arbitrary vertex ID. If successful, the length of
+  ## the resulting key might be smaller than 32. If it is used as a root vertex
+  ## state/hash, it must be converted to a `Hash256` (using `.to(Hash256)`) as
+  ## in `db.computeKey(rvid).value.to(Hash256)` which always results in a
+  ## 32 byte value.
+  ##
   # This is a variation on getKeyRc which computes the key instead of returning
   # an error
   # TODO it should not always write the key to the persistent storage

@@ -87,11 +93,12 @@ proc computeKey*(
         writer.startList(2)
         writer.append(vtx.ePfx.toHexPrefix(isleaf = false))
-        writer.append(bwriter.finish().digestTo(HashKey, forceRoot=false))
+        writer.append(bwriter.finish().digestTo(HashKey))
       else:
         writeBranch(writer)

-  let h = writer.finish().digestTo(HashKey, rvid.root == rvid.vid)
+  var h = writer.finish().digestTo(HashKey)
   # TODO This shouldn't necessarily go into the database if we're just computing
   # a key ephemerally - it should however be cached for some time since
   # deep hash computations are expensive
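
A usage sketch matching the new `computeKey()` doc comment (hedged: it assumes
the Aristo modules are imported and `db`/`rvid` are valid; error handling is
elided via `valueOr` as elsewhere in the codebase):

  let key = db.computeKey(rvid).valueOr:
    raiseAssert "computeKey failed: " & $error

  # `key` may be shorter than 32 bytes; a root state needs the full hash:
  let rootState = key.to(Hash256)   # always a 32 byte value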

View File

@@ -159,7 +159,7 @@ proc ppKey(key: HashKey; db: AristoDbRef; pfx = true): string =
         let vtx = db.getVtx rv
         if vtx.isValid:
           let rc = vtx.toNode(rv.root, db)
-          if rc.isOk and key == rc.value.digestTo(HashKey, rv.root==rv.vid):
+          if rc.isOk and key == rc.value.digestTo(HashKey):
             rvid = rv
             break
   # Ok, assemble key representation

@@ -257,7 +257,7 @@ proc ppXMap*(
         let rc = vtx.toNode(w.root, db)
         if rc.isErr:
           2
-        elif key != rc.value.digestTo(HashKey, root==w.vid):
+        elif key != rc.value.digestTo(HashKey):
           3
         else:
           4

View File

@@ -57,24 +57,29 @@ type
     ## To reference the root itself, use (root, root).

   HashKey* = object
-    ## Ethereum MPTs use Keccak hashes as node links if the size of an RLP
-    ## encoded node is of size at least 32 bytes. Otherwise, the RLP encoded
-    ## node value is used as a pseudo node link (rather than a hash.) Such a
-    ## node is nor stored on key-value database. Rather the RLP encoded node
-    ## value is stored instead of a lode link in a parent node instead. Only
-    ## for the root hash, the top level node is always referred to by the
-    ## hash.
+    ## Ethereum reference MPTs use Keccak hashes as node links if the size of
+    ## an RLP encoded node is at least 32 bytes. Otherwise, the RLP encoded
+    ## node value is used as a pseudo node link (rather than a hash.) This is
+    ## specified in the yellow paper, appendix D. Only for the root hash, the
+    ## top level node is always referred to by the Keccak hash.
     ##
-    ## This compaction feature needed an abstraction of the `HashKey` object
+    ## On the `Aristo` database node links are called keys which are of this
+    ## very type `HashKey`. For key-value tables (which assign a key to a
+    ## vertex), the keys are always stored as such with length probably
+    ## smaller than 32, including for root vertex keys. Only when used as a
+    ## root state, the key of the latter is digested to a Keccak hash
+    ## on-the-fly.
+    ##
+    ## This compaction feature needs an abstraction of the hash link object
     ## which is either a `Hash256` or a `Blob` of length at most 31 bytes.
     ## This leaves two ways of representing an empty/void `HashKey` type.
     ## It may be available as an empty `Blob` of zero length, or the
     ## `Hash256` type of the Keccak hash of an empty `Blob` (see constant
     ## `EMPTY_ROOT_HASH`.)
     ##
-    ## For performance, we avoid storing blobs as `seq`, instead storing their
-    ## length and sharing the data "space".
-    ## TODO can we skip one byte of hash and reduce this type to 32 bytes?
+    ## For performance, storing blobs as `seq` is avoided, instead storing
+    ## their length and sharing the data "space".
+    ##
     buf: array[32, byte] # Either Hash256 or blob data, depending on `len`
     len: int8            # length in the case of blobs, or 32 when it's a hash

@@ -328,20 +333,22 @@ func to*(n: UInt256; T: type PathID): T =
 # Public helpers: Miscellaneous mappings
 # ------------------------------------------------------------------------------

-func digestTo*(data: openArray[byte]; T: type HashKey; forceRoot = false): T =
+func digestTo*(data: openArray[byte]; T: type HashKey): T =
   ## For argument `data` with length smaller than 32, import them as-is into
   ## the result. Otherwise import the Keccak hash of the argument `data`.
   ##
-  ## If the argument `forceRoot` is set `true`, the `data` argument is always
-  ## hashed.
+  ## The `data` argument is only hashed if the `data` length is at least
+  ## 32 bytes. Otherwise it is converted as-is to a `HashKey` type result.
   ##
-  ## Otherwise it is only hashed if the `data` length is at least 32 bytes.
-  ##
-  ## Otherwise it is converted as-is to a `HashKey` type result.
+  ## Note that for calculating a root state (when `data` is a serialised
+  ## vertex), one would use the expression `data.digestTo(HashKey).to(Hash256)`
+  ## which would always hash the `data` argument regardless of its length
+  ## (and might result in an `EMPTY_ROOT_HASH`.) See the comment at the
+  ## definition of the `HashKey` type for an explanation of its usage.
   ##
   if data.len == 0:
     result.len = 0
-  elif data.len < 32 and not forceRoot:
+  elif data.len < 32:
     result.len = int8 data.len
     (addr result.data[0]).copyMem(unsafeAddr data[0], data.len)
   else:
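
Continuing the `MiniKey` model from the sketch near the top of this page
(hypothetical names, not the Aristo API), the rule reads as follows in
practice:

  let short = digestToMiniKey(@[0x80'u8])        # 1 byte of RLP: embedded
  assert short.len == 1

  let long = digestToMiniKey(newSeq[byte](32))   # >= 32 bytes: Keccak hashed
  assert long.len == 32

  # A root state hashes regardless of length, replacing `forceRoot=true`:
  let rootState = short.toRootHash()             # 32 byte Keccak of the blob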

View File

@@ -106,15 +106,6 @@ proc complete(
           uHike.legs.add leg
           return ok(uHike) # done

-      # of Extension:
-      #   vid = vtx.eVid
-      #   if vid.isValid:
-      #     vtx = db.getVtx (hike.root, vid)
-      #     if vtx.isValid:
-      #       uHike.legs.add leg
-      #       continue
-      #   return err((vid,NearbyExtensionError)) # Oops, no way
-
       of Branch:
         when doLeast:
           leg.nibble = vtx.branchNibbleMin 0

@@ -181,16 +172,6 @@ proc zeroAdjust(
             return err((hike.root,NearbyBeyondRange))
           pfx = root.ePfx & NibblesBuf.nibble(n.byte)

-        # of Extension:
-        #   let ePfx = root.ePfx
-        #   # Must be followed by a branch vertex
-        #   if not hike.accept ePfx:
-        #     break fail
-        #   let vtx = db.getVtx (hike.root, root.eVid)
-        #   if not vtx.isValid:
-        #     break fail
-        #   pfx = ePfx
-
         of Leaf:
           pfx = root.lPfx
           if not hike.accept pfx:

@@ -302,10 +283,6 @@ proc nearbyNext(
       of Branch:
         if top.nibble < 0 or uHike.tail.len == 0:
           return err((top.wp.vid,NearbyUnexpectedVtx))
-      # of Extension:
-      #   uHike.tail = top.wp.vtx.ePfx & uHike.tail
-      #   uHike.legs.setLen(uHike.legs.len - 1)
-      #   continue

       var
         step = top

@@ -327,9 +304,6 @@ proc nearbyNext(
         of Leaf:
           if uHike.accept vtx.lPfx:
             return uHike.complete(vid, db, hikeLenMax, doLeast=moveRight)
-        # of Extension:
-        #   if uHike.accept vtx.ePfx:
-        #     return uHike.complete(vid, db, hikeLenMax, doLeast=moveRight)
         of Branch:
           let nibble = uHike.tail[0].int8
           if start and accept nibble:

@@ -588,8 +562,6 @@ proc rightMissing*(
     case vtx.vType
     of Leaf:
       return ok(vtx.lPfx < hike.tail)
-    # of Extension:
-    #   return ok(vtx.ePfx < hike.tail)
     of Branch:
       return ok(vtx.branchNibbleMin(hike.tail[0].int8) < 0)

View File

@@ -59,74 +59,6 @@ proc serialise(
 # Public RLP transcoder mixins
 # ------------------------------------------------------------------------------

-when false: # free parking (not yet cruft)
-  proc read*(rlp: var Rlp; T: type NodeRef): T {.gcsafe, raises: [RlpError].} =
-    ## Mixin for RLP writer, a decoder with error return code in a `Dummy`
-    ## node if needed.
-    proc aristoError(error: AristoError): NodeRef =
-      ## Allows returning de
-      NodeRef(vType: Leaf, error: error)
-
-    if not rlp.isList:
-      # Otherwise `rlp.items` would raise a `Defect`
-      return aristoError(Rlp2Or17ListEntries)
-
-    var
-      blobs = newSeq[Blob](2)  # temporary, cache
-      links: array[16,HashKey] # reconstruct branch node
-      top = 0                  # count entries and positions
-
-    # Collect lists of either 2 or 17 blob entries.
-    for w in rlp.items:
-      case top
-      of 0, 1:
-        if not w.isBlob:
-          return aristoError(RlpBlobExpected)
-        blobs[top] = rlp.read(Blob)
-      of 2 .. 15:
-        let blob = rlp.read(Blob)
-        links[top] = HashKey.fromBytes(blob).valueOr:
-          return aristoError(RlpBranchHashKeyExpected)
-      of 16:
-        if not w.isBlob or 0 < rlp.read(Blob).len:
-          return aristoError(RlpEmptyBlobExpected)
-      else:
-        return aristoError(Rlp2Or17ListEntries)
-      top.inc
-
-    # Verify extension data
-    case top
-    of 2:
-      if blobs[0].len == 0:
-        return aristoError(RlpNonEmptyBlobExpected)
-      let (isLeaf, pathSegment) = NibblesBuf.fromHexPrefix blobs[0]
-      if isLeaf:
-        return NodeRef(
-          vType: Leaf,
-          lPfx: pathSegment,
-          lData: LeafPayload(
-            pType: RawData,
-            rawBlob: blobs[1]))
-      else:
-        raiseAssert "TODO"
-        # var node = NodeRef(
-        #   vType: Extension,
-        #   ePfx: pathSegment)
-        # node.key[0] = HashKey.fromBytes(blobs[1]).valueOr:
-        #   return aristoError(RlpExtHashKeyExpected)
-        # return node
-    of 17:
-      for n in [0,1]:
-        links[n] = HashKey.fromBytes(blobs[n]).valueOr:
-          return aristoError(RlpBranchHashKeyExpected)
-      return NodeRef(
-        vType: Branch,
-        key: links)
-    else:
-      discard
-
-    aristoError(Rlp2Or17ListEntries)
-
 func append*(w: var RlpWriter; key: HashKey) =
   if 1 < key.len and key.len < 32:
     w.appendRawBytes key.data

@@ -150,7 +82,7 @@ proc to*(w: tuple[key: HashKey, node: NodeRef]; T: type seq[(Blob,Blob)]): T =
     if 0 < w.node.ePfx.len:
       # Do for embedded extension node
-      let brHash = wr.finish().digestTo(HashKey, forceRoot=false)
+      let brHash = wr.finish().digestTo(HashKey)
       result.add (@(brHash.data), wr.finish())

       wr = initRlpWriter()

@@ -174,13 +106,10 @@ proc to*(w: tuple[key: HashKey, node: NodeRef]; T: type seq[(Blob,Blob)]): T =
     result.add (@(w.key.data), wr.finish())

-proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
+proc digestTo*(node: NodeRef; T: type HashKey): T =
   ## Convert the argument `node` to the corresponding Merkle hash key. Note
   ## that a `Dummy` node is encoded as a `Leaf`.
   ##
-  ## The argument `forceRoot` is passed on to the function
-  ## `desc_identifiers.digestTo()`.
-  ##
   var wr = initRlpWriter()
   case node.vType:
   of Branch:

@@ -192,7 +121,7 @@ proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
     # Do for embedded extension node
     if 0 < node.ePfx.len:
-      let brHash = wr.finish().digestTo(HashKey, forceRoot=false)
+      let brHash = wr.finish().digestTo(HashKey)
       wr = initRlpWriter()
       wr.startList(2)
       wr.append node.ePfx.toHexPrefix(isleaf = false)

@@ -209,7 +138,7 @@ proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
     wr.append node.lPfx.toHexPrefix(isleaf = true)
     wr.append node.lData.serialise(getKey0).value

-  wr.finish().digestTo(HashKey, forceRoot)
+  wr.finish().digestTo(HashKey)

 proc serialise*(
     db: AristoDbRef;