Hash keys and hash256 revisited (#2497)

* Remove cruft left-over from PR #2494

* TODO

* Update comments on `HashKey` type values

* Remove obsolete hash key conversion flag `forceRoot`

why:
  Is treated implicitly by having vertex keys as `HashKey` type and
  root vertex states converted to `Hash256`
This commit is contained in:
Jordan Hrycaj 2024-07-17 13:48:21 +00:00 committed by GitHub
parent 916f88a373
commit 17391b58d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 47 additions and 133 deletions

View File

@ -1,11 +1,10 @@
* Check whether `HashKey` can be reduced to a simple 32 byte array (see
*desc_identifiers.nim*)
* Re-visit `delTree()`. Suggestion is deleting small trees on the memory later,
otherwise only deleting the root vertex (so it becomes inaccessible) and
remember the follow up vertices which can travel through the tx-layers
to be picked up by the backend store.
* Mental note: For *proof-mode* with pre-allocated locked vertices and Merkle
keys, verification of a partial tree must be done by computing sub-tree keys
at the relative roots and comparing them with the pre-allocated Merkle keys.
* Note that the *proof-mode* code was removed with PR #2445. An idea for a
re-implementation would be to pre-load vertices and keep the perimeter
hashes of the pre-loaded vertices externally in a vid-hash table. That way,
the vid hashes can be verified should they appear in the partial MPT at a
later stage.

View File

@ -40,7 +40,7 @@ proc checkTopStrict*(
let node = vtx.toNode(rvid.root, db).valueOr:
# not all sub-keys might be ready due to lazy hashing
continue
if key != node.digestTo(HashKey, rvid.root==rvid.vid):
if key != node.digestTo(HashKey):
return err((rvid.vid,CheckStkVtxKeyMismatch))
else: # Empty key flags key is for update
@ -63,7 +63,7 @@ proc checkTopProofMode*(
if vtx.isValid:
let node = vtx.toNode(rvid.root, db).valueOr:
continue
if key != node.digestTo(HashKey, rvid.root == rvid.vid):
if key != node.digestTo(HashKey):
return err((rvid.vid,CheckRlxVtxKeyMismatch))
ok()

View File

@ -19,6 +19,12 @@ proc computeKey*(
db: AristoDbRef; # Database, top layer
rvid: RootedVertexID; # Vertex to convert
): Result[HashKey, AristoError] =
## Compute the key for an arbitrary vertex ID. If successful, the length of
## the resulting key might be smaller than 32. If it is used as a root vertex
## state/hash, it must be converted to a `Hash256` (using `.to(Hash256)`) as
## in `db.computeKey(rvid).value.to(Hash256)` which always results in a
## 32 byte value.
##
# This is a variation on getKeyRc which computes the key instead of returning
# an error
# TODO it should not always write the key to the persistent storage
@ -87,11 +93,12 @@ proc computeKey*(
writer.startList(2)
writer.append(vtx.ePfx.toHexPrefix(isleaf = false))
writer.append(bwriter.finish().digestTo(HashKey, forceRoot=false))
writer.append(bwriter.finish().digestTo(HashKey))
else:
writeBranch(writer)
let h = writer.finish().digestTo(HashKey, rvid.root == rvid.vid)
var h = writer.finish().digestTo(HashKey)
# TODO This shouldn't necessarily go into the database if we're just computing
# a key ephemerally - it should however be cached for some time since
# deep hash computations are expensive

View File

@ -159,7 +159,7 @@ proc ppKey(key: HashKey; db: AristoDbRef; pfx = true): string =
let vtx = db.getVtx rv
if vtx.isValid:
let rc = vtx.toNode(rv.root, db)
if rc.isOk and key == rc.value.digestTo(HashKey, rv.root==rv.vid):
if rc.isOk and key == rc.value.digestTo(HashKey):
rvid = rv
break
# Ok, assemble key representation
@ -257,7 +257,7 @@ proc ppXMap*(
let rc = vtx.toNode(w.root, db)
if rc.isErr:
2
elif key != rc.value.digestTo(HashKey, root==w.vid):
elif key != rc.value.digestTo(HashKey):
3
else:
4

View File

@ -57,24 +57,29 @@ type
## To reference the root itself, use (root, root).
HashKey* = object
## Ethereum MPTs use Keccak hashes as node links if the size of an RLP
## encoded node is of size at least 32 bytes. Otherwise, the RLP encoded
## node value is used as a pseudo node link (rather than a hash.) Such a
## node is nor stored on key-value database. Rather the RLP encoded node
## value is stored instead of a lode link in a parent node instead. Only
## for the root hash, the top level node is always referred to by the
## hash.
## Ethereum reference MPTs use Keccak hashes as node links if the size of
## an RLP encoded node is at least 32 bytes. Otherwise, the RLP encoded
## node value is used as a pseudo node link (rather than a hash.) This is
## specified in the yellow paper, appendix D. Only for the root hash, the
## top level node is always referred to by the Keccak hash.
##
## This compaction feature needed an abstraction of the `HashKey` object
## On the `Aristo` database node links are called keys which are of this
## very type `HashKey`. For key-value tables (which assign a key to a
## vertex), the keys are always stored as such with length probably
## smaller than 32, including for root vertex keys. Only when used as a
## root state, the key of the latter is digested to a Keccak hash
## on-the-fly.
##
## This compaction feature needs an abstraction of the hash link object
## which is either a `Hash256` or a `Blob` of length at most 31 bytes.
## This leaves two ways of representing an empty/void `HashKey` type.
## It may be available as an empty `Blob` of zero length, or the
## `Hash256` type of the Keccak hash of an empty `Blob` (see constant
## `EMPTY_ROOT_HASH`.)
##
## For performance, we avoid storing blobs as `seq`, instead storing their
## length and sharing the data "space".
## TODO can we skip one byte of hash and reduce this type to 32 bytes?
## For performance, storing blobs as `seq` is avoided, instead storing
## their length and sharing the data "space".
##
buf: array[32, byte] # Either Hash256 or blob data, depending on `len`
len: int8 # length in the case of blobs, or 32 when it's a hash
@ -328,20 +333,22 @@ func to*(n: UInt256; T: type PathID): T =
# Public helpers: Miscellaneous mappings
# ------------------------------------------------------------------------------
func digestTo*(data: openArray[byte]; T: type HashKey; forceRoot = false): T =
func digestTo*(data: openArray[byte]; T: type HashKey): T =
## For argument `data` with length smaller than 32, import them as-is into
## the result. Otherwise import the Keccak hash of the argument `data`.
##
## If the argument `forceRoot` is set `true`, the `data` argument is always
## hashed.
## The `data` argument is only hashed if the `data` length is at least
## 32 bytes. Otherwise it is converted as-is to a `HashKey` type result.
##
## Otherwise it is only hashed if the `data` length is at least 32 bytes.
##
## Otherwise it is converted as-is to a `HashKey` type result.
## Note that for calculating a root state (when `data` is a serialised
## vertex), one would use the expression `data.digestTo(HashKey).to(Hash256)`
## which would always hash the `data` argument regardless of its length
## (and might result in an `EMPTY_ROOT_HASH`.) See the comment at the
## definition of the `HashKey` type for an explanation of its usage.
##
if data.len == 0:
result.len = 0
elif data.len < 32 and not forceRoot:
elif data.len < 32:
result.len = int8 data.len
(addr result.data[0]).copyMem(unsafeAddr data[0], data.len)
else:

View File

@ -106,15 +106,6 @@ proc complete(
uHike.legs.add leg
return ok(uHike) # done
# of Extension:
# vid = vtx.eVid
# if vid.isValid:
# vtx = db.getVtx (hike.root, vid)
# if vtx.isValid:
# uHike.legs.add leg
# continue
# return err((vid,NearbyExtensionError)) # Oops, no way
of Branch:
when doLeast:
leg.nibble = vtx.branchNibbleMin 0
@ -181,16 +172,6 @@ proc zeroAdjust(
return err((hike.root,NearbyBeyondRange))
pfx = root.ePfx & NibblesBuf.nibble(n.byte)
# of Extension:
# let ePfx = root.ePfx
# # Must be followed by a branch vertex
# if not hike.accept ePfx:
# break fail
# let vtx = db.getVtx (hike.root, root.eVid)
# if not vtx.isValid:
# break fail
# pfx = ePfx
of Leaf:
pfx = root.lPfx
if not hike.accept pfx:
@ -302,10 +283,6 @@ proc nearbyNext(
of Branch:
if top.nibble < 0 or uHike.tail.len == 0:
return err((top.wp.vid,NearbyUnexpectedVtx))
# of Extension:
# uHike.tail = top.wp.vtx.ePfx & uHike.tail
# uHike.legs.setLen(uHike.legs.len - 1)
# continue
var
step = top
@ -327,9 +304,6 @@ proc nearbyNext(
of Leaf:
if uHike.accept vtx.lPfx:
return uHike.complete(vid, db, hikeLenMax, doLeast=moveRight)
# of Extension:
# if uHike.accept vtx.ePfx:
# return uHike.complete(vid, db, hikeLenMax, doLeast=moveRight)
of Branch:
let nibble = uHike.tail[0].int8
if start and accept nibble:
@ -588,8 +562,6 @@ proc rightMissing*(
case vtx.vType
of Leaf:
return ok(vtx.lPfx < hike.tail)
# of Extension:
# return ok(vtx.ePfx < hike.tail)
of Branch:
return ok(vtx.branchNibbleMin(hike.tail[0].int8) < 0)

View File

@ -59,74 +59,6 @@ proc serialise(
# Public RLP transcoder mixins
# ------------------------------------------------------------------------------
when false: # free parking (not yet cruft)
proc read*(rlp: var Rlp; T: type NodeRef): T {.gcsafe, raises: [RlpError].} =
## Mixin for RLP writer, a decoder with error return code in a `Dummy`
## node if needed.
proc aristoError(error: AristoError): NodeRef =
## Allows returning de
NodeRef(vType: Leaf, error: error)
if not rlp.isList:
# Otherwise `rlp.items` would raise a `Defect`
return aristoError(Rlp2Or17ListEntries)
var
blobs = newSeq[Blob](2) # temporary, cache
links: array[16,HashKey] # reconstruct branch node
top = 0 # count entries and positions
# Collect lists of either 2 or 17 blob entries.
for w in rlp.items:
case top
of 0, 1:
if not w.isBlob:
return aristoError(RlpBlobExpected)
blobs[top] = rlp.read(Blob)
of 2 .. 15:
let blob = rlp.read(Blob)
links[top] = HashKey.fromBytes(blob).valueOr:
return aristoError(RlpBranchHashKeyExpected)
of 16:
if not w.isBlob or 0 < rlp.read(Blob).len:
return aristoError(RlpEmptyBlobExpected)
else:
return aristoError(Rlp2Or17ListEntries)
top.inc
# Verify extension data
case top
of 2:
if blobs[0].len == 0:
return aristoError(RlpNonEmptyBlobExpected)
let (isLeaf, pathSegment) = NibblesBuf.fromHexPrefix blobs[0]
if isLeaf:
return NodeRef(
vType: Leaf,
lPfx: pathSegment,
lData: LeafPayload(
pType: RawData,
rawBlob: blobs[1]))
else:
raiseAssert "TODO"
# var node = NodeRef(
# vType: Extension,
# ePfx: pathSegment)
# node.key[0] = HashKey.fromBytes(blobs[1]).valueOr:
# return aristoError(RlpExtHashKeyExpected)
# return node
of 17:
for n in [0,1]:
links[n] = HashKey.fromBytes(blobs[n]).valueOr:
return aristoError(RlpBranchHashKeyExpected)
return NodeRef(
vType: Branch,
key: links)
else:
discard
aristoError(Rlp2Or17ListEntries)
func append*(w: var RlpWriter; key: HashKey) =
if 1 < key.len and key.len < 32:
w.appendRawBytes key.data
@ -150,7 +82,7 @@ proc to*(w: tuple[key: HashKey, node: NodeRef]; T: type seq[(Blob,Blob)]): T =
if 0 < w.node.ePfx.len:
# Do for embedded extension node
let brHash = wr.finish().digestTo(HashKey, forceRoot=false)
let brHash = wr.finish().digestTo(HashKey)
result.add (@(brHash.data), wr.finish())
wr = initRlpWriter()
@ -174,13 +106,10 @@ proc to*(w: tuple[key: HashKey, node: NodeRef]; T: type seq[(Blob,Blob)]): T =
result.add (@(w.key.data), wr.finish())
proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
proc digestTo*(node: NodeRef; T: type HashKey): T =
## Convert the argument `node` to the corresponding Merkle hash key. Note
## that a `Dummy` node is encoded as as a `Leaf`.
##
## The argument `forceRoot` is passed on to the function
## `desc_identifiers.digestTo()`.
##
var wr = initRlpWriter()
case node.vType:
of Branch:
@ -192,7 +121,7 @@ proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
# Do for embedded extension node
if 0 < node.ePfx.len:
let brHash = wr.finish().digestTo(HashKey, forceRoot=false)
let brHash = wr.finish().digestTo(HashKey)
wr= initRlpWriter()
wr.startList(2)
wr.append node.ePfx.toHexPrefix(isleaf = false)
@ -209,7 +138,7 @@ proc digestTo*(node: NodeRef; T: type HashKey; forceRoot = false): T =
wr.append node.lPfx.toHexPrefix(isleaf = true)
wr.append node.lData.serialise(getKey0).value
wr.finish().digestTo(HashKey, forceRoot)
wr.finish().digestTo(HashKey)
proc serialise*(
db: AristoDbRef;