Extended data Payload specs for the backend. (#1630)

why:
  For the main tree with root vertex ID 1, the leaf nodes hold the
  account data. These accounts may link to sub trees the storage root
  node ID of which must be registered here. There is no reverse key
  lookup on the backend.

note:
  These definitions are experimental. Also, there are some tests missing
  for validating Payload data conversions.
This commit is contained in:
Jordan Hrycaj 2023-07-05 21:27:48 +01:00 committed by GitHub
parent ab13e43db8
commit 93a72025a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 309 additions and 66 deletions

View File

@ -267,13 +267,61 @@ A *Leaf* record path segment is compact encoded. So it has at least one byte.
The first byte *P0* has bit 5 set, i.e. *P0 and 0x20* is non-zero (bit 4 is
also set if the right nibble is the first part of the path.)
If present, the serialisation of the payload field can be either for account
data, for RLP encoded or for unstructured data as defined below.
### Leaf record payload serialisation for account data
0 +-- .. --+
| | -- nonce, 0 or 8 bytes
+-- .. --+--+
| | -- balance, 0, 8, or 32 bytes
+-- .. --+--+
| | -- storage ID, 0 or 8 bytes
+-- .. --+--+
| | -- code hash, 0, 8 or 32 bytes
+--+ .. --+--+
| | -- bitmask(2)-word array
+--+
where each bitmask(2)-word array entry defines the length of
the preceding data fields:
00 -- field is missing
01 -- field length is 8 bytes
10 -- field length is 32 bytes
Apparently, entries 0 and 2 of the bitmask(2) word array cannot have the
value 10 as they refer to the nonce and the storage ID data fields. So, joining
the bitmask(2)-word array to a single byte, the maximum value of that byte is
0x99.
### Leaf record payload serialisation for RLP encoded data
0 +--+ .. --+
| | | -- data, at least one byte
+--+ .. --+
| | -- marker byte
+--+
where the marker byte is 0xaa
### Leaf record payload serialisation for unstructured data
0 +--+ .. --+
| | | -- data, at least one byte
+--+ .. --+
| | -- marker byte
+--+
where the marker byte is 0xff
### Descriptor record serialisation
0 +-- ..
... -- recycled vertexIDs
+--+--+--+--+--+--+--+--+--+
| | -- bottom of unused vertexIDs
+--+--+--+--+--+--+--+--+--+
+--+--+--+--+--+--+--+--+
| | -- bottom of unused vertexIDs
+--+--+--+--+--+--+--+--+
|| | -- marker(2) + unused(6)
+--+

View File

@ -43,7 +43,15 @@ proc toNodeBe(
## Similar to `toNode()` but fetching from the backend only
case vtx.vType:
of Leaf:
return ok NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData)
let node = NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData)
if vtx.lData.pType == AccountData:
let vid = vtx.lData.account.storageID
if vid.isValid:
let rc = db.getKeyBackend vid
if rc.isErr or not rc.value.isValid:
return err(vid)
node.key[0] = rc.value
return ok node
of Branch:
let node = NodeRef(vType: Branch, bVid: vtx.bVid)
var missing: seq[VertexID]

View File

@ -24,7 +24,7 @@ const
EmptyVidSeq* = seq[VertexID].default
## Useful shortcut
VOID_CODE_KEY* = EMPTY_CODE_HASH.to(HashKey)
VOID_CODE_HASH* = EMPTY_CODE_HASH
## Equivalent of `nil` for `Account` object code hash
VOID_HASH_KEY* = EMPTY_ROOT_HASH.to(HashKey)

View File

@ -97,8 +97,6 @@ proc ppKey(key: HashKey): string =
return "£ø"
if key == VOID_HASH_KEY:
return "£r"
if key == VOID_CODE_KEY:
return "£c"
"%" & key.ByteArray32
.mapIt(it.toHex(2)).join.tolowerAscii
@ -109,8 +107,6 @@ proc ppLabel(lbl: HashLabel; db: AristoDbRef): string =
return "£ø"
if lbl.key == VOID_HASH_KEY:
return "£r"
if lbl.key == VOID_CODE_KEY:
return "£c"
let rid = if not lbl.root.isValid: "ø:"
else: ($lbl.root.uint64.toHex).stripZeros & ":"
@ -132,8 +128,7 @@ proc ppRootKey(a: HashKey): string =
return a.ppKey
proc ppCodeKey(a: HashKey): string =
if a != VOID_CODE_KEY:
return a.ppKey
a.ppKey
proc ppLeafTie(lty: LeafTie, db: AristoDbRef): string =
if not db.top.isNil:
@ -157,14 +152,22 @@ proc ppPayload(p: PayloadRef, db: AristoDbRef): string =
result = "n/a"
else:
case p.pType:
of BlobData:
result &= p.blob.toHex.squeeze(hex=true)
of RawData:
result &= p.rawBlob.toHex.squeeze(hex=true)
of RlpData:
result &= "(" & p.rlpBlob.toHex.squeeze(hex=true) & ")"
of AccountData:
result = "("
result &= $p.account.nonce & ","
result &= $p.account.balance & ","
result &= p.account.storageRoot.to(HashKey).ppRootKey() & ","
result &= p.account.storageID.ppVid & ","
result &= p.account.codeHash.to(HashKey).ppCodeKey() & ")"
of LegacyAccount:
result = "("
result &= $p.legaAcc.nonce & ","
result &= $p.legaAcc.balance & ","
result &= p.legaAcc.storageRoot.to(HashKey).ppRootKey() & ","
result &= p.legaAcc.codeHash.to(HashKey).ppCodeKey() & ")"
proc ppVtx(nd: VertexRef, db: AristoDbRef, vid: VertexID): string =
if not nd.isValid:

View File

@ -42,6 +42,12 @@ type
DeblobLeafGotExtPrefix
DeblobSizeGarbled
DeblobWrongType
DeblobPayloadTooShortInt64
DeblobPayloadTooShortInt256
DeblobNonceLenUnsupported
DeblobBalanceLenUnsupported
DeblobStorageLenUnsupported
DeblobCodeLenUnsupported
# Converter `asNode()`, currently for unit tests only
CacheMissingNodekeys

View File

@ -168,8 +168,11 @@ func `==`*(a, b: HashKey): bool =
## Table/KeyedQueue mixin
a.ByteArray32 == b.ByteArray32
func read*[T: HashID|HashKey](rlp: var Rlp, W: type T): T
{.gcsafe, raises: [RlpError].} =
func read*[T: HashID|HashKey](
rlp: var Rlp;
W: type T;
): T
{.gcsafe, raises: [RlpError].} =
rlp.read(Hash256).to(T)
func append*(writer: var RlpWriter, val: HashID|HashKey) =

View File

@ -25,18 +25,42 @@ type
Extension
Branch
AristoAccount* = object
nonce*: AccountNonce ## Some `uint64` type
balance*: UInt256
storageID*: VertexID ## Implies storage root Merkle hash key
codeHash*: Hash256
PayloadType* = enum
## Type of leaf data (to be extended)
BlobData ## Generic data, typically RLP encoded
AccountData ## Legacy `Account` with hash references
# AristoAccount ## `Aristo account` with vertex IDs links
## Type of leaf data. On the Aristo backend, data are serialised as
## follows:
##
## * Opaque data => opaque data, marked `0xff`
## * `Account` object => RLP encoded data, marked `0xaa`
## * `AristoAccount` object => serialised account, marked `0x99` or smaller
##
## On deserialisation from the Aristo backend, there is no reverse mapping
## to an `Account` object. It is rather kept as an RLP encoded `Blob`.
##
## * opaque data, marked `0xff` => `RawData`
## * RLP encoded data, marked `0xaa` => `RlpData`
## * serialised account, marked `0x99` or smaller => `AccountData`
##
RawData ## Generic data
RlpData ## Marked RLP encoded
AccountData ## `Aristo account` with vertex IDs links
LegacyAccount ## Legacy `Account` with hash references
PayloadRef* = ref object
case pType*: PayloadType
of BlobData:
blob*: Blob ## Opaque data value reference
of RawData:
rawBlob*: Blob ## Opaque data, default value
of RlpData:
rlpBlob*: Blob ## Opaque data marked RLP encoded
of AccountData:
account*: Account ## Expanded accounting data
account*: AristoAccount
of LegacyAccount:
legaAcc*: Account ## Expanded accounting data
VertexRef* = ref object of RootRef
## Vertex for building a hexary Patricia or Merkle Patricia Trie
@ -54,7 +78,7 @@ type
## Combined record for a *traditional* ``Merkle Patricia Tree` node merged
## with a structural `VertexRef` type object.
error*: AristoError ## Can be used for error signalling
key*: array[16,HashKey] ## Merkle hash/es for Branch & Extension
key*: array[16,HashKey] ## Merkle hash/es for vertices
# ------------------------------------------------------------------------------
# Public helpers: `NodeRef` and `PayloadRef`
@ -70,12 +94,18 @@ proc `==`*(a, b: PayloadRef): bool =
if a.pType != b.pType:
return false
case a.pType:
of BlobData:
if a.blob != b.blob:
of RawData:
if a.rawBlob != b.rawBlob:
return false
of RlpData:
if a.rlpBlob != b.rlpBlob:
return false
of AccountData:
if a.account != b.account:
return false
of LegacyAccount:
if a.legaAcc != b.legaAcc:
return false
true
proc `==`*(a, b: VertexRef): bool =
@ -120,25 +150,25 @@ proc `==`*(a, b: NodeRef): bool =
# Public helpers, miscellaneous functions
# ------------------------------------------------------------------------------
proc convertTo*(payload: PayloadRef; T: type Blob): T =
## Probably lossy conversion as the storage type `kind` gets missing
case payload.pType:
of BlobData:
result = payload.blob
of AccountData:
result = rlp.encode payload.account
proc dup*(pld: PayloadRef): PayloadRef =
## Duplicate payload.
case pld.pType:
of BlobData:
of RawData:
PayloadRef(
pType: BlobData,
blob: pld.blob)
pType: RawData,
rawBlob: pld.rawBlob)
of RlpData:
PayloadRef(
pType: RlpData,
rlpBlob: pld.rlpBlob)
of AccountData:
PayloadRef(
pType: AccountData,
account: pld.account)
of LegacyAccount:
PayloadRef(
pType: LegacyAccount,
legaAcc: pld.legaAcc)
proc dup*(vtx: VertexRef): VertexRef =
## Duplicate vertex.

View File

@ -29,7 +29,16 @@ proc toNode*(
## Convert argument vertex to node
case vtx.vType:
of Leaf:
return ok NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData)
let node = NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData)
# Need to resolve storage root for account leaf
if vtx.lData.pType == AccountData:
let vid = vtx.lData.account.storageID
if vid.isValid:
let key = db.getKey vid
if not key.isValid:
return err(@[vid])
node.key[0] = key
return ok node
of Branch:
let node = NodeRef(vType: Branch, bVid: vtx.bVid)
var missing: seq[VertexID]

View File

@ -12,18 +12,50 @@
import
std/[bitops, sequtils],
eth/[common, trie/nibbles],
eth/[common, rlp, trie/nibbles],
stew/results,
"."/[aristo_constants, aristo_desc]
# ------------------------------------------------------------------------------
# Private functions
# Private helper
# ------------------------------------------------------------------------------
proc aristoError(error: AristoError): NodeRef =
  ## Build a `Leaf` type `NodeRef` that carries nothing but the argument
  ## `error` code in its `error` field.
  result = NodeRef(vType: Leaf, error: error)
proc load64(data: Blob; start: var int): Result[uint64,AristoError] =
  ## Read a big-endian `uint64` from `data` at offset `start` and advance
  ## `start` by the 8 bytes consumed.
  ##
  ## The 8 bytes read must be followed by at least one more byte (the caller
  ## `deblobify()` keeps the bitmask in the last byte of `data`.) Otherwise
  ## `DeblobPayloadTooShortInt64` is returned and `start` is left unchanged.
  let stop = start + 8
  if stop < data.len:
    let word = uint64.fromBytesBE(data[start ..< stop])
    start = stop
    return ok word
  err(DeblobPayloadTooShortInt64)
proc load256(data: Blob; start: var int): Result[UInt256,AristoError] =
  ## Read a big-endian `UInt256` from `data` at offset `start` and advance
  ## `start` by the 32 bytes consumed.
  ##
  ## The 32 bytes read must be followed by at least one more byte (the caller
  ## `deblobify()` keeps the bitmask in the last byte of `data`.) Otherwise
  ## `DeblobPayloadTooShortInt256` is returned and `start` is left unchanged.
  let stop = start + 32
  if stop < data.len:
    let word = UInt256.fromBytesBE(data[start ..< stop])
    start = stop
    return ok word
  err(DeblobPayloadTooShortInt256)
proc toPayloadBlob(node: NodeRef): Blob =
  ## Render the payload of the leaf `node` as a `Blob`. This is a probably
  ## lossy conversion as the payload type information is not part of the
  ## result.
  let payload = node.lData
  case payload.pType:
  of RawData:
    return payload.rawBlob
  of RlpData:
    return payload.rlpBlob
  of LegacyAccount:
    return rlp.encode payload.legaAcc
  of AccountData:
    # The storage root hash is not stored with the payload. It is taken from
    # the first key slot of the node unless the storage ID is invalid, in
    # which case `VOID_HASH_KEY` is used.
    let
      acc = payload.account
      storageKey =
        if acc.storageID.isValid: node.key[0]
        else: VOID_HASH_KEY
    return rlp.encode Account(
      nonce:       acc.nonce,
      balance:     acc.balance,
      storageRoot: storageKey.to(Hash256),
      codeHash:    acc.codeHash)
# ------------------------------------------------------------------------------
# Public RLP transcoder mixins
# ------------------------------------------------------------------------------
@ -71,11 +103,11 @@ proc read*(
let (isLeaf, pathSegment) = hexPrefixDecode blobs[0]
if isLeaf:
return NodeRef(
vType: Leaf,
lPfx: pathSegment,
lData: PayloadRef(
pType: BlobData,
blob: blobs[1]))
vType: Leaf,
lPfx: pathSegment,
lData: PayloadRef(
pType: RawData,
rawBlob: blobs[1]))
else:
var node = NodeRef(
vType: Extension,
@ -121,12 +153,46 @@ proc append*(writer: var RlpWriter; node: NodeRef) =
of Leaf:
writer.startList(2)
writer.append node.lPfx.hexPrefixEncode(isleaf = true)
writer.append node.lData.convertTo(Blob)
writer.append node.toPayloadBlob
# ------------------------------------------------------------------------------
# Public db record transcoders
# Private functions
# ------------------------------------------------------------------------------
proc blobify*(pyl: PayloadRef): Blob =
  ## Serialise the payload argument `pyl`. A `nil` argument results in an
  ## empty `Blob`. Otherwise the last byte of the result is a marker:
  ##
  ## * `0xff` for `RawData`
  ## * `0xaa` for `RlpData` and for the RLP encoded `LegacyAccount`
  ## * for `AccountData`, a bitmask with two bits per field (nonce, balance,
  ##   storage ID, code hash from low to high) where `01` stands for an
  ##   8 byte entry, `10` for a 32 byte entry, and `00` for a missing field
  ##
  if pyl.isNil:
    return
  case pyl.pType
  of RawData:
    result = pyl.rawBlob & @[0xff.byte]
  of RlpData:
    result = pyl.rlpBlob & @[0xaa.byte]
  of LegacyAccount:
    result = pyl.legaAcc.encode & @[0xaa.byte] # also RLP data
  of AccountData:
    let acc = pyl.account
    var lenMask: byte

    # Nonce: written as 8 bytes unless zero
    if 0 < acc.nonce:
      lenMask = lenMask or 0x01
      result.add acc.nonce.uint64.toBytesBE.toSeq

    # Balance: 32 bytes if it exceeds `uint64`, 8 bytes if non-zero
    if high(uint64).u256 < acc.balance:
      lenMask = lenMask or 0x08
      result.add acc.balance.UInt256.toBytesBE.toSeq
    elif 0 < acc.balance:
      lenMask = lenMask or 0x04
      result.add acc.balance.truncate(uint64).uint64.toBytesBE.toSeq

    # Storage ID: written as 8 bytes unless zero
    if VertexID(0) < acc.storageID:
      lenMask = lenMask or 0x10
      result.add acc.storageID.uint64.toBytesBE.toSeq

    # Code hash: written as 32 bytes unless it is the void code hash
    if acc.codeHash != VOID_CODE_HASH:
      lenMask = lenMask or 0x80
      result.add acc.codeHash.data.toSeq

    result.add lenMask
proc blobify*(vtx: VertexRef; data: var Blob): AristoError =
## This function serialises the vertex argument to a database record.
## Contrary to RLP based serialisation, these records aim to align on
@ -181,7 +247,8 @@ proc blobify*(vtx: VertexRef; data: var Blob): AristoError =
psLen = pSegm.len.byte
if psLen == 0 or 33 < psLen:
return BlobifyLeafPathOverflow
data = vtx.lData.convertTo(Blob) & pSegm & @[0xC0u8 or psLen]
data = vtx.lData.blobify & pSegm & @[0xC0u8 or psLen]
proc blobify*(vtx: VertexRef): Result[Blob, AristoError] =
## Variant of `blobify()`
@ -192,7 +259,6 @@ proc blobify*(vtx: VertexRef): Result[Blob, AristoError] =
return err(info)
ok(data)
proc blobify*(vGen: openArray[VertexID]; data: var Blob) =
## This function serialises the key generator used in the `AristoDb`
## descriptor.
@ -213,6 +279,73 @@ proc blobify*(vGen: openArray[VertexID]): Blob =
## Variant of `blobify()`
vGen.blobify result
# -------------
proc deblobify(data: Blob; pyl: var PayloadRef): AristoError =
  ## De-serialise a payload record as produced by `blobify()` for a
  ## `PayloadRef` and store the result in the argument `pyl`. On success,
  ## the function returns `AristoError(0)`. Otherwise an error code is
  ## returned and `pyl` is left unchanged.
  ##
  ## The last byte of `data` is the marker:
  ##
  ## * `0xff` => `RawData`, the marker is stripped
  ## * `0xaa` => `RlpData`, the marker is stripped
  ## * otherwise => `AccountData` where the marker is a bitmask with two
  ##   bits per field (nonce, balance, storage ID, code hash from low to
  ##   high.) The value `01` stands for an 8 byte entry, `10` for a 32 byte
  ##   entry, and `00` for a missing field.
  ##
  ## Empty `data` gives a `RawData` payload with an empty blob.
  ##
  if data.len == 0:
    pyl = PayloadRef(pType: RawData)
    return

  let mask = data[^1]
  if mask == 0xff:
    pyl = PayloadRef(pType: RawData, rawBlob: data[0 .. ^2])
    return
  if mask == 0xaa:
    pyl = PayloadRef(pType: RlpData, rlpBlob: data[0 .. ^2])
    return

  var
    pAcc = PayloadRef(pType: AccountData)
    start = 0

  # Nonce: missing or 8 bytes
  case mask and 0x03:
  of 0x00:
    discard
  of 0x01:
    let rc = data.load64 start
    if rc.isErr:
      return rc.error
    pAcc.account.nonce = rc.value.AccountNonce
  else:
    return DeblobNonceLenUnsupported

  # Balance: missing, 8 bytes, or 32 bytes
  case mask and 0x0c:
  of 0x00:
    discard
  of 0x04:
    let rc = data.load64 start
    if rc.isErr:
      return rc.error
    pAcc.account.balance = rc.value.u256
  of 0x08:
    let rc = data.load256 start
    if rc.isErr:
      return rc.error
    pAcc.account.balance = rc.value
  else:
    return DeblobBalanceLenUnsupported

  # Storage ID: missing or 8 bytes
  case mask and 0x30:
  of 0x00:
    discard
  of 0x10:
    let rc = data.load64 start
    if rc.isErr:
      return rc.error
    pAcc.account.storageID = rc.value.VertexID
  else:
    return DeblobStorageLenUnsupported

  # Code hash: missing or 32 bytes
  case mask and 0xc0:
  of 0x00:
    # `blobify()` omits the code hash if and only if it equals
    # `VOID_CODE_HASH`, so this value must be restored here. Leaving the
    # all-zero default in place would break the serialisation round trip.
    pAcc.account.codeHash = VOID_CODE_HASH
  of 0x80:
    # 32 bytes of hash data followed by the bitmask byte
    if data.len < start + 33:
      return DeblobPayloadTooShortInt256
    for n in 0 ..< 32:
      pAcc.account.codeHash.data[n] = data[start + n]
  else:
    return DeblobCodeLenUnsupported

  pyl = pAcc
proc deblobify*(record: Blob; vtx: var VertexRef): AristoError =
## De-serialise a data record encoded with `blobify()`. The second
@ -272,15 +405,18 @@ proc deblobify*(record: Blob; vtx: var VertexRef): AristoError =
let (isLeaf, pathSegment) = hexPrefixDecode record[pLen ..< rLen]
if not isLeaf:
return DeblobLeafGotExtPrefix
var pyl: PayloadRef
let err = record[0 ..< plen].deblobify(pyl)
if err != AristoError(0):
return err
vtx = VertexRef(
vType: Leaf,
lPfx: pathSegment,
lData: PayloadRef(
pType: BlobData,
blob: record[0 ..< plen]))
vType: Leaf,
lPfx: pathSegment,
lData: pyl)
else:
return DeblobUnknown
proc deblobify*(data: Blob; T: type VertexRef): Result[T,AristoError] =
## Variant of `deblobify()` for vertex deserialisation.
var vtx = T(nil) # will be auto-initialised
@ -289,7 +425,6 @@ proc deblobify*(data: Blob; T: type VertexRef): Result[T,AristoError] =
return err(info)
ok vtx
proc deblobify*(data: Blob; vGen: var seq[VertexID]): AristoError =
## De-serialise the data record encoded with `blobify()` into the vertex ID
## generator argument `vGen`.

View File

@ -265,11 +265,11 @@ when isMainModule:
setErrorLevel()
when true and false:
when true: # and false:
noisy.miscRunner()
# Borrowed from `test_sync_snap.nim`
when true and false:
when true: # and false:
for n,sam in snapTestList:
noisy.transcodeRunner(sam)
for n,sam in snapTestStorageList:

View File

@ -153,7 +153,7 @@ proc to*(ua: seq[UndumpAccounts]; T: type seq[ProofTrieData]): T =
leafTie: LeafTie(
root: rootVid,
path: it.accKey.to(HashKey).to(HashID)),
payload: PayloadRef(pType: BlobData, blob: it.accBlob))))
payload: PayloadRef(pType: RawData, rawBlob: it.accBlob))))
proc to*(us: seq[UndumpStorages]; T: type seq[ProofTrieData]): T =
var (rootKey, rootVid) = (VOID_HASH_KEY, VertexID(0))
@ -170,7 +170,7 @@ proc to*(us: seq[UndumpStorages]; T: type seq[ProofTrieData]): T =
leafTie: LeafTie(
root: rootVid,
path: it.slotHash.to(HashKey).to(HashID)),
payload: PayloadRef(pType: BlobData, blob: it.slotData))))
payload: PayloadRef(pType: RawData, rawBlob: it.slotData))))
if 0 < result.len:
result[^1].proof = s.data.proof

View File

@ -89,7 +89,7 @@ proc test_transcodeAccounts*(
var
adb = AristoDbRef.init BackendNone
count = -1
for (n, key,value) in rocky.walkAllDb():
for (n, key, value) in rocky.walkAllDb():
if stopAfter < n:
break
count = n
@ -112,10 +112,10 @@ proc test_transcodeAccounts*(
else:
case node.vType:
of aristo_desc.Leaf:
let account = node.lData.blob.decode(Account)
node.lData = PayloadRef(pType: AccountData, account: account)
discard adb.hashToVtxID(VertexID(1), node.lData.account.storageRoot)
discard adb.hashToVtxID(VertexID(1), node.lData.account.codeHash)
let account = node.lData.rawBlob.decode(Account)
node.lData = PayloadRef(pType: LegacyAccount, legaAcc: account)
discard adb.hashToVtxID(VertexID(1), node.lData.legaAcc.storageRoot)
discard adb.hashToVtxID(VertexID(1), node.lData.legaAcc.codeHash)
of aristo_desc.Extension:
# key <-> vtx correspondence
check node.key[0] == node0.key[0]
@ -146,8 +146,9 @@ proc test_transcodeAccounts*(
block:
# `deblobify()` will decode a legacy account to an `RlpData` type payload
if node1.vType == aristo_desc.Leaf:
let account = node1.lData.blob.decode(Account)
node1.lData = PayloadRef(pType: AccountData, account: account)
# Note that deserialisation of the account stops at the RLP encoding
let account = node1.lData.rlpBlob.decode(Account)
node1.lData = PayloadRef(pType: LegacyAccount, legaAcc: account)
if node != node1:
check node == node1