diff --git a/nimbus/db/aristo/README.md b/nimbus/db/aristo/README.md index 8c19b23dc..efd68caba 100644 --- a/nimbus/db/aristo/README.md +++ b/nimbus/db/aristo/README.md @@ -267,13 +267,61 @@ A *Leaf* record path segment is compact encoded. So it has at least one byte. The first byte *P0* has bit 5 set, i.e. *P0 and 0x20* is non-zero (bit 4 is also set if the right nibble is the first part of the path.) +If present, the serialisation of the payload field can be either for account +data, for RLP encoded or for unstructured data as defined below. + +### Leaf record payload serialisation for account data + + 0 +-- .. --+ + | | -- nonce, 0 or 8 bytes + +-- .. --+--+ + | | -- balance, 0, 8, or 32 bytes + +-- .. --+--+ + | | -- storage ID, 0 or 8 bytes + +-- .. --+--+ + | | -- code hash, 0, 8 or 32 bytes + +--+ .. --+--+ + | | -- bitmask(2)-word array + +--+ + + where each bitmask(2)-word array entry defines the length of + the preceding data fields: + 00 -- field is missing + 01 -- field length is 8 bytes + 10 -- field length is 32 bytes + +Apparently, entries 0 and 2 of the bitmask(2) word array cannot have the +value 10 as they refer to the nonce and the storage ID data fields. So, joining +the bitmask(2)-word array to a single byte, the maximum value of that byte is +0x99. + +### Leaf record payload serialisation for RLP encoded data + + 0 +--+ .. --+ + | | | -- data, at least one byte + +--+ .. --+ + | | -- marker byte + +--+ + + where the marker byte is 0xaa + +### Leaf record payload serialisation for unstructured data + + 0 +--+ .. --+ + | | | -- data, at least one byte + +--+ .. --+ + | | -- marker byte + +--+ + + where the marker byte is 0xff + ### Descriptor record serialisation 0 +-- .. ... 
-- recycled vertexIDs - +--+--+--+--+--+--+--+--+--+ - | | -- bottom of unused vertexIDs - +--+--+--+--+--+--+--+--+--+ + +--+--+--+--+--+--+--+--+ + | | -- bottom of unused vertexIDs + +--+--+--+--+--+--+--+--+ || | -- marker(2) + unused(6) +--+ diff --git a/nimbus/db/aristo/aristo_check/check_be.nim b/nimbus/db/aristo/aristo_check/check_be.nim index 8bef60cb8..d5990bae3 100644 --- a/nimbus/db/aristo/aristo_check/check_be.nim +++ b/nimbus/db/aristo/aristo_check/check_be.nim @@ -43,7 +43,15 @@ proc toNodeBe( ## Similar to `toNode()` but fetching from the backend only case vtx.vType: of Leaf: - return ok NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData) + let node = NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData) + if vtx.lData.pType == AccountData: + let vid = vtx.lData.account.storageID + if vid.isValid: + let rc = db.getKeyBackend vid + if rc.isErr or not rc.value.isValid: + return err(vid) + node.key[0] = rc.value + return ok node of Branch: let node = NodeRef(vType: Branch, bVid: vtx.bVid) var missing: seq[VertexID] diff --git a/nimbus/db/aristo/aristo_constants.nim b/nimbus/db/aristo/aristo_constants.nim index 2011a84c2..d0629bfdf 100644 --- a/nimbus/db/aristo/aristo_constants.nim +++ b/nimbus/db/aristo/aristo_constants.nim @@ -24,7 +24,7 @@ const EmptyVidSeq* = seq[VertexID].default ## Useful shortcut - VOID_CODE_KEY* = EMPTY_CODE_HASH.to(HashKey) + VOID_CODE_HASH* = EMPTY_CODE_HASH ## Equivalent of `nil` for `Account` object code hash VOID_HASH_KEY* = EMPTY_ROOT_HASH.to(HashKey) diff --git a/nimbus/db/aristo/aristo_debug.nim b/nimbus/db/aristo/aristo_debug.nim index 89cc903f5..d814fbc71 100644 --- a/nimbus/db/aristo/aristo_debug.nim +++ b/nimbus/db/aristo/aristo_debug.nim @@ -97,8 +97,6 @@ proc ppKey(key: HashKey): string = return "£ø" if key == VOID_HASH_KEY: return "£r" - if key == VOID_CODE_KEY: - return "£c" "%" & key.ByteArray32 .mapIt(it.toHex(2)).join.tolowerAscii @@ -109,8 +107,6 @@ proc ppLabel(lbl: HashLabel; db: AristoDbRef): string = 
return "£ø" if lbl.key == VOID_HASH_KEY: return "£r" - if lbl.key == VOID_CODE_KEY: - return "£c" let rid = if not lbl.root.isValid: "ø:" else: ($lbl.root.uint64.toHex).stripZeros & ":" @@ -132,8 +128,7 @@ proc ppRootKey(a: HashKey): string = return a.ppKey proc ppCodeKey(a: HashKey): string = - if a != VOID_CODE_KEY: - return a.ppKey + a.ppKey proc ppLeafTie(lty: LeafTie, db: AristoDbRef): string = if not db.top.isNil: @@ -157,14 +152,22 @@ proc ppPayload(p: PayloadRef, db: AristoDbRef): string = result = "n/a" else: case p.pType: - of BlobData: - result &= p.blob.toHex.squeeze(hex=true) + of RawData: + result &= p.rawBlob.toHex.squeeze(hex=true) + of RlpData: + result &= "(" & p.rlpBlob.toHex.squeeze(hex=true) & ")" of AccountData: result = "(" result &= $p.account.nonce & "," result &= $p.account.balance & "," - result &= p.account.storageRoot.to(HashKey).ppRootKey() & "," + result &= p.account.storageID.ppVid & "," result &= p.account.codeHash.to(HashKey).ppCodeKey() & ")" + of LegacyAccount: + result = "(" + result &= $p.legaAcc.nonce & "," + result &= $p.legaAcc.balance & "," + result &= p.legaAcc.storageRoot.to(HashKey).ppRootKey() & "," + result &= p.legaAcc.codeHash.to(HashKey).ppCodeKey() & ")" proc ppVtx(nd: VertexRef, db: AristoDbRef, vid: VertexID): string = if not nd.isValid: diff --git a/nimbus/db/aristo/aristo_desc/aristo_error.nim b/nimbus/db/aristo/aristo_desc/aristo_error.nim index 38e2108b1..810d8f29e 100644 --- a/nimbus/db/aristo/aristo_desc/aristo_error.nim +++ b/nimbus/db/aristo/aristo_desc/aristo_error.nim @@ -42,6 +42,12 @@ type DeblobLeafGotExtPrefix DeblobSizeGarbled DeblobWrongType + DeblobPayloadTooShortInt64 + DeblobPayloadTooShortInt256 + DeblobNonceLenUnsupported + DeblobBalanceLenUnsupported + DeblobStorageLenUnsupported + DeblobCodeLenUnsupported # Converter `asNode()`, currenly for unit tests only CacheMissingNodekeys diff --git a/nimbus/db/aristo/aristo_desc/aristo_types_identifiers.nim 
b/nimbus/db/aristo/aristo_desc/aristo_types_identifiers.nim index 750fbcaeb..e14fd7275 100644 --- a/nimbus/db/aristo/aristo_desc/aristo_types_identifiers.nim +++ b/nimbus/db/aristo/aristo_desc/aristo_types_identifiers.nim @@ -168,8 +168,11 @@ func `==`*(a, b: HashKey): bool = ## Table/KeyedQueue mixin a.ByteArray32 == b.ByteArray32 -func read*[T: HashID|HashKey](rlp: var Rlp, W: type T): T - {.gcsafe, raises: [RlpError].} = +func read*[T: HashID|HashKey]( + rlp: var Rlp; + W: type T; + ): T + {.gcsafe, raises: [RlpError].} = rlp.read(Hash256).to(T) func append*(writer: var RlpWriter, val: HashID|HashKey) = diff --git a/nimbus/db/aristo/aristo_desc/aristo_types_structural.nim b/nimbus/db/aristo/aristo_desc/aristo_types_structural.nim index bfbae49a4..7ff7d0b3c 100644 --- a/nimbus/db/aristo/aristo_desc/aristo_types_structural.nim +++ b/nimbus/db/aristo/aristo_desc/aristo_types_structural.nim @@ -25,18 +25,42 @@ type Extension Branch + AristoAccount* = object + nonce*: AccountNonce ## Some `uint64` type + balance*: UInt256 + storageID*: VertexID ## Implies storage root Merkle hash key + codeHash*: Hash256 + PayloadType* = enum - ## Type of leaf data (to be extended) - BlobData ## Generic data, typically RLP encoded - AccountData ## Legacy `Account` with hash references - # AristoAccount ## `Aristo account` with vertex IDs links + ## Type of leaf data. On the Aristo backend, data are serialised as + ## follows: + ## + ## * Opaque data => opaque data, marked `0xff` + ## * `Account` object => RLP encoded data, marked `0xaa` + ## * `AristoAccount` object => serialised account, marked `0x99` or smaller + ## + ## On deserialisation from the Aristo backend, there is no reverese for an + ## `Account` object. It rather is kept as an RLP encoded `Blob`. 
+ ## + ## * opaque data, marked `0xff` => `RawData` + ## * RLP encoded data, marked `0xaa` => `RlpData` + ## * erialised account, marked `0x99` or smaller => `AccountData` + ## + RawData ## Generic data + RlpData ## Marked RLP encoded + AccountData ## `Aristo account` with vertex IDs links + LegacyAccount ## Legacy `Account` with hash references PayloadRef* = ref object case pType*: PayloadType - of BlobData: - blob*: Blob ## Opaque data value reference + of RawData: + rawBlob*: Blob ## Opaque data, default value + of RlpData: + rlpBlob*: Blob ## Opaque data marked RLP encoded of AccountData: - account*: Account ## Expanded accounting data + account*: AristoAccount + of LegacyAccount: + legaAcc*: Account ## Expanded accounting data VertexRef* = ref object of RootRef ## Vertex for building a hexary Patricia or Merkle Patricia Trie @@ -54,7 +78,7 @@ type ## Combined record for a *traditional* ``Merkle Patricia Tree` node merged ## with a structural `VertexRef` type object. error*: AristoError ## Can be used for error signalling - key*: array[16,HashKey] ## Merkle hash/es for Branch & Extension + key*: array[16,HashKey] ## Merkle hash/es for vertices # ------------------------------------------------------------------------------ # Public helpers: `NodeRef` and `PayloadRef` @@ -70,12 +94,18 @@ proc `==`*(a, b: PayloadRef): bool = if a.pType != b.pType: return false case a.pType: - of BlobData: - if a.blob != b.blob: + of RawData: + if a.rawBlob != b.rawBlob: + return false + of RlpData: + if a.rlpBlob != b.rlpBlob: return false of AccountData: if a.account != b.account: return false + of LegacyAccount: + if a.legaAcc != b.legaAcc: + return false true proc `==`*(a, b: VertexRef): bool = @@ -120,25 +150,25 @@ proc `==`*(a, b: NodeRef): bool = # Public helpers, miscellaneous functions # ------------------------------------------------------------------------------ -proc convertTo*(payload: PayloadRef; T: type Blob): T = - ## Probably lossy conversion as the storage type 
`kind` gets missing - case payload.pType: - of BlobData: - result = payload.blob - of AccountData: - result = rlp.encode payload.account - proc dup*(pld: PayloadRef): PayloadRef = ## Duplicate payload. case pld.pType: - of BlobData: + of RawData: PayloadRef( - pType: BlobData, - blob: pld.blob) + pType: RawData, + rawBlob: pld.rawBlob) + of RlpData: + PayloadRef( + pType: RlpData, + rlpBlob: pld.rlpBlob) of AccountData: PayloadRef( pType: AccountData, account: pld.account) + of LegacyAccount: + PayloadRef( + pType: LegacyAccount, + legaAcc: pld.legaAcc) proc dup*(vtx: VertexRef): VertexRef = ## Duplicate vertex. diff --git a/nimbus/db/aristo/aristo_hashify/hashify_helper.nim b/nimbus/db/aristo/aristo_hashify/hashify_helper.nim index 32ddc33e2..1d8600c97 100644 --- a/nimbus/db/aristo/aristo_hashify/hashify_helper.nim +++ b/nimbus/db/aristo/aristo_hashify/hashify_helper.nim @@ -29,7 +29,16 @@ proc toNode*( ## Convert argument vertex to node case vtx.vType: of Leaf: - return ok NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData) + let node = NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData) + # Need to resolve storage root for account leaf + if vtx.lData.pType == AccountData: + let vid = vtx.lData.account.storageID + if vid.isValid: + let key = db.getKey vid + if not key.isValid: + return err(@[vid]) + node.key[0] = key + return ok node of Branch: let node = NodeRef(vType: Branch, bVid: vtx.bVid) var missing: seq[VertexID] diff --git a/nimbus/db/aristo/aristo_transcode.nim b/nimbus/db/aristo/aristo_transcode.nim index 10610b8e6..3d10c03e5 100644 --- a/nimbus/db/aristo/aristo_transcode.nim +++ b/nimbus/db/aristo/aristo_transcode.nim @@ -12,18 +12,50 @@ import std/[bitops, sequtils], - eth/[common, trie/nibbles], + eth/[common, rlp, trie/nibbles], stew/results, "."/[aristo_constants, aristo_desc] # ------------------------------------------------------------------------------ -# Private functions +# Private helper # 
------------------------------------------------------------------------------ proc aristoError(error: AristoError): NodeRef = ## Allows returning de NodeRef(vType: Leaf, error: error) +proc load64(data: Blob; start: var int): Result[uint64,AristoError] = + if data.len < start + 9: + return err(DeblobPayloadTooShortInt64) + let val = uint64.fromBytesBE(data[start ..< start + 8]) + start += 8 + ok val + +proc load256(data: Blob; start: var int): Result[UInt256,AristoError] = + if data.len < start + 33: + return err(DeblobPayloadTooShortInt256) + let val = UInt256.fromBytesBE(data[start ..< start + 32]) + start += 32 + ok val + +proc toPayloadBlob(node: NodeRef): Blob = + ## Probably lossy conversion as the storage type `kind` gets missing + let pyl = node.lData + case pyl.pType: + of RawData: + result = pyl.rawBlob + of RlpData: + result = pyl.rlpBlob + of LegacyAccount: + result = rlp.encode pyl.legaAcc + of AccountData: + let key = if pyl.account.storageID.isValid: node.key[0] else: VOID_HASH_KEY + result = rlp.encode Account( + nonce: pyl.account.nonce, + balance: pyl.account.balance, + storageRoot: key.to(Hash256), + codeHash: pyl.account.codeHash) + # ------------------------------------------------------------------------------ # Public RLP transcoder mixins # ------------------------------------------------------------------------------ @@ -71,11 +103,11 @@ proc read*( let (isLeaf, pathSegment) = hexPrefixDecode blobs[0] if isLeaf: return NodeRef( - vType: Leaf, - lPfx: pathSegment, - lData: PayloadRef( - pType: BlobData, - blob: blobs[1])) + vType: Leaf, + lPfx: pathSegment, + lData: PayloadRef( + pType: RawData, + rawBlob: blobs[1])) else: var node = NodeRef( vType: Extension, @@ -121,12 +153,46 @@ proc append*(writer: var RlpWriter; node: NodeRef) = of Leaf: writer.startList(2) writer.append node.lPfx.hexPrefixEncode(isleaf = true) - writer.append node.lData.convertTo(Blob) + writer.append node.toPayloadBlob # 
------------------------------------------------------------------------------ -# Public db record transcoders +# Private functions # ------------------------------------------------------------------------------ +proc blobify*(pyl: PayloadRef): Blob = + if pyl.isNil: + return + case pyl.pType + of RawData: + result = pyl.rawBlob & @[0xff.byte] + of RlpData: + result = pyl.rlpBlob & @[0xaa.byte] + of LegacyAccount: + result = pyl.legaAcc.encode & @[0xaa.byte] # also RLP data + + of AccountData: + var mask: byte + if 0 < pyl.account.nonce: + mask = mask or 0x01 + result &= pyl.account.nonce.uint64.toBytesBE.toSeq + + if high(uint64).u256 < pyl.account.balance: + mask = mask or 0x08 + result &= pyl.account.balance.UInt256.toBytesBE.toSeq + elif 0 < pyl.account.balance: + mask = mask or 0x04 + result &= pyl.account.balance.truncate(uint64).uint64.toBytesBE.toSeq + + if VertexID(0) < pyl.account.storageID: + mask = mask or 0x10 + result &= pyl.account.storageID.uint64.toBytesBE.toSeq + + if pyl.account.codeHash != VOID_CODE_HASH: + mask = mask or 0x80 + result &= pyl.account.codeHash.data.toSeq + + result &= @[mask] + proc blobify*(vtx: VertexRef; data: var Blob): AristoError = ## This function serialises the vertex argument to a database record. ## Contrary to RLP based serialisation, these records aim to align on @@ -181,7 +247,8 @@ proc blobify*(vtx: VertexRef; data: var Blob): AristoError = psLen = pSegm.len.byte if psLen == 0 or 33 < psLen: return BlobifyLeafPathOverflow - data = vtx.lData.convertTo(Blob) & pSegm & @[0xC0u8 or psLen] + data = vtx.lData.blobify & pSegm & @[0xC0u8 or psLen] + proc blobify*(vtx: VertexRef): Result[Blob, AristoError] = ## Variant of `blobify()` @@ -192,7 +259,6 @@ proc blobify*(vtx: VertexRef): Result[Blob, AristoError] = return err(info) ok(data) - proc blobify*(vGen: openArray[VertexID]; data: var Blob) = ## This function serialises the key generator used in the `AristoDb` ## descriptor. 
@@ -213,6 +279,73 @@ proc blobify*(vGen: openArray[VertexID]): Blob = ## Variant of `blobify()` vGen.blobify result +# ------------- + +proc deblobify(data: Blob; pyl: var PayloadRef): AristoError = + if data.len == 0: + pyl = PayloadRef(pType: RawData) + return + + let mask = data[^1] + if mask == 0xff: + pyl = PayloadRef(pType: RawData, rawBlob: data[0 .. ^2]) + return + if mask == 0xaa: + pyl = PayloadRef(pType: RlpData, rlpBlob: data[0 .. ^2]) + return + var + pAcc = PayloadRef(pType: AccountData) + start = 0 + + case mask and 0x03: + of 0x00: + discard + of 0x01: + let rc = data.load64 start + if rc.isErr: + return rc.error + pAcc.account.nonce = rc.value.AccountNonce + else: + return DeblobNonceLenUnsupported + + case mask and 0x0c: + of 0x00: + discard + of 0x04: + let rc = data.load64 start + if rc.isErr: + return rc.error + pAcc.account.balance = rc.value.u256 + of 0x08: + let rc = data.load256 start + if rc.isErr: + return rc.error + pAcc.account.balance = rc.value + else: + return DeblobBalanceLenUnsupported + + case mask and 0x30: + of 0x00: + discard + of 0x10: + let rc = data.load64 start + if rc.isErr: + return rc.error + pAcc.account.storageID = rc.value.VertexID + else: + return DeblobStorageLenUnsupported + + case mask and 0xc0: + of 0x00: + discard + of 0x80: + if data.len < start + 33: + return DeblobPayloadTooShortInt256 + (addr pAcc.account.codeHash.data[0]).copyMem(unsafeAddr data[start], 32) + else: + return DeblobCodeLenUnsupported + + pyl = pacc proc deblobify*(record: Blob; vtx: var VertexRef): AristoError = ## De-serialise a data record encoded with `blobify()`. 
The second @@ -272,15 +405,18 @@ proc deblobify*(record: Blob; vtx: var VertexRef): AristoError = let (isLeaf, pathSegment) = hexPrefixDecode record[pLen ..< rLen] if not isLeaf: return DeblobLeafGotExtPrefix + var pyl: PayloadRef + let err = record[0 ..< plen].deblobify(pyl) + if err != AristoError(0): + return err vtx = VertexRef( - vType: Leaf, - lPfx: pathSegment, - lData: PayloadRef( - pType: BlobData, - blob: record[0 ..< plen])) + vType: Leaf, + lPfx: pathSegment, + lData: pyl) else: return DeblobUnknown + proc deblobify*(data: Blob; T: type VertexRef): Result[T,AristoError] = ## Variant of `deblobify()` for vertex deserialisation. var vtx = T(nil) # will be auto-initialised @@ -289,7 +425,6 @@ proc deblobify*(data: Blob; T: type VertexRef): Result[T,AristoError] = return err(info) ok vtx - proc deblobify*(data: Blob; vGen: var seq[VertexID]): AristoError = ## De-serialise the data record encoded with `blobify()` into the vertex ID ## generator argument `vGen`. diff --git a/tests/test_aristo.nim b/tests/test_aristo.nim index f4cc8459d..26f4d70f2 100644 --- a/tests/test_aristo.nim +++ b/tests/test_aristo.nim @@ -265,11 +265,11 @@ when isMainModule: setErrorLevel() - when true and false: + when true: # and false: noisy.miscRunner() # Borrowed from `test_sync_snap.nim` - when true and false: + when true: # and false: for n,sam in snapTestList: noisy.transcodeRunner(sam) for n,sam in snapTestStorageList: diff --git a/tests/test_aristo/test_helpers.nim b/tests/test_aristo/test_helpers.nim index abde6672e..c7c28b95b 100644 --- a/tests/test_aristo/test_helpers.nim +++ b/tests/test_aristo/test_helpers.nim @@ -153,7 +153,7 @@ proc to*(ua: seq[UndumpAccounts]; T: type seq[ProofTrieData]): T = leafTie: LeafTie( root: rootVid, path: it.accKey.to(HashKey).to(HashID)), - payload: PayloadRef(pType: BlobData, blob: it.accBlob)))) + payload: PayloadRef(pType: RawData, rawBlob: it.accBlob)))) proc to*(us: seq[UndumpStorages]; T: type seq[ProofTrieData]): T = var (rootKey, 
rootVid) = (VOID_HASH_KEY, VertexID(0)) @@ -170,7 +170,7 @@ proc to*(us: seq[UndumpStorages]; T: type seq[ProofTrieData]): T = leafTie: LeafTie( root: rootVid, path: it.slotHash.to(HashKey).to(HashID)), - payload: PayloadRef(pType: BlobData, blob: it.slotData)))) + payload: PayloadRef(pType: RawData, rawBlob: it.slotData)))) if 0 < result.len: result[^1].proof = s.data.proof diff --git a/tests/test_aristo/test_transcode.nim b/tests/test_aristo/test_transcode.nim index bf97459aa..32b8fc5ac 100644 --- a/tests/test_aristo/test_transcode.nim +++ b/tests/test_aristo/test_transcode.nim @@ -89,7 +89,7 @@ proc test_transcodeAccounts*( var adb = AristoDbRef.init BackendNone count = -1 - for (n, key,value) in rocky.walkAllDb(): + for (n, key, value) in rocky.walkAllDb(): if stopAfter < n: break count = n @@ -112,10 +112,10 @@ proc test_transcodeAccounts*( else: case node.vType: of aristo_desc.Leaf: - let account = node.lData.blob.decode(Account) - node.lData = PayloadRef(pType: AccountData, account: account) - discard adb.hashToVtxID(VertexID(1), node.lData.account.storageRoot) - discard adb.hashToVtxID(VertexID(1), node.lData.account.codeHash) + let account = node.lData.rawBlob.decode(Account) + node.lData = PayloadRef(pType: LegacyAccount, legaAcc: account) + discard adb.hashToVtxID(VertexID(1), node.lData.legaAcc.storageRoot) + discard adb.hashToVtxID(VertexID(1), node.lData.legaAcc.codeHash) of aristo_desc.Extension: # key <-> vtx correspondence check node.key[0] == node0.key[0] @@ -146,8 +146,9 @@ proc test_transcodeAccounts*( block: # `deblobify()` will always decode to `BlobData` type payload if node1.vType == aristo_desc.Leaf: - let account = node1.lData.blob.decode(Account) - node1.lData = PayloadRef(pType: AccountData, account: account) + # Note that deserialisation of the account stops at the RLP encoding + let account = node1.lData.rlpBlob.decode(Account) + node1.lData = PayloadRef(pType: LegacyAccount, legaAcc: account) if node != node1: check node == node1