From 605739ef4c0c5228a8fd08031a33d94cd2878668 Mon Sep 17 00:00:00 2001
From: Jordan Hrycaj
Date: Thu, 11 May 2023 15:25:29 +0100
Subject: [PATCH] Experimental MP-trie (#1573)

* Experimental MP-trie

why:
  Deleting records is infeasible with the current structure

* Added vertex ID recycling management

Todo:
  Provide some unit tests

* DB layout update

why:
  Main news is the separation of `Merkle` hashes into an extra table.

details:
  The code fragments cover conversion between compact MPT records and
  Aristo DB records as well as some rudimentary cache handling for the
  `Merkle` hashes (i.e. the extra table entries.)

todo:
  Add some simple unit test for the descriptor record (currently used
  for vertex ID management, only.)

* Updated vertex ID recycling management

details:
  added simple unit tests (mainly testing ABI)

* docu update
---
 nimbus/db/aristo/.gitignore           |   1 +
 nimbus/db/aristo/README.md            | 290 +++++++++++++++++++++++
 nimbus/db/aristo/aristo_cache.nim     | 176 ++++++++++++++
 nimbus/db/aristo/aristo_debug.nim     | 175 ++++++++++++++
 nimbus/db/aristo/aristo_desc.nim      | 231 ++++++++++++++++++
 nimbus/db/aristo/aristo_error.nim     |  50 ++++
 nimbus/db/aristo/aristo_transcode.nim | 322 ++++++++++++++++++++++++++
 tests/all_tests.nim                   |   1 +
 tests/test_aristo.nim                 | 221 ++++++++++++++++++
 tests/test_aristo/test_helpers.nim    |  73 ++++++
 tests/test_aristo/test_transcode.nim  | 232 +++++++++++++++++++
 11 files changed, 1772 insertions(+)
 create mode 100644 nimbus/db/aristo/.gitignore
 create mode 100644 nimbus/db/aristo/README.md
 create mode 100644 nimbus/db/aristo/aristo_cache.nim
 create mode 100644 nimbus/db/aristo/aristo_debug.nim
 create mode 100644 nimbus/db/aristo/aristo_desc.nim
 create mode 100644 nimbus/db/aristo/aristo_error.nim
 create mode 100644 nimbus/db/aristo/aristo_transcode.nim
 create mode 100644 tests/test_aristo.nim
 create mode 100644 tests/test_aristo/test_helpers.nim
 create mode 100644 tests/test_aristo/test_transcode.nim

diff --git a/nimbus/db/aristo/.gitignore b/nimbus/db/aristo/.gitignore
new file mode 100644
index 000000000..daa30a3f7
--- /dev/null
+++ b/nimbus/db/aristo/.gitignore
@@ -0,0 +1 @@
+README.html
diff --git a/nimbus/db/aristo/README.md b/nimbus/db/aristo/README.md
new file mode 100644
index 000000000..62186bb65
--- /dev/null
+++ b/nimbus/db/aristo/README.md
@@ -0,0 +1,290 @@
+Aristo Trie -- a Patricia Trie with Merkle hash labeled edges
+=============================================================
+These data structures allow overlaying the *Patricia Trie* with *Merkle
+Trie* hashes. With a particular layout, the structure is called an
+*Aristo Trie* (Patricia = Roman Aristocrat, Patrician.)
+
+This description assumes familiarity with the abstract notion of a hexary
+*Merkle Patricia [Trie](https://en.wikipedia.org/wiki/Trie)*. Suffice it to
+say that the state of a valid *Merkle Patricia Tree* is uniquely verified by
+its top level vertex.
+
+1. Deleting entries in a compact *Merkle Patricia Tree*
+-------------------------------------------------------
+The main feature of the *Aristo Trie* representation is that no node is used
+twice in any sub-trie, as happens with the representation as a
+[compact Merkle Patricia Tree](http://archive.is/TinyK). For example,
+consider the following state data for the latter.
+
+        leaf   = (0xf,0x12345678)                                         (1)
+        branch = (a,a,a,,, ..)      with a = hash(leaf)
+        root   = hash(branch)
+
+These two nodes, called *leaf* and *branch*, and the *root* hash are a state
+(aka key-value pairs) representation as a *compact Merkle Patricia Tree*.
+The actual state is
+
+        0x0f ==> 0x12345678
+        0x1f ==> 0x12345678
+        0x2f ==> 0x12345678
+
+The elements from *(1)* can be organised in a key-value table with the
+*Merkle* hashes as lookup keys
+
+        a    -> leaf
+        root -> branch
+
+This is a space-efficient way of keeping data as there is no duplication of
+the sub-trees made up of the *leaf* node with the same payload *0x12345678*
+and path snippet *0xf*. One can imagine how this property applies to more
+general sub-trees in a similar fashion.
+
+Now delete some key-value pair of the state, e.g. for the key *0x0f*. This
+amounts to removing the first of the three *a* hashes from the *branch*
+record. The new state of the *Merkle Patricia Tree* will look like
+
+        leaf    = (0xf,0x12345678)                                        (2)
+        branch1 = (,a,a,,, ..)
+        root1   = hash(branch1)
+
+        a     -> leaf
+        root1 -> branch1
+
+A problem arises when all keys are deleted and there is no reference to the
+*leaf* data record, anymore. One should find out in general when it can be
+deleted, too. It might be unknown whether the previous states leading to here
+had only a single *Branch* record referencing this *leaf* data record.
+
+Finding a stale data record can be achieved by a *mark and sweep* algorithm,
+but it becomes too clumsy to be useful on a large state (i.e. database).
+Reference counts come to mind but maintaining these is generally error-prone
+when actors concurrently manipulate the state (i.e. database).
+
+2. *Patricia Trie* example with *Merkle hash* labelled edges
+------------------------------------------------------------
+Continuing with the example from chapter 1, the *branch* node is extended by
+an additional set of structural identifiers *x, y, z* (plus *w* for the
+root.) This allows handling the deletion of entries in a more benign way
+while keeping the *Merkle hashes* for validating sub-trees.
+
+A solution for the deletion problem is to represent the situation *(1)* as
+
+        leaf-a  = (0xf,0x12345678)      copy of leaf from (1)             (3)
+        leaf-b  = (0xf,0x12345678)      copy of leaf from (1)
+        leaf-c  = (0xf,0x12345678)      copy of leaf from (1)
+        branch2 = ((x,y,z,,, ..)(a,b,c,,, ..))
+        root2   = (w,root)              with root from (1)
+
+where
+
+        a = hash(leaf-a)                same as a from (1)
+        b = hash(leaf-b)                same as a from (1)
+        c = hash(leaf-c)                same as a from (1)
+
+        w,x,y,z                         numbers, mutually different
+
+The records above are stored in a key-value database as
+
+        w -> branch2
+        x -> leaf-a
+        y -> leaf-b
+        z -> leaf-c
+
+Then this structure encodes the key-value pairs as before
+
+        0x0f ==> 0x12345678
+        0x1f ==> 0x12345678
+        0x2f ==> 0x12345678
+
+Deleting the data for key *0x0f* now results in the new state
+
+        leaf-b  = (0xf,0x12345678)                                        (4)
+        leaf-c  = (0xf,0x12345678)
+        branch3 = ((,y,z,,, ..)(,b,c,,, ..))
+
+        w -> branch3
+        y -> leaf-b
+        z -> leaf-c
+
+Due to duplication of the *leaf* node in *(3)*, no reference count is needed
+in order to detect stale records cleanly when deleting key *0x0f*. Removing
+this key allows removing hash *a* from *branch2* as well as the structural
+key *x*, which is consequently deleted from the lookup table.
+
+A minor observation is that when manipulating a state entry, e.g. changing
+the payload associated with key *0x0f* to
+
+        0x0f ==> 0x987654321
+
+the structural layout of the above trie will not change, that is, the indexes
+*w, x, y, z* of the table that holds the data records as values stay the
+same. All that changes are the values.
+
+        leaf-d  = (0xf,0x987654321)                                       (5)
+        leaf-b  = (0xf,0x12345678)
+        leaf-c  = (0xf,0x12345678)
+        branch4 = ((x,y,z,,, ..)(d,b,c,,, ..))
+
+        root4   = (w,hash(d,b,c,,, ..))
+
+3. Discussion of the examples *(1)* and *(3)*
+---------------------------------------------
+Examples *(1)* and *(3)* differ in that the structural *Patricia Trie*
+information from *(1)* has been removed from the *Merkle hash* instances and
+implemented as separate table lookup IDs (called *vertexID*s later on.) The
+values of these lookup IDs are arbitrary as long as they are all different.
+
+In fact, the [Erigon](http://archive.is/6MJV7) project discusses a similar
+situation in **Separation of keys and the structure**, albeit aiming for
+another scenario with the goal of using mostly flat data lookup structures.
+
+A graph for example *(1)* would look like
+
+                   |
+                 root
+                   |
+           +-------------+
+           |   branch    |
+           +-------------+
+             |    |    |
+             a    a    a
+             |    |    |
+                 leaf
+
+while example *(3)* has
+
+                (root)                                                    (6)
+                   |
+                   w
+                   |
+           +-------------+
+           |   branch2   |
+           | (a) (b) (c) |
+           +-------------+
+             /    |    \
+            x     y     z
+           /      |      \
+       leaf-a  leaf-b  leaf-c
+
+The labels on the edges indicate the downward target of an edge while the
+round brackets enclose the separated *Merkle hash* information.
+
+This last example *(6)* can be completely split into a structural trie and a
+Merkle hash mapping.
+
+        structural trie            hash map                              (7)
+        ---------------            --------
+                   |               (root) -> w
+                   w               (a)    -> x
+                   |               (b)    -> y
+           +-------------+         (c)    -> z
+           |   branch2   |
+           +-------------+
+             /    |    \
+            x     y     z
+           /      |      \
+       leaf-a  leaf-b  leaf-c
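+
+The separation in *(7)* maps naturally onto two key-value tables. The
+following Nim fragment is an editor's sketch of this layout with ad-hoc
+type names (the patch below implements it with the `sTab` and `kMap`
+fields of the `AristoDbRef` descriptor):
+
+        import std/tables
+
+        type
+          VertexID = uint64             # structural key: w, x, y, z above
+          NodeKey  = array[32,byte]     # Merkle hash label
+          Vertex   = ref object         # structural node, no hashes inside
+
+        var
+          sTab: Table[VertexID,Vertex]  # structural trie
+          kMap: Table[VertexID,NodeKey] # Merkle hash map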
+
+4. *Patricia Trie* node serialisation with *Merkle hash* labelled edges
+-----------------------------------------------------------------------
+The data structure for the *Aristo Trie* follows example *(7)* by keeping
+structural information separate from the Merkle hash labels. As for
+terminology,
+
+* an *Aristo Trie* is a pair *(structural trie, hash map)* where
+* the *structural trie* realises a hexary *Patricia Trie* containing the
+  payload values in the leaf records
+* the *hash map* contains the hash information so that this trie operates as
+  a *Merkle Patricia Tree*.
+
+In order to accommodate the additional structural elements, a non-RLP-based
+data layout is used for the *Branch*, *Extension*, and *Leaf* containers
+used in the key-value table that implements the *Patricia Trie*. It is this
+particular data layout that is called an *Aristo Trie*.
+
+The structural keys *w, x, y, z* from the example *(3)* are called
+*vertexID*s and implemented as 64 bit values, stored *Big Endian* in the
+serialisation.
+
+### Branch record serialisation
+
+        0 +--+--+--+--+--+--+--+--+--+
+          |                          | -- first vertexID
+        8 +--+--+--+--+--+--+--+--+--+
+          ...                          -- more vertexIDs
+          +--+--+
+          |     |                      -- access(16) bitmap
+          +--+--+
+          ||  |                        -- marker(2) + unused(6)
+          +--+
+
+        where
+          marker(2) is the double bit array 00
+
+For a given index *n* between *0..15*, if the bit at position *n* of the bit
+vector *access(16)* is reset to zero, then there is no *n*-th structural
+*vertexID*. Otherwise one calculates
+
+        the n-th vertexID is at byte position Vn * 8
+        for Vn the number of non-zero bits in the range 0..(n-1) of access(16)
+
+Note that data are stored *Big Endian*, so the bits *0..7* of *access* are
+stored in the right byte of the serialised bitmap.
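+
+As a worked illustration (editor's sketch with an ad-hoc name; the actual
+decoder is `deblobify()` in `aristo_transcode.nim` below), the offset
+calculation can be written as
+
+        import std/bitops
+
+        proc nthVertexIdOffset(access: uint16; n: range[0..15]): int =
+          ## Byte offset of the n-th vertexID in a serialised branch
+          ## record, assuming bit `n` of `access` is set.
+          assert access.testBit(n)
+          # Vn = number of set bits below position n
+          8 * countSetBits(access and ((1u16 shl n) - 1))
+
+For instance, with vertexIDs at nibble positions *0* and *3* the bitmap is
+*0x0009* and the vertexID for nibble *3* starts at byte position *8*.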
+
+### Extension record serialisation
+
+        0 +--+--+--+--+--+--+--+--+--+
+          |                          | -- vertexID
+        8 +--+--+--+--+--+--+--+--+--+
+          |  |  ...                    -- path segment
+          +--+
+          ||  |                        -- marker(2) + pathSegmentLen(6)
+          +--+
+
+        where
+          marker(2) is the double bit array 10
+
+The path segment of the *Extension* record is compact encoded, so it has at
+least one byte. The first byte *P0* has bit 5 reset, i.e. *P0 and 0x20* is
+zero (bit 4 is set if the right nibble is the first part of the path.)
+
+Note that the *pathSegmentLen(6)* is redundant as it is determined by the
+length of the extension record (as *recordLen - 9*.)
+
+### Leaf record serialisation
+
+        0 +-- ..
+          ...                          -- payload (may be empty)
+          +--+
+          |  |  ...                    -- path segment
+          +--+
+          ||  |                        -- marker(2) + pathSegmentLen(6)
+          +--+
+
+        where
+          marker(2) is the double bit array 11
+
+A *Leaf* record path segment is compact encoded, so it has at least one
+byte. The first byte *P0* has bit 5 set, i.e. *P0 and 0x20* is non-zero (bit
+4 is also set if the right nibble is the first part of the path.)
+
+### Descriptor record serialisation
+
+        0 +-- ..
+          ...                          -- recycled vertexIDs
+          +--+--+--+--+--+--+--+--+--+
+          |                          | -- bottom of unused vertexIDs
+          +--+--+--+--+--+--+--+--+--+
+          ||  |                        -- marker(2) + unused(6)
+          +--+
+
+        where
+          marker(2) is the double bit array 01
+
+Currently, the descriptor record only contains data for producing unique
+vertexID values that can be used as structural keys. If this descriptor is
+missing, the value `(0x40000000,0x01)` is assumed. The last vertexID in the
+descriptor list has the property that all values greater than or equal to it
+are unused and can be allotted as vertexIDs.
+
+The vertexIDs in the descriptor record must all be non-zero and the record
+itself should be allocated in the structural table associated with the zero
+key.
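+
+For illustration, an editor's example with made-up values: a descriptor
+holding the recycled vertexIDs *2* and *5*, with *7* as the least vertexID
+never used, would serialise (in hex) as
+
+        0000000000000002    -- recycled vertexID 2
+        0000000000000005    -- recycled vertexID 5
+        0000000000000007    -- bottom of unused vertexIDs
+        40                  -- marker(2) = 01, unused(6) = 0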
diff --git a/nimbus/db/aristo/aristo_cache.nim b/nimbus/db/aristo/aristo_cache.nim
new file mode 100644
index 000000000..9e8ac38cf
--- /dev/null
+++ b/nimbus/db/aristo/aristo_cache.nim
@@ -0,0 +1,176 @@
+# nimbus-eth1
+# Copyright (c) 2021 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+{.push raises: [].}
+
+import
+  std/tables,
+  eth/common,
+  stew/results,
+  ../../sync/snap/range_desc,
+  "."/[aristo_desc, aristo_error, aristo_transcode]
+
+# ------------------------------------------------------------------------------
+# Private helpers
+# ------------------------------------------------------------------------------
+
+proc convertPartially(
+    db: AristoDbRef;
+    vtx: VertexRef;
+    nd: var NodeRef;
+      ): seq[VertexID] =
+  ## Convert the argument vertex `vtx` to a node by looking up the cached
+  ## `Merkle` hashes. This function does not recurse. It returns the vertex
+  ## IDs that are still missing for a complete conversion in a single step
+  ## (so an empty list means fully converted.)
+  case vtx.vType:
+  of Leaf:
+    nd = NodeRef(
+      vType: Leaf,
+      lPfx:  vtx.lPfx,
+      lData: vtx.lData)
+  of Extension:
+    nd = NodeRef(
+      vType: Extension,
+      ePfx:  vtx.ePfx,
+      eVtx:  vtx.eVtx)
+    db.kMap.withValue(vtx.eVtx, keyPtr):
+      nd.key[0] = keyPtr[]
+      return
+    result.add vtx.eVtx
+  of Branch:
+    nd = NodeRef(
+      vType: Branch,
+      bVtx:  vtx.bVtx)
+    for n in 0..15:
+      if vtx.bVtx[n].isZero:
+        continue
+      db.kMap.withValue(vtx.bVtx[n], kPtr):
+        nd.key[n] = kPtr[]
+        continue
+      result.add vtx.bVtx[n]
+
+proc convertPartiallyOk(
+    db: AristoDbRef;
+    vtx: VertexRef;
+    nd: var NodeRef;
+      ): bool =
+  ## Variant of `convertPartially()`, shortcut for
+  ## `convertPartially().len == 0`.
+  case vtx.vType:
+  of Leaf:
+    nd = NodeRef(
+      vType: Leaf,
+      lPfx:  vtx.lPfx,
+      lData: vtx.lData)
+    result = true
+  of Extension:
+    nd = NodeRef(
+      vType: Extension,
+      ePfx:  vtx.ePfx,
+      eVtx:  vtx.eVtx)
+    db.kMap.withValue(vtx.eVtx, keyPtr):
+      nd.key[0] = keyPtr[]
+      result = true
+  of Branch:
+    nd = NodeRef(
+      vType: Branch,
+      bVtx:  vtx.bVtx)
+    result = true
+    for n in 0..15:
+      if not vtx.bVtx[n].isZero:
+        db.kMap.withValue(vtx.bVtx[n], kPtr):
+          nd.key[n] = kPtr[]
+          continue
+        return false
+
+proc cachedVID(db: AristoDbRef; nodeKey: NodeKey): VertexID =
+  ## Get the vertex ID from the reverse cache (creating a new entry if
+  ## there is none, yet.)
+  db.pAmk.withValue(nodeKey, vidPtr):
+    return vidPtr[]
+  result = VertexID.new(db)
+  db.pAmk[nodeKey] = result
+  db.kMap[result] = nodeKey
+
+# ------------------------------------------------------------------------------
+# Public functions for `VertexID` => `NodeKey` mapping
+# ------------------------------------------------------------------------------
+
+proc pal*(db: AristoDbRef; vid: VertexID): NodeKey =
+  ## Retrieve the cached `Merkle` hash (aka `NodeKey` object) associated
+  ## with the `VertexID` argument `vid`. Return a zero `NodeKey` if there
+  ## is none.
+  ##
+  ## If the vertex ID `vid` is not found in the cache, then the structural
+  ## table is checked whether the cache can be updated.
+  if not db.isNil:
+
+    db.kMap.withValue(vid, keyPtr):
+      return keyPtr[]
+
+    db.sTab.withValue(vid, vtxPtr):
+      var node: NodeRef
+      if db.convertPartiallyOk(vtxPtr[],node):
+        var w = initRlpWriter()
+        w.append node
+        result = w.finish.keccakHash.data.NodeKey
+        db.kMap[vid] = result
+
+# ------------------------------------------------------------------------------
+# Public functions extending/completing vertex records
+# ------------------------------------------------------------------------------
+
+proc updated*(nd: NodeRef; db: AristoDbRef): NodeRef =
+  ## Return a copy of the argument node `nd` with updated missing vertex IDs.
+  ##
+  ## For a `Leaf` node, the payload data `PayloadRef` type reference is *not*
+  ## duplicated and returned as-is.
+  ##
+  ## This function will not complain if all `Merkle` hashes (aka `NodeKey`
+  ## objects) are zero for either `Extension` or `Leaf` nodes.
+  if not nd.isNil:
+    case nd.vType:
+    of Leaf:
+      result = NodeRef(
+        vType: Leaf,
+        lPfx:  nd.lPfx,
+        lData: nd.lData)
+    of Extension:
+      result = NodeRef(
+        vType: Extension,
+        ePfx:  nd.ePfx)
+      if not nd.key[0].isZero:
+        result.eVtx = db.cachedVID nd.key[0]
+        result.key[0] = nd.key[0]
+    of Branch:
+      result = NodeRef(
+        vType: Branch,
+        key:   nd.key)
+      for n in 0..15:
+        if not nd.key[n].isZero:
+          result.bVtx[n] = db.cachedVID nd.key[n]
+
+proc asNode*(vtx: VertexRef; db: AristoDbRef): NodeRef =
+  ## Return a `NodeRef` object by augmenting the argument vertex with the
+  ## missing `Merkle` hashes (aka `NodeKey` objects) from the cache or from
+  ## calculated cached vertex entries, if available.
+  ##
+  ## If not all `Merkle` hashes are available in a single lookup, then the
+  ## result object is a wrapper around an error code.
+  if not db.convertPartiallyOk(vtx, result):
+    return NodeRef(error: CacheMissingNodekeys)
+
+proc asNode*(rc: Result[VertexRef,AristoError]; db: AristoDbRef): NodeRef =
+  ## Variant of `asNode()`.
+ if rc.isErr: + return NodeRef(error: rc.error) + rc.value.asNode(db) + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_debug.nim b/nimbus/db/aristo/aristo_debug.nim new file mode 100644 index 000000000..c75b302c8 --- /dev/null +++ b/nimbus/db/aristo/aristo_debug.nim @@ -0,0 +1,175 @@ +# nimbus-eth1 +# Copyright (c) 2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or distributed +# except according to those terms. + +{.push raises: [].} + +import + std/[sequtils, strutils], + eth/[common, trie/nibbles], + stew/byteutils, + ../../sync/snap/range_desc, + "."/aristo_desc + +const + EMPTY_ROOT_KEY = EMPTY_ROOT_HASH.to(NodeKey) + EMPTY_CODE_KEY = EMPTY_CODE_HASH.to(NodeKey) + +# ------------------------------------------------------------------------------ +# Ptivate functions +# ------------------------------------------------------------------------------ + +proc keyVidUpdate(db: AristoDbRef, key: NodeKey, vid: VertexID): string = + if not key.isZero and + not vid.isZero and + not db.isNil: + db.pAmk.withValue(key, vidRef): + if vidRef[] != vid: + result = "(!)" + return + db.xMap.withValue(key, vidRef): + if vidRef[] == vid: + result = "(!)" + return + db.xMap[key] = vid + +proc squeeze(s: string; hex = false; ignLen = false): string = + ## For long strings print `begin..end` only + if hex: + let n = (s.len + 1) div 2 + result = if s.len < 20: s else: s[0 .. 5] & ".." & s[s.len-8 .. s.len-1] + if not ignLen: + result &= "[" & (if 0 < n: "#" & $n else: "") & "]" + elif s.len <= 30: + result = s + else: + result = if (s.len and 1) == 0: s[0 ..< 8] else: "0" & s[0 ..< 7] + if not ignLen: + result &= "..(" & $s.len & ")" + result &= ".." & s[s.len-16 ..< s.len] + +proc stripZeros(a: string): string = + for n in 0 ..< a.len: + if a[n] != '0': + return a[n .. ^1] + return a + +proc ppVid(vid: VertexID): string = + if vid.isZero: "ø" else: "$" & vid.uint64.toHex.stripZeros + +proc ppKey(key: NodeKey, db = AristoDbRef(nil)): string = + if key.isZero: + return "ø" + if key == EMPTY_ROOT_KEY: + return "£r" + if key == EMPTY_CODE_KEY: + return "£c" + + if not db.isNil: + db.pAmk.withValue(key, pRef): + return "£" & $pRef[] + db.xMap.withValue(key, xRef): + return "£" & $xRef[] + + "%" & ($key).squeeze(hex=true,ignLen=true) + +proc ppRootKey(a: NodeKey, db = AristoDbRef(nil)): string = + if a != EMPTY_ROOT_KEY: + return a.ppKey(db) + +proc ppCodeKey(a: NodeKey, db = AristoDbRef(nil)): string = + if a != EMPTY_CODE_KEY: + return a.ppKey(db) + +# ------------------------------------------------------------------------------ +# Public functions +# ------------------------------------------------------------------------------ + +proc keyToVtxID*(db: AristoDbRef, key: NodeKey): VertexID = + ## Associate a vertex ID with the argument `key` for pretty printing. 
+  if not key.isZero and
+     key != EMPTY_ROOT_KEY and
+     key != EMPTY_CODE_KEY and
+     not db.isNil:
+
+    db.xMap.withValue(key, vidPtr):
+      return vidPtr[]
+
+    result = VertexID.new db
+    db.xMap[key] = result
+
+proc pp*(vid: openArray[VertexID]): string =
+  "[" & vid.mapIt(it.ppVid).join(",") & "]"
+
+proc pp*(p: PayloadRef, db = AristoDbRef(nil)): string =
+  if p.isNil:
+    result = "n/a"
+  else:
+    case p.pType:
+    of BlobData:
+      result &= p.blob.toHex.squeeze(hex=true)
+    of AccountData:
+      result = "("
+      result &= $p.account.nonce & ","
+      result &= $p.account.balance & ","
+      result &= p.account.storageRoot.to(NodeKey).ppRootKey(db) & ","
+      result &= p.account.codeHash.to(NodeKey).ppCodeKey(db) & ")"
+
+proc pp*(nd: VertexRef, db = AristoDbRef(nil)): string =
+  if nd.isNil:
+    result = "n/a"
+  else:
+    result = ["l(", "x(", "b("][nd.vType.ord]
+    case nd.vType:
+    of Leaf:
+      result &= $nd.lPfx & "," & nd.lData.pp(db)
+    of Extension:
+      result &= $nd.ePfx & "," & nd.eVtx.ppVid
+    of Branch:
+      result &= "["
+      for n in 0..15:
+        if not nd.bVtx[n].isZero:
+          result &= nd.bVtx[n].ppVid
+        result &= ","
+      result[^1] = ']'
+      result &= ")"
+
+proc pp*(nd: NodeRef, db = AristoDbRef(nil)): string =
+  if nd.isNil:
+    result = "n/a"
+  elif nd.isError:
+    result = "(!" & $nd.error
+  else:
+    result = ["L(", "X(", "B("][nd.vType.ord]
+    case nd.vType:
+    of Leaf:
+      result &= $nd.lPfx & "," & nd.lData.pp(db)
+
+    of Extension:
+      result &= $nd.ePfx & "," & nd.eVtx.ppVid & "," & nd.key[0].ppKey
+
+    of Branch:
+      result &= "["
+      for n in 0..15:
+        if not nd.bVtx[n].isZero or not nd.key[n].isZero:
+          result &= nd.bVtx[n].ppVid
+        result &= db.keyVidUpdate(nd.key[n], nd.bVtx[n]) & ","
+      result[^1] = ']'
+
+      result &= ",["
+      for n in 0..15:
+        if not nd.bVtx[n].isZero or not nd.key[n].isZero:
+          result &= nd.key[n].ppKey(db)
+        result &= ","
+      result[^1] = ']'
+      result &= ")"
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------
diff --git a/nimbus/db/aristo/aristo_desc.nim b/nimbus/db/aristo/aristo_desc.nim
new file mode 100644
index 000000000..5e1965c80
--- /dev/null
+++ b/nimbus/db/aristo/aristo_desc.nim
@@ -0,0 +1,231 @@
+# nimbus-eth1
+# Copyright (c) 2021 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+## Aristo DB -- a Patricia Trie with labeled edges
+## ===============================================
+##
+## These data structures allow overlaying the *Patricia Trie* with *Merkle
+## Trie* hashes. See the `README.md` in the `aristo` folder for documentation.
+
+{.push raises: [].}
+
+import
+  std/tables,
+  eth/[common, trie/nibbles],
+  stew/results,
+  ../../sync/snap/range_desc,
+  ./aristo_error
+
+type
+  VertexID* = distinct uint64    ## Tip of edge towards child, also table key
+
+  VertexType* = enum             ## Type of Patricia Trie node
+    Leaf
+    Extension
+    Branch
+
+  PayloadType* = enum            ## Type of leaf data (to be extended)
+    BlobData
+    AccountData
+
+  PayloadRef* = ref object
+    case pType*: PayloadType
+    of BlobData:
+      blob*: Blob                ## Opaque data value reference
+    of AccountData:
+      account*: Account          ## Expanded accounting data
+
+  VertexRef* = ref object of RootRef
+    ## Vertex for building a hexary Patricia or Merkle Patricia Trie
+    case vType*: VertexType
+    of Leaf:
+      lPfx*: NibblesSeq          ## Portion of path segment
+      lData*: PayloadRef         ## Reference to data payload
+    of Extension:
+      ePfx*: NibblesSeq          ## Portion of path segment
+      eVtx*: VertexID            ## Edge to vertex with ID `eVtx`
+    of Branch:
+      bVtx*: array[16,VertexID]  ## Edge list with vertex IDs
+
+  NodeRef* = ref object of VertexRef
+    ## Combined record for a *traditional* `Merkle Patricia Tree` node merged
+    ## with a structural `VertexRef` type object.
+    error*: AristoError          ## Can be used for error signalling
+    key*: array[16,NodeKey]      ## Merkle hash(es) for Branch & Extension vtx
+
+  PathStep* = object
+    ## For constructing a tree traversal path
+    # key*: NodeKey              ## Node label ??
+    node*: VertexRef             ## Refers to data record
+    nibble*: int8                ## Branch node selector (if any)
+    depth*: int                  ## May indicate path length (typically 64)
+
+  Path* = object
+    root*: VertexID              ## Root node needed when `path.len == 0`
+    path*: seq[PathStep]         ## Chain of nodes
+    tail*: NibblesSeq            ## Stands for the non-completed leaf path
+
+  LeafSpecs* = object
+    ## Temporarily stashed leaf data (as for an account.) Proper records
+    ## have non-empty payload. Records with empty payload are administrative
+    ## items, e.g. lower boundary records.
+    pathTag*: NodeTag            ## `Patricia Trie` key path
+    nodeVtx*: VertexID           ## Table lookup vertex ID (if any)
+    payload*: PayloadRef         ## Reference to data payload
+
+  GetFn* = proc(key: openArray[byte]): Blob
+    {.gcsafe, raises: [CatchableError].}
+      ## Persistent database `get()` function. For read-only cases, this
+      ## function can be seen as the persistent alternative to `tab[]` on
+      ## a `HexaryTreeDbRef` descriptor.
+
+  AristoDbRef* = ref object of RootObj
+    ## Hexary trie plus helper structures
+    sTab*: Table[VertexID,NodeRef]  ## Structural vertex table making up a trie
+    kMap*: Table[VertexID,NodeKey]  ## Merkle hash key mapping
+    pAmk*: Table[NodeKey,VertexID]  ## Reverse mapper for data import
+    vidGen*: seq[VertexID]          ## Unique vertex ID generator
+
+    # Debugging data below, might go away in future
+    xMap*: Table[NodeKey,VertexID]  ## Mapper for pretty printing, extends `pAmk`
+
+static:
+  # Note that there is no doubt about this ...
+  doAssert NodeKey.default.ByteArray32.initNibbleRange.len == 64
+
+# ------------------------------------------------------------------------------
+# Public helpers: `VertexID` scalar data model
+# ------------------------------------------------------------------------------
+
+proc `<`*(a, b: VertexID): bool {.borrow.}
+proc `==`*(a, b: VertexID): bool {.borrow.}
+proc cmp*(a, b: VertexID): int {.borrow.}
+proc `$`*(a: VertexID): string = $a.uint64
+
+# ------------------------------------------------------------------------------
+# Public functions for `VertexID` management
+# ------------------------------------------------------------------------------
+
+proc new*(T: type VertexID; db: AristoDbRef): T =
+  ## Create a new `VertexID`. Re-usable *ID*s are kept in a list where the
+  ## top entry *ID0* has the property that any other *ID* larger than *ID0*
+  ## is also not used on the database.
+  case db.vidGen.len:
+  of 0:
+    db.vidGen = @[2.VertexID]
+    result = 1.VertexID
+  of 1:
+    result = db.vidGen[^1]
+    db.vidGen = @[(result.uint64 + 1).VertexID]
+  else:
+    result = db.vidGen[^2]
+    db.vidGen[^2] = db.vidGen[^1]
+    db.vidGen.setLen(db.vidGen.len-1)
+
+proc peek*(T: type VertexID; db: AristoDbRef): T =
+  ## Like `new()` without consuming this *ID*. It will return the *ID* that
+  ## would be returned by the `new()` function.
+  if db.vidGen.len == 0: 1.VertexID else: db.vidGen[^1]
+
+
+proc dispose*(db: AristoDbRef; vtxID: VertexID) =
+  ## Recycle the argument `vtxID` which is useful after deleting entries from
+  ## the vertex table in order to keep the `VertexID` type key values small.
+  if db.vidGen.len == 0:
+    db.vidGen = @[vtxID]
+  else:
+    let topID = db.vidGen[^1]
+    # No need to store smaller numbers: all numbers larger than `topID`
+    # are free numbers
+    if vtxID < topID:
+      db.vidGen[^1] = vtxID
+      db.vidGen.add topID
+
+# ------------------------------------------------------------------------------
+# Public helpers: `NodeRef` and `PayloadRef`
+# ------------------------------------------------------------------------------
+
+proc `==`*(a, b: PayloadRef): bool =
+  ## Beware, potential deep comparison
+  if a.isNil:
+    return b.isNil
+  if b.isNil:
+    return false
+  if unsafeAddr(a[]) != unsafeAddr(b[]):
+    if a.pType != b.pType:
+      return false
+    case a.pType:
+    of BlobData:
+      if a.blob != b.blob:
+        return false
+    of AccountData:
+      if a.account != b.account:
+        return false
+  true
+
+proc `==`*(a, b: VertexRef): bool =
+  ## Beware, potential deep comparison
+  if a.isNil:
+    return b.isNil
+  if b.isNil:
+    return false
+  if unsafeAddr(a[]) != unsafeAddr(b[]):
+    if a.vType != b.vType:
+      return false
+    case a.vType:
+    of Leaf:
+      if a.lPfx != b.lPfx or a.lData != b.lData:
+        return false
+    of Extension:
+      if a.ePfx != b.ePfx or a.eVtx != b.eVtx:
+        return false
+    of Branch:
+      for n in 0..15:
+        if a.bVtx[n] != b.bVtx[n]:
+          return false
+  true
+
+proc `==`*(a, b: NodeRef): bool =
+  ## Beware, potential deep comparison
+  if a.VertexRef != b.VertexRef:
+    return false
+  case a.vType:
+  of Extension:
+    if a.key[0] != b.key[0]:
+      return false
+  of Branch:
+    for n in 0..15:
+      if a.bVtx[n] != 0.VertexID and a.key[n] != b.key[n]:
+        return false
+  else:
+    discard
+  true
+
+# ------------------------------------------------------------------------------
+# Public helpers, miscellaneous functions
+# ------------------------------------------------------------------------------
+
+proc isZero*[T: NodeKey|VertexID](a: T): bool =
+  a == typeof(a).default
+
+proc isError*(a: NodeRef): bool =
+  a.error != AristoError(0)
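+
+# Editor's illustration (not part of the module): the allocator above hands
+# out recycled IDs before minting new ones. A minimal usage sketch, assuming
+# an initially empty descriptor:
+#
+#   let db = AristoDbRef()
+#   let a = VertexID.new(db)         # => 1.VertexID
+#   discard VertexID.new(db)         # => 2.VertexID
+#   db.dispose a                     # mark ID 1 as re-usable
+#   doAssert VertexID.new(db) == a   # recycled IDs are served first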
+
+proc convertTo*(payload: PayloadRef; T: type Blob): T =
+  ## Probably lossy conversion, as the payload type discriminator (e.g.
+  ## `AccountData`) gets lost.
+  case payload.pType:
+  of BlobData:
+    result = payload.blob
+  of AccountData:
+    result = rlp.encode payload.account
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------
diff --git a/nimbus/db/aristo/aristo_error.nim b/nimbus/db/aristo/aristo_error.nim
new file mode 100644
index 000000000..8a36c7334
--- /dev/null
+++ b/nimbus/db/aristo/aristo_error.nim
@@ -0,0 +1,50 @@
+# nimbus-eth1
+# Copyright (c) 2021 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+type
+  AristoError* = enum
+    NothingSerious = 0
+
+    # Rlp decoder, `fromRlpRecord()`
+    Rlp2Or17ListEntries
+    RlpBlobExpected
+    RlpBranchLinkExpected
+    RlpExtPathEncoding
+    RlpNonEmptyBlobExpected
+    RlpEmptyBlobExpected
+    RlpRlpException
+    RlpOtherException
+
+    # Db record decoder, `fromDbRecord()`
+    DbrNilArgument
+    DbrUnknown
+    DbrTooShort
+    DbrBranchTooShort
+    DbrBranchSizeGarbled
+    DbrBranchInxOutOfRange
+    DbrExtTooShort
+    DbrExtSizeGarbled
+    DbrExtGotLeafPrefix
+    DbrLeafSizeGarbled
+    DbrLeafGotExtPrefix
+
+    # Db admin data decoder, `fromAristoDb()`
+    ADbGarbledSize
+    ADbWrongType
+
+    # Db record encoder, `toDbRecord()`
+    VtxExPathOverflow
+    VtxLeafPathOverflow
+
+    # Converter `asNode()`
+    CacheMissingNodekeys
+
+# End
diff --git a/nimbus/db/aristo/aristo_transcode.nim b/nimbus/db/aristo/aristo_transcode.nim
new file mode 100644
index 000000000..8763807f0
--- /dev/null
+++ b/nimbus/db/aristo/aristo_transcode.nim
@@ -0,0 +1,322 @@
+# nimbus-eth1
+# Copyright (c) 2021 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+{.push raises: [].}
+
+import
+  std/[bitops, sequtils],
+  eth/[common, trie/nibbles],
+  stew/results,
+  ../../sync/snap/range_desc,
+  "."/[aristo_desc, aristo_error]
+
+const
+  EmptyBlob = seq[byte].default
+    ## Useful shortcut (borrowed from `sync/snap/constants.nim`)
+
+# ------------------------------------------------------------------------------
+# Private functions
+# ------------------------------------------------------------------------------
+
+proc aristoError(error: AristoError): NodeRef =
+  ## Allows returning an error code as a (dummy) `Leaf` node reference.
+  NodeRef(vType: Leaf, error: error)
+
+# ------------------------------------------------------------------------------
+# Public RLP transcoder mixins
+# ------------------------------------------------------------------------------
+
+proc read*(
+    rlp: var Rlp;
+    T: type NodeRef;
+      ): T {.gcsafe, raises: [RlpError]} =
+  ## Mixin for the RLP reader, see `fromRlpRecord()` for a decoder with a
+  ## detailed error return code (if needed.) This reader is a jazzed up
+  ## version which reports some particular errors in the returned (dummy)
+  ## error node.
+  if not rlp.isList:
+    # Otherwise `rlp.items` would raise a `Defect`
+    return aristoError(Rlp2Or17ListEntries)
+
+  var
+    blobs = newSeq[Blob](2)      # temporary, cache
+    links: array[16,NodeKey]     # reconstruct branch node
+    top = 0                      # count entries and positions
+
+  # Collect lists of either 2 or 17 blob entries.
+  for w in rlp.items:
+    case top
+    of 0, 1:
+      if not w.isBlob:
+        return aristoError(RlpBlobExpected)
+      blobs[top] = rlp.read(Blob)
+    of 2 .. 15:
+      if not links[top].init(rlp.read(Blob)):
+        return aristoError(RlpBranchLinkExpected)
+    of 16:
+      if not w.isBlob:
+        return aristoError(RlpBlobExpected)
+      if 0 < rlp.read(Blob).len:
+        return aristoError(RlpEmptyBlobExpected)
+    else:
+      return aristoError(Rlp2Or17ListEntries)
+    top.inc
+
+  # Verify extension data
+  case top
+  of 2:
+    if blobs[0].len == 0:
+      return aristoError(RlpNonEmptyBlobExpected)
+    let (isLeaf, pathSegment) = hexPrefixDecode blobs[0]
+    if isLeaf:
+      return NodeRef(
+        vType: Leaf,
+        lPfx:  pathSegment,
+        lData: PayloadRef(
+          pType: BlobData,
+          blob:  blobs[1]))
+    else:
+      var node = NodeRef(
+        vType: Extension,
+        ePfx:  pathSegment)
+      if not node.key[0].init(blobs[1]):
+        return aristoError(RlpExtPathEncoding)
+      return node
+  of 17:
+    for n in [0,1]:
+      if not links[n].init(blobs[n]):
+        return aristoError(RlpBranchLinkExpected)
+    return NodeRef(
+      vType: Branch,
+      key:   links)
+  else:
+    discard
+
+  aristoError(Rlp2Or17ListEntries)
+
+
+proc append*(writer: var RlpWriter; node: NodeRef) =
+  ## Mixin for the RLP writer. Note that an error (aka dummy) node is
+  ## encoded as an empty list.
+  proc addNodeKey(writer: var RlpWriter; key: NodeKey) =
+    if key.isZero:
+      writer.append EmptyBlob
+    else:
+      writer.append key.to(Hash256)
+
+  if node.isError:
+    writer.startList(0)
+  else:
+    case node.vType:
+    of Branch:
+      writer.startList(17)
+      for n in 0..15:
+        writer.addNodeKey node.key[n]
+      writer.append EmptyBlob
+    of Extension:
+      writer.startList(2)
+      writer.append node.ePfx.hexPrefixEncode(isleaf = false)
+      writer.addNodeKey node.key[0]
+    of Leaf:
+      writer.startList(2)
+      writer.append node.lPfx.hexPrefixEncode(isleaf = true)
+      writer.append node.lData.convertTo(Blob)
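+
+# Editor's illustration (not part of the module): on well-formed records the
+# `read()`/`append()` mixins above are inverses of each other, e.g. for some
+# hypothetical `someRlpBlob: Blob` holding an RLP encoded trie node:
+#
+#   let node = someRlpBlob.decode(NodeRef)
+#   doAssert rlp.encode(node) == someRlpBlob
+#
+# (This round-trip is exercised in `tests/test_aristo/test_transcode.nim`.)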
+
+# ------------------------------------------------------------------------------
+# Public db record transcoders
+# ------------------------------------------------------------------------------
+
+proc blobify*(node: VertexRef; data: var Blob): AristoError =
+  ## This function serialises the node argument to a database record. Contrary
+  ## to RLP based serialisation, these records aim to align on fixed byte
+  ## boundaries.
+  ## ::
+  ##   Branch:
+  ##     uint64, ...    -- list of up to 16 child nodes lookup keys
+  ##     uint16         -- index bitmap
+  ##     0x00           -- marker(2) + unused(6)
+  ##
+  ##   Extension:
+  ##     uint64         -- child node lookup key
+  ##     Blob           -- hex encoded partial path (at least one byte)
+  ##     0x80 + xx      -- marker(2) + pathSegmentLen(6)
+  ##
+  ##   Leaf:
+  ##     Blob           -- opaque leaf data payload (might be zero length)
+  ##     Blob           -- hex encoded partial path (at least one byte)
+  ##     0xc0 + xx      -- marker(2) + pathSegmentLen(6)
+  ##
+  ## For a branch record, the bits of the `access` bitmap indicate which
+  ## child `vertexID`s are present. The `vertexID` with index `n` is stored
+  ## at byte position
+  ## ::
+  ##   8 * countSetBits(access and ((1 shl n) - 1))
+  ##
+  ## provided that bit `n` of `access` is set.
+  case node.vType:
+  of Branch:
+    var
+      access = 0u16
+      refs: Blob
+    for n in 0..15:
+      if not node.bVtx[n].isZero:
+        access = access or (1u16 shl n)
+        refs &= node.bVtx[n].uint64.toBytesBE.toSeq
+    data = refs & access.toBytesBE.toSeq & @[0u8]
+  of Extension:
+    let
+      pSegm = node.ePfx.hexPrefixEncode(isleaf = false)
+      psLen = pSegm.len.byte
+    if psLen == 0 or 33 < psLen:
+      return VtxExPathOverflow
+    data = node.eVtx.uint64.toBytesBE.toSeq & pSegm & @[0x80u8 or psLen]
+  of Leaf:
+    let
+      pSegm = node.lPfx.hexPrefixEncode(isleaf = true)
+      psLen = pSegm.len.byte
+    if psLen == 0 or 33 < psLen:
+      return VtxLeafPathOverflow
+    data = node.lData.convertTo(Blob) & pSegm & @[0xC0u8 or psLen]
+
+proc blobify*(node: VertexRef): Result[Blob, AristoError] =
+  ## Variant of `blobify()`
+  var
+    data: Blob
+    info = node.blobify data
+  if info != AristoError(0):
+    return err(info)
+  ok(data)
+
+
+proc blobify*(db: AristoDbRef; data: var Blob) =
+  ## This function serialises some maintenance data for the `AristoDb`
+  ## descriptor. At the moment, this contains the recycling table for the
+  ## `VertexID` values, only.
+  ##
+  ## This data record is supposed to be stored as the table value with the
+  ## zero key for persistent tables.
+  ## ::
+  ##   Admin:
+  ##     uint64, ...    -- list of IDs
+  ##     0x40           -- marker(2) + unused(6)
+  ##
+  data.setLen(0)
+  for w in db.vidGen:
+    data &= w.uint64.toBytesBE.toSeq
+  data.add 0x40u8
+
+proc blobify*(db: AristoDbRef): Blob =
+  ## Variant of `blobify()`
+  db.blobify result
+
+
+proc deblobify*(record: Blob; vtx: var VertexRef): AristoError =
+  ## De-serialise a data record encoded with `blobify()`. The second
+  ## argument `vtx` can be `nil`.
+  if record.len < 3:                                  # minimum `Leaf` record
+    return DbrTooShort
+
+  case record[^1] shr 6:
+  of 0: # `Branch` node
+    if record.len < 19:                               # at least two edges
+      return DbrBranchTooShort
+    if (record.len mod 8) != 3:
+      return DbrBranchSizeGarbled
+    let
+      maxOffset = record.len - 11
+      aInx = record.len - 3
+      aIny = record.len - 2
+    var
+      offs = 0
+      access = uint16.fromBytesBE record[aInx..aIny]  # bitmap
+      vtxList: array[16,VertexID]
+    while access != 0:
+      if maxOffset < offs:
+        return DbrBranchInxOutOfRange
+      let n = access.firstSetBit - 1
+      access.clearBit n
+      vtxList[n] = (uint64.fromBytesBE record[offs ..< offs+8]).VertexID
+      offs += 8
+      # End `while`
+    vtx = VertexRef(
+      vType: Branch,
+      bVtx:  vtxList)
+
+  of 2: # `Extension` node
+    let
+      sLen = record[^1].int and 0x3f                  # length of path segment
+      rLen = record.len - 1                           # `vertexID` + path segm
+    if record.len < 10:
+      return DbrExtTooShort
+    if 8 + sLen != rLen:                              # => sLen is at least 1
+      return DbrExtSizeGarbled
+    let (isLeaf, pathSegment) = hexPrefixDecode record[8 ..< rLen]
+    if isLeaf:
+      return DbrExtGotLeafPrefix
+    vtx = VertexRef(
+      vType: Extension,
+      eVtx:  (uint64.fromBytesBE record[0 ..< 8]).VertexID,
+      ePfx:  pathSegment)
+
+  of 3: # `Leaf` node
+    let
+      sLen = record[^1].int and 0x3f                  # length of path segment
+      rLen = record.len - 1                           # payload + path segment
+      pLen = rLen - sLen                              # payload length
+    if rLen < sLen:
+      return DbrLeafSizeGarbled
+    let (isLeaf, pathSegment) = hexPrefixDecode record[pLen ..< rLen]
+    if not isLeaf:
+      return DbrLeafGotExtPrefix
+    vtx = VertexRef(
+      vType: Leaf,
+      lPfx:  pathSegment,
+      lData: PayloadRef(
+        pType: BlobData,
+        blob:  record[0 ..< pLen]))
+  else:
+    return DbrUnknown
+
+
+proc deblobify*(data: Blob; db: var AristoDbRef): AristoError =
+  ## De-serialise the
data record encoded with `blobify()`. The second + ## argument `db` can be `nil` in which case a new `AristoDbRef` type + ## descriptor will be created. + if db.isNil: + db = AristoDbRef() + if data.len == 0: + db.vidGen = @[1.VertexID] + else: + if (data.len mod 8) != 1: + return ADbGarbledSize + if data[^1] shr 6 != 1: + return ADbWrongType + for n in 0 ..< (data.len div 8): + let w = n * 8 + db.vidGen.add (uint64.fromBytesBE data[w ..< w + 8]).VertexID + + +proc deblobify*[W: VertexRef|AristoDbRef]( + record: Blob; + T: type W; + ): Result[T,AristoError] = + ## Variant of `deblobify()` for either `VertexRef` or `AristoDbRef` + var obj: T # isNil, will be auto-initialised + let info = record.deblobify obj + if info != AristoError(0): + return err(info) + ok(obj) + +proc deblobify*(record: Blob): Result[VertexRef,AristoError] = + ## Default variant of `deblobify()` for `VertexRef`. + record.deblobify VertexRef + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tests/all_tests.nim b/tests/all_tests.nim index 060b99d14..c53d14b38 100644 --- a/tests/all_tests.nim +++ b/tests/all_tests.nim @@ -12,6 +12,7 @@ import ../test_macro cliBuilder: import ./test_code_stream, ./test_accounts_cache, + ./test_aristo, ./test_custom_network, ./test_sync_snap, ./test_rocksdb_timing, diff --git a/tests/test_aristo.nim b/tests/test_aristo.nim new file mode 100644 index 000000000..a89d5efdb --- /dev/null +++ b/tests/test_aristo.nim @@ -0,0 +1,221 @@ +# Nimbus - Types, data structures and shared utilities used in network sync +# +# Copyright (c) 2018-2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or +# distributed except according to those terms. 
+ +## Re-invented implementation for Merkle Patricia Tree named as Aristo Trie + +import + std/[os, strformat, strutils], + chronicles, + eth/[common, p2p], + rocksdb, + unittest2, + ../nimbus/db/select_backend, + ../nimbus/db/aristo/[aristo_desc], + ../nimbus/core/chain, + ../nimbus/sync/snap/worker/db/[ + hexary_desc, rocky_bulk_load, snapdb_accounts, snapdb_desc], + ./replay/[pp, undump_accounts], + ./test_sync_snap/[snap_test_xx, test_accounts, test_types], + ./test_aristo/[test_transcode] + +const + baseDir = [".", "..", ".."/"..", $DirSep] + repoDir = [".", "tests", "nimbus-eth1-blobs"] + subDir = ["replay", "test_sync_snap", "replay"/"snap"] + + # Reference file for finding the database directory + sampleDirRefFile = "sample0.txt.gz" + + # Standard test samples + accSample = snapTest0 + + # Number of database slots available + nTestDbInstances = 9 + + # Dormant (may be set if persistent database causes problems) + disablePersistentDB = false + +type + TestDbs = object + ## Provide enough spare empty databases + persistent: bool + dbDir: string + baseDir: string # for cleanup + subDir: string # for cleanup + cdb: array[nTestDbInstances,ChainDb] + +# ------------------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------------------ + +proc findFilePath( + file: string; + baseDir: openArray[string] = baseDir; + repoDir: openArray[string] = repoDir; + subDir: openArray[string] = subDir; + ): Result[string,void] = + for dir in baseDir: + if dir.dirExists: + for repo in repoDir: + if (dir / repo).dirExists: + for sub in subDir: + if (dir / repo / sub).dirExists: + let path = dir / repo / sub / file + if path.fileExists: + return ok(path) + echo "*** File not found \"", file, "\"." + err() + +proc getTmpDir(sampleDir = sampleDirRefFile): string = + sampleDir.findFilePath.value.splitFile.dir + +proc setTraceLevel {.used.} = + discard + when defined(chronicles_runtime_filtering) and loggingEnabled: + setLogLevel(LogLevel.TRACE) + +proc setErrorLevel {.used.} = + discard + when defined(chronicles_runtime_filtering) and loggingEnabled: + setLogLevel(LogLevel.ERROR) + +# ------------------------------------------------------------------------------ +# Private functions +# ------------------------------------------------------------------------------ + +proc to(sample: AccountsSample; T: type seq[UndumpAccounts]): T = + ## Convert test data into usable in-memory format + let file = sample.file.findFilePath.value + var root: Hash256 + for w in file.undumpNextAccount: + let n = w.seenAccounts - 1 + if n < sample.firstItem: + continue + if sample.lastItem < n: + break + if sample.firstItem == n: + root = w.root + elif w.root != root: + break + result.add w + +proc flushDbDir(s: string; subDir = "") = + if s != "": + let baseDir = s / "tmp" + for n in 0 ..< nTestDbInstances: + let instDir = if subDir == "": baseDir / $n else: baseDir / subDir / $n + if (instDir / "nimbus" / "data").dirExists: + # Typically under Windows: there might be stale file locks. 
+        try: instDir.removeDir except CatchableError: discard
+    try: (baseDir / subDir).removeDir except CatchableError: discard
+    block dontClearUnlessEmpty:
+      for w in baseDir.walkDir:
+        break dontClearUnlessEmpty
+      try: baseDir.removeDir except CatchableError: discard
+
+
+proc flushDbs(db: TestDbs) =
+  if db.persistent:
+    for n in 0 ..< nTestDbInstances:
+      if db.cdb[n].rocksStoreRef.isNil:
+        break
+      db.cdb[n].rocksStoreRef.store.db.rocksdb_close
+    db.baseDir.flushDbDir(db.subDir)
+
+proc testDbs(
+    workDir: string;
+    subDir: string;
+    instances: int;
+    persistent: bool;
+      ): TestDbs =
+  if disablePersistentDB or workDir == "" or not persistent:
+    result.persistent = false
+    result.dbDir = "*notused*"
+  else:
+    result.persistent = true
+    result.baseDir = workDir
+    result.subDir = subDir
+    if subDir != "":
+      result.dbDir = workDir / "tmp" / subDir
+    else:
+      result.dbDir = workDir / "tmp"
+  if result.persistent:
+    workDir.flushDbDir(subDir)
+    for n in 0 ..< min(result.cdb.len, instances):
+      result.cdb[n] = (result.dbDir / $n).newChainDB
+
+proc snapDbRef(cdb: ChainDb; pers: bool): SnapDbRef =
+  if pers: SnapDbRef.init(cdb) else: SnapDbRef.init(newMemoryDB())
+
+proc snapDbAccountsRef(cdb:ChainDb; root:Hash256; pers:bool):SnapDbAccountsRef =
+  SnapDbAccountsRef.init(cdb.snapDbRef(pers), root, Peer())
+
+# ------------------------------------------------------------------------------
+# Test Runners: accounts and accounts storages
+# ------------------------------------------------------------------------------

+proc transcodeRunner(noisy = true; sample = accSample; stopAfter = high(int)) =
+  let
+    accLst = sample.to(seq[UndumpAccounts])
+    root = accLst[0].root
+    tmpDir = getTmpDir()
+    db = tmpDir.testDbs(sample.name & "-accounts", instances=2, persistent=true)
+    info = if db.persistent: &"persistent db on \"{db.baseDir}\""
+           else: "in-memory db"
+    fileInfo = sample.file.splitPath.tail.replace(".txt.gz","")
+
+  defer:
+    db.flushDbs
+
+  suite &"Aristo: transcoding {fileInfo} accounts and proofs for {info}":
+
+    test &"Transcoding VertexID recycling lists (seed={accLst.len})":
+      noisy.test_transcodeVidRecycleLists(accLst.len)
+
+    # New common descriptor for this sub-group of tests
+    let
+      desc = db.cdb[0].snapDbAccountsRef(root, db.persistent)
+      hexaDb = desc.hexaDb
+      getFn = desc.getAccountFn
+      dbg = if noisy: hexaDb else: nil
+
+    # Borrowed from `test_sync_snap/test_accounts.nim`
+    test &"Importing {accLst.len} list items to persistent database":
+      if db.persistent:
+        accLst.test_accountsImport(desc, true)
+      else:
+        skip()
+
+    test "Transcoding database records: RLP, NodeRef, Blob, VertexRef":
+      noisy.showElapsed("test_transcoder()"):
+        noisy.test_transcodeAccounts(db.cdb[0].rocksStoreRef, stopAfter)
+
+# ------------------------------------------------------------------------------
+# Main function(s)
+# ------------------------------------------------------------------------------
+
+proc aristoMain*(noisy = defined(debug)) =
+  noisy.transcodeRunner()
+
+when isMainModule:
+  const
+    noisy = defined(debug) or true
+
+  # Borrowed from `test_sync_snap.nim`
+  when true: # and false:
+    for n,sam in snapTestList:
+      noisy.transcodeRunner(sam)
+    for n,sam in snapTestStorageList:
+      noisy.transcodeRunner(sam)
+
+# ------------------------------------------------------------------------------
+# End
+# ------------------------------------------------------------------------------
diff --git a/tests/test_aristo/test_helpers.nim b/tests/test_aristo/test_helpers.nim
new file mode 100644
index 000000000..1a300c9ef
--- /dev/null +++ b/tests/test_aristo/test_helpers.nim @@ -0,0 +1,73 @@ +# Nimbus - Types, data structures and shared utilities used in network sync +# +# Copyright (c) 2018-2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or +# distributed except according to those terms. + +import + std/sequtils, + eth/common, + rocksdb, + ../../nimbus/db/kvstore_rocksdb, + ../../nimbus/sync/snap/constants, + ../replay/pp + +# ------------------------------------------------------------------------------ +# Public helpers +# ------------------------------------------------------------------------------ + +proc say*(noisy = false; pfx = "***"; args: varargs[string, `$`]) = + if noisy: + if args.len == 0: + echo "*** ", pfx + elif 0 < pfx.len and pfx[^1] != ' ': + echo pfx, " ", args.toSeq.join + else: + echo pfx, args.toSeq.join + +# ------------------------------------------------------------------------------ +# Public iterators +# ------------------------------------------------------------------------------ + +iterator walkAllDb*(rocky: RocksStoreRef): (int,Blob,Blob) = + ## Walk over all key-value pairs of the database (`RocksDB` only.) + let + rop = rocky.store.readOptions + rit = rocky.store.db.rocksdb_create_iterator(rop) + defer: + rit.rocksdb_iter_destroy() + + rit.rocksdb_iter_seek_to_first() + var count = -1 + + while rit.rocksdb_iter_valid() != 0: + count .inc + + # Read key-value pair + var + kLen, vLen: csize_t + let + kData = rit.rocksdb_iter_key(addr kLen) + vData = rit.rocksdb_iter_value(addr vLen) + + # Fetch data + let + key = if kData.isNil: EmptyBlob + else: kData.toOpenArrayByte(0,int(kLen)-1).toSeq + value = if vData.isNil: EmptyBlob + else: vData.toOpenArrayByte(0,int(vLen)-1).toSeq + + yield (count, key, value) + + # Update Iterator (might overwrite kData/vdata) + rit.rocksdb_iter_next() + # End while + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/tests/test_aristo/test_transcode.nim b/tests/test_aristo/test_transcode.nim new file mode 100644 index 000000000..9ca0b7556 --- /dev/null +++ b/tests/test_aristo/test_transcode.nim @@ -0,0 +1,232 @@ +# Nimbus - Types, data structures and shared utilities used in network sync +# +# Copyright (c) 2018-2021 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or +# distributed except according to those terms. 
+
+## Aristo (aka Patricia) DB transcoder test
+
+import
+  eth/common,
+  stew/byteutils,
+  unittest2,
+  ../../nimbus/db/kvstore_rocksdb,
+  ../../nimbus/db/aristo/[
+    aristo_desc, aristo_cache, aristo_debug, aristo_error, aristo_transcode],
+  ../../nimbus/sync/snap/range_desc,
+  ./test_helpers
+
+type
+  TesterDesc = object
+    prng: uint32                       ## random state
+
+# ------------------------------------------------------------------------------
+# Private helpers
+# ------------------------------------------------------------------------------
+
+proc posixPrngRand(state: var uint32): byte =
+  ## POSIX.1-2001 example of a rand() implementation, see manual page rand(3).
+  state = state * 1103515245 + 12345
+  let val = (state shr 16) and 32767    # mod 2^15
+  (val shr 8).byte                      # Extract second byte
+
+proc rand[W: SomeInteger|VertexID](ap: var TesterDesc; T: type W): T =
+  var a: array[sizeof T,byte]
+  for n in 0 ..< sizeof T:
+    a[n] = ap.prng.posixPrngRand().byte
+  when sizeof(T) == 1:
+    let w = uint8.fromBytesBE(a).T
+  elif sizeof(T) == 2:
+    let w = uint16.fromBytesBE(a).T
+  elif sizeof(T) == 4:
+    let w = uint32.fromBytesBE(a).T
+  else:
+    let w = uint64.fromBytesBE(a).T
+  when T is SomeUnsignedInt:
+    # That way, `fromBytesBE()` can be applied to `uint`
+    result = w
+  else:
+    # That way the result is independent of endianness
+    (addr result).copyMem(unsafeAddr w, sizeof w)
+
+proc vidRand(td: var TesterDesc; bits = 19): VertexID =
+  if bits < 64:
+    let
+      mask = (1u64 shl max(1,bits)) - 1
+      rval = td.rand uint64
+    (rval and mask).VertexID
+  else:
+    td.rand VertexID
+
+proc init(T: type TesterDesc; seed: int): TesterDesc =
+  result.prng = (seed and 0x7fffffff).uint32
+
+# -----
+
+proc getOrEmpty(rc: Result[Blob,AristoError]; noisy = true): Blob =
+  if rc.isOk:
+    return rc.value
+  noisy.say "***", "error=", rc.error
+
+proc `+`(a: VertexID, b: int): VertexID =
+  (a.uint64 + b.uint64).VertexID
+
+# ------------------------------------------------------------------------------
+# Public test function
+# ------------------------------------------------------------------------------
+
+proc test_transcodeAccounts*(
+    noisy = true;
+    rocky: RocksStoreRef;
+    stopAfter = high(int);
+      ) =
+  ## Transcoder tests on accounts database
+  var
+    adb = AristoDbRef()
+    count = -1
+  for (n, key, value) in rocky.walkAllDb():
+    if stopAfter < n:
+      break
+    count = n
+
+    # RLP <-> NIM object mapping
+    let node0 = value.decode(NodeRef)
+    block:
+      let blob0 = rlp.encode node0
+      if value != blob0:
+        check value.len == blob0.len
+        check value == blob0
+        noisy.say "***", "count=", count, " value=", value.rlpFromBytes.inspect
+        noisy.say "***", "count=", count, " blob0=", blob0.rlpFromBytes.inspect
+
+    # Provide DbRecord with dummy links and expanded payload. Registering
+    # the node as vertex and re-converting it does the job
+    var node = node0.updated(adb)
+    if node.isError:
+      check node.error == AristoError(0)
+    else:
+      case node.vType:
+      of aristo_desc.Leaf:
+        let account = node.lData.blob.decode(Account)
+        node.lData = PayloadRef(pType: AccountData, account: account)
+        discard adb.keyToVtxID node.lData.account.storageRoot.to(NodeKey)
+        discard adb.keyToVtxID node.lData.account.codeHash.to(NodeKey)
+      of aristo_desc.Extension:
+        # key <-> vtx correspondence
+        check node.key[0] == node0.key[0]
+        check not node.eVtx.isZero
+      of aristo_desc.Branch:
+        for n in 0..15:
+          # key[n] <-> vtx[n] correspondence
+          check node.key[n] == node0.key[n]
+          check node.key[n].isZero == node.bVtx[n].isZero
+
+    # This NIM object must match to the same RLP encoded byte stream
+    block:
+      var blob1 = rlp.encode node
+      if value != blob1:
+        check value.len == blob1.len
+        check value == blob1
+        noisy.say "***", "count=", count, " value=", value.rlpFromBytes.inspect
+        noisy.say "***", "count=", count, " blob1=", blob1.rlpFromBytes.inspect
+
+    # NIM object <-> DbRecord mapping
+    let dbr = node.blobify.getOrEmpty(noisy)
+    var node1 = dbr.deblobify.asNode(adb)
+    if node1.isError:
+      check node1.error == AristoError(0)
+
+    block:
+      # `deblobify()` will always decode to `BlobData` type payload
+      if node1.vType == aristo_desc.Leaf:
+        let account = node1.lData.blob.decode(Account)
+        node1.lData = PayloadRef(pType: AccountData, account: account)
+
+      if node != node1:
+        check node == node1
+        noisy.say "***", "count=", count, " node=", node.pp(adb)
+        noisy.say "***", "count=", count, " node1=", node1.pp(adb)
+
+    # Serialise back with expanded `AccountData` type payload (if any)
+    let dbr1 = node1.blobify.getOrEmpty(noisy)
+    block:
+      if dbr != dbr1:
+        check dbr == dbr1
+        noisy.say "***", "count=", count, " dbr=", dbr.toHex
+        noisy.say "***", "count=", count, " dbr1=", dbr1.toHex
+
+    # Serialise back as is
+    let dbr2 = dbr.deblobify.asNode(adb).blobify.getOrEmpty(noisy)
+    block:
+      if dbr != dbr2:
+        check dbr == dbr2
+        noisy.say "***", "count=", count, " dbr=", dbr.toHex
+        noisy.say "***", "count=", count, " dbr2=", dbr2.toHex
+
+  noisy.say "***", "records visited: ", count + 1
+
+
+proc test_transcodeVidRecycleLists*(noisy = true; seed = 42) =
+  ## Transcode VID lists held in the `AristoDb` descriptor
+  var td = TesterDesc.init seed
+  let db = AristoDbRef()
+
+  # Add some random numbers
+  block:
+    let first = td.vidRand()
+    db.dispose first
+
+    var
+      expectedVids = 1
+      count = 1
+    # Feed some numbers used and some discarded
+    while expectedVids < 5 or count < 5 + expectedVids:
+      count.inc
+      let vid = td.vidRand()
+      expectedVids += (vid < first).ord
+      db.dispose vid
+
+    check db.vidGen.len == expectedVids
+    noisy.say "***", "vids=", db.vidGen.len, " discarded=", count-expectedVids
+
+  # Serialise/deserialise
+  block:
+    let dbBlob = db.blobify
+
+    # Deserialise
+    let db1 = block:
+      let rc = dbBlob.deblobify AristoDbRef
+      if rc.isErr:
+        check rc.isOk
+      rc.get(otherwise = AristoDbRef())
+
+    check db.vidGen == db1.vidGen
+
+  # Make sure that recycled numbers are fetched first
+  let topVid = db.vidGen[^1]
+  while 1 < db.vidGen.len:
+    let w = VertexID.new(db)
+    check w < topVid
+  check db.vidGen.len == 1 and db.vidGen[0] == topVid
+
+  # Get some consecutive vertex IDs
+  for n in 0 .. 5:
+    let w = VertexID.new(db)
+    check w == topVid + n
+    check db.vidGen.len == 1
+
+  # Repeat last test after clearing the cache
+  db.vidGen.setLen(0)
+  for n in 0 ..
5: + let w = VertexID.new(db) + check w == 1.VertexID + n + check db.vidGen.len == 1 + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------