diff --git a/nimbus/db/aristo/aristo_blobify.nim b/nimbus/db/aristo/aristo_blobify.nim index 98d287ee4..7bc68c50a 100644 --- a/nimbus/db/aristo/aristo_blobify.nim +++ b/nimbus/db/aristo/aristo_blobify.nim @@ -159,7 +159,7 @@ proc blobifyTo*(pyl: LeafPayload, data: var seq[byte]) = data &= pyl.stoData.blobify().data data &= [0x20.byte] -proc blobifyTo*(vtx: VertexRef; key: HashKey, data: var seq[byte]): Result[void,AristoError] = +proc blobifyTo*(vtx: VertexRef, key: HashKey, data: var seq[byte]) = ## This function serialises the vertex argument to a database record. ## Contrary to RLP based serialisation, these records aim to align on ## fixed byte boundaries. @@ -181,72 +181,53 @@ proc blobifyTo*(vtx: VertexRef; key: HashKey, data: var seq[byte]): Result[void, ## :: ## 8 * n * ((access shr (n * 4)) and 15) ## - if not vtx.isValid: - return err(BlobifyNilVertex) - case vtx.vType: - of Branch: - let code = if key.isValid: - data.add byte(key.len) - data.add key.data() - # TODO using 0 here for legacy reasons - a bit flag would be easier - 0'u8 shl 6 - else: - 2'u8 shl 6 - var - lens = 0u64 - pos = data.len - for n in 0..15: - if vtx.bVid[n].isValid: - let tmp = vtx.bVid[n].blobify() - lens += uint64(tmp.len) shl (n * 4) - data &= tmp.data() - if data.len == pos: - return err(BlobifyBranchMissingRefs) + doAssert vtx.isValid - let - pSegm = - if vtx.pfx.len > 0: - vtx.pfx.toHexPrefix(isleaf = false) - else: - default(HexPrefixBuf) - psLen = pSegm.len.byte - if 33 < psLen: - return err(BlobifyExtPathOverflow) + let + bits = + case vtx.vType + of Branch: + let bits = + if key.isValid and key.len == 32: + # Shorter keys can be loaded from the vertex directly + data.add key.data() + 0b10'u8 + else: + 0b00'u8 - data &= pSegm.data() - data &= lens.toBytesBE - data &= [code or psLen] + data.add vtx.startVid.blobify().data() + data.add toBytesBE(vtx.used) + bits + of Leaf: + vtx.lData.blobifyTo(data) + 0b01'u8 - of Leaf: - let - pSegm = vtx.pfx.toHexPrefix(isleaf = true) - psLen = pSegm.len.byte - if psLen == 0 or 33 < psLen: - return err(BlobifyLeafPathOverflow) - vtx.lData.blobifyTo(data) - data &= pSegm.data() - data &= [(3'u8 shl 6) or psLen] + pSegm = + if vtx.pfx.len > 0: + vtx.pfx.toHexPrefix(isleaf = vtx.vType == Leaf) + else: + default(HexPrefixBuf) + psLen = pSegm.len.byte - ok() + data &= pSegm.data() + data &= [(bits shl 6) or psLen] proc blobify*(vtx: VertexRef, key: HashKey): seq[byte] = ## Variant of `blobify()` result = newSeqOfCap[byte](128) - if vtx.blobifyTo(key, result).isErr: - result.setLen(0) # blobify only fails on invalid verticies + vtx.blobifyTo(key, result) -proc blobifyTo*(lSst: SavedState; data: var seq[byte]): Result[void,AristoError] = +proc blobifyTo*(lSst: SavedState; data: var seq[byte]) = ## Serialise a last saved state record data.add lSst.key.data data.add lSst.serial.toBytesBE data.add @[0x7fu8] - ok() -proc blobify*(lSst: SavedState): Result[seq[byte],AristoError] = +proc blobify*(lSst: SavedState): seq[byte] = ## Variant of `blobify()` var data: seq[byte] - ? 
lSst.blobifyTo data - ok(move(data)) + lSst.blobifyTo data + data # ------------- proc deblobify( @@ -296,11 +277,10 @@ proc deblobifyType*(record: openArray[byte]; T: type VertexRef): if record.len < 3: # minimum `Leaf` record return err(DeblobVtxTooShort) - ok case record[^1] shr 6: - of 0, 2: Branch - of 3: Leaf + ok if ((record[^1] shr 6) and 0b01'u8) > 0: + Leaf else: - return err(DeblobUnknown) + Branch proc deblobify*( record: openArray[byte]; @@ -308,67 +288,44 @@ proc deblobify*( ): Result[T,AristoError] = ## De-serialise a data record encoded with `blobify()`. The second ## argument `vtx` can be `nil`. - if record.len < 3: # minimum `Leaf` record + if record.len < 3: # minimum `Leaf` record return err(DeblobVtxTooShort) - let kind = record[^1] shr 6 - let start = if kind == 0: - int(record[0] + 1) - else: - 0 - ok case kind: - of 0, 2: # `Branch` vertex - if record.len - start < 11: # at least two edges - return err(DeblobBranchTooShort) + + let + bits = record[^1] shr 6 + vType = if (bits and 0b01'u8) > 0: Leaf else: Branch + hasKey = (bits and 0b10'u8) > 0 + psLen = int(record[^1] and 0b00111111) + start = if hasKey: 32 else: 0 + + if psLen > record.len - 2 or start > record.len - 2 - psLen: + return err(DeblobBranchTooShort) + + let + psPos = record.len - psLen - 1 + (_, pathSegment) = + NibblesBuf.fromHexPrefix record.toOpenArray(psPos, record.len - 2) + + ok case vType + of Branch: + var pos = start let - aInx = record.len - 9 - aIny = record.len - 2 - var - offs = start - lens = uint64.fromBytesBE record.toOpenArray(aInx, aIny) # bitmap - vtxList: array[16,VertexID] - n = 0 - while lens != 0: - let len = lens and 0b1111 - if len > 0: - vtxList[n] = VertexID(? load64(record, offs, int(len))) - inc n - lens = lens shr 4 + svLen = psPos - pos - 2 + startVid = VertexID(?load64(record, pos, svLen)) + used = uint16.fromBytesBE(record.toOpenArray(pos, pos + 1)) - let (isLeaf, pathSegment) = - NibblesBuf.fromHexPrefix record.toOpenArray(offs, aInx - 1) - if isLeaf: - return err(DeblobBranchGotLeafPrefix) + pos += 2 - # End `while` - VertexRef( - vType: Branch, - pfx: pathSegment, - bVid: vtxList) + VertexRef(vType: Branch, pfx: pathSegment, startVid: startVid, used: used) + of Leaf: + let vtx = VertexRef(vType: Leaf, pfx: pathSegment) - of 3: # `Leaf` vertex - let - sLen = record[^1].int and 0x3f # length of path segment - rLen = record.len - 1 # payload + path segment - pLen = rLen - sLen # payload length - if rLen < sLen or pLen < 1: - return err(DeblobLeafSizeGarbled) - let (isLeaf, pathSegment) = - NibblesBuf.fromHexPrefix record.toOpenArray(pLen, rLen-1) - if not isLeaf: - return err(DeblobLeafGotExtPrefix) - let vtx = VertexRef( - vType: Leaf, - pfx: pathSegment) - - ? 
record.toOpenArray(start, pLen - 1).deblobify(vtx.lData) + ?record.toOpenArray(start, psPos - 1).deblobify(vtx.lData) vtx - else: - return err(DeblobUnknown) - proc deblobify*(record: openArray[byte], T: type HashKey): Opt[HashKey] = - if record.len > 1 and ((record[^1] shr 6) == 0) and (int(record[0]) + 1) < record.len: - HashKey.fromBytes(record.toOpenArray(1, int(record[0]))) + if record.len > 33 and (((record[^1] shr 6) and 0b10'u8) > 0): + HashKey.fromBytes(record.toOpenArray(0, 31)) else: Opt.none(HashKey) diff --git a/nimbus/db/aristo/aristo_check/check_be.nim b/nimbus/db/aristo/aristo_check/check_be.nim index e7d98f229..8e830fb46 100644 --- a/nimbus/db/aristo/aristo_check/check_be.nim +++ b/nimbus/db/aristo/aristo_check/check_be.nim @@ -42,11 +42,10 @@ proc checkBE*[T: RdbBackendRef|MemBackendRef|VoidBackendRef]( of Branch: block check42Links: var seen = false - for n in 0 .. 15: - if vtx.bVid[n].isValid: - if seen: - break check42Links - seen = true + for _, _ in vtx.pairs(): + if seen: + break check42Links + seen = true return err((rvid.vid,CheckBeVtxBranchLinksMissing)) for (rvid,key) in T.walkKeyBe db: diff --git a/nimbus/db/aristo/aristo_check/check_top.nim b/nimbus/db/aristo/aristo_check/check_top.nim index 27b9d79bc..331db8a02 100644 --- a/nimbus/db/aristo/aristo_check/check_top.nim +++ b/nimbus/db/aristo/aristo_check/check_top.nim @@ -100,11 +100,10 @@ proc checkTopCommon*( of Branch: block check42Links: var seen = false - for n in 0 .. 15: - if vtx.bVid[n].isValid: - if seen: - break check42Links - seen = true + for _, _ in vtx.pairs(): + if seen: + break check42Links + seen = true return err((rvid.vid,CheckAnyVtxBranchLinksMissing)) else: nNilVtx.inc diff --git a/nimbus/db/aristo/aristo_compute.nim b/nimbus/db/aristo/aristo_compute.nim index 8bbb6c29c..591ca68e4 100644 --- a/nimbus/db/aristo/aristo_compute.nim +++ b/nimbus/db/aristo/aristo_compute.nim @@ -11,102 +11,13 @@ {.push raises: [].} import - system/ansi_c, - std/[strformat, math, hashes], - stew/staticfor, + std/[strformat, math], chronicles, eth/common, results, - "."/[aristo_desc, aristo_get, aristo_serialise, aristo_walk/persistent], + "."/[aristo_desc, aristo_get, aristo_walk/persistent], ./aristo_desc/desc_backend -type BasicBloomFilter = object - # School book implementation of bloom filter based on - # https://github.com/save-buffer/bloomfilter_benchmarks. - # - # In theory, this bloom filter could be turned into a reusable component but - # it is fairly specialised to the particular use case and gets used in a - # tight/hot loop in the code - a generalisation would require care so as not - # to introduce overhead but could of course be further optimised using - bytes: ptr UncheckedArray[byte] - -proc computeBits(n: int, epsilon: float): int = - # Number of bits in the bloom filter required for n elements and eposilon - # false positive rate - int(-1.4427 * float(n) * log2(epsilon) + 0.5) - -proc computeHashFns(epsilon: float): int = - # Number of hash functions given the desired false positive rate - int(-log2(epsilon) + 0.5) - -const - bloomRate = 0.002 - # The leaf cache computation is fairly sensitive to false positives as these - # ripple up the branch trie with false postivies being amplified by trie - # branching - this has to be balanced with the cost which - # goes up fairly quickly with ~13 bits per key at 0.002, meaning ~2gb of - # memory for the current setting below! 
- bloomHashes = computeHashFns(bloomRate) - expectedKeys = 1500000000 - # expected number of elements in the bloom filter - this is reported as - # `keys` below and will need adjusting - the value is more or less accurate - # on mainnet as of block 2100000 (~oct 2024) for the number of leaves - # present - we use leaf count because bloom filter accuracy is most - # important for the first round of branches. - # TODO rocksdb can estimate the number of keys present in the vertex table - - # this would provide a reasonable estimate of what the bloom table size - # should be, though in reality we want leaf count per above argument - - # at the time of writing leaves make up around 3/4 of all verticies - bloomSize = uint32((computeBits(expectedKeys, bloomRate) + 7) / 8) - -func hashes(v: uint64): (uint32, uint32) = - # Use the two halves of an uint64 to create two independent hashes functions - # for the bloom that allow efficiently generating more bloom hash functions - # per Kirsch and Mitzenmacher: - # https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf - let - v = uint64(hash(v)) # `hash` for a better spread of bits into upper half - h1 = uint32(v) - h2 = uint32(v shr 32) - (h1, h2) - -func insert(filter: var BasicBloomFilter, v: uint64) = - let (h1, h2) = hashes(v) - - staticFor i, 0 ..< bloomHashes: - let - hash = (h1 + i * h2) - bitIdx = uint8(hash mod 8) - byteIdx = (hash div 8) mod bloomSize - filter.bytes[byteIdx] = filter.bytes[byteIdx] or (1'u8 shl bitIdx) - -func query(filter: BasicBloomFilter, v: uint64): bool = - let (h1, h2) = hashes(v) - - var match = 1'u8 - - staticFor i, 0 ..< bloomHashes: - let - hash = (h1 + i * h2) - bitIdx = uint8(hash mod 8) - byteIdx = (hash div 8) mod bloomSize - match = match and ((filter.bytes[byteIdx] shr bitIdx) and 1) - - match > 0 - -proc init(T: type BasicBloomFilter): T = - # We use the C memory allocator so as to return memory to the operating system - # at the end of the computation - we don't want the one-off blob to remain in - # the hands of the Nim GC. - # `calloc` to get zeroed memory out of the box - let memory = c_calloc(csize_t(bloomSize), 1) - doAssert memory != nil, "Could not allocate memory for bloom filter" - T(bytes: cast[ptr UncheckedArray[byte]](memory)) - -proc release(v: BasicBloomFilter) = - # TODO with orc, this could be a destructor - c_free(v.bytes) - type WriteBatch = tuple[writer: PutHdlRef, count: int, depth: int, prefix: uint64] # Keep write batch size _around_ 1mb, give or take some overhead - this is a @@ -141,12 +52,12 @@ func progress(batch: WriteBatch): string = # looking at the path prefix that we're currently processing &"{(float(batch.prefix) / float(uint64.high)) * 100:02.2f}%" -func enter(batch: var WriteBatch, nibble: int) = +func enter(batch: var WriteBatch, nibble: uint8) = batch.depth += 1 if batch.depth <= 16: batch.prefix += uint64(nibble) shl ((16 - batch.depth) * 4) -func leave(batch: var WriteBatch, nibble: int) = +func leave(batch: var WriteBatch, nibble: uint8) = if batch.depth <= 16: batch.prefix -= uint64(nibble) shl ((16 - batch.depth) * 4) batch.depth -= 1 @@ -196,9 +107,9 @@ template encodeLeaf(w: var RlpWriter, pfx: NibblesBuf, leafData: untyped): HashK w.append(leafData) w.finish().digestTo(HashKey) -template encodeBranch(w: var RlpWriter, subKeyForN: untyped): HashKey = +template encodeBranch(w: var RlpWriter, vtx: VertexRef, subKeyForN: untyped): HashKey = w.startList(17) - for n {.inject.} in 0 .. 
15:
+  for (n {.inject.}, subvid {.inject.}) in vtx.allPairs():
     w.append(subKeyForN)
   w.append EmptyBlob
   w.finish().digestTo(HashKey)
@@ -209,23 +120,26 @@ template encodeExt(w: var RlpWriter, pfx: NibblesBuf, branchKey: HashKey): HashK
     w.append(branchKey)
   w.finish().digestTo(HashKey)
 
+proc getKey(
+    db: AristoDbRef, rvid: RootedVertexID, skipLayers: static bool
+): Result[((HashKey, VertexRef), int), AristoError] =
+  ok when skipLayers:
+    (?db.getKeyUbe(rvid, {GetVtxFlag.PeekCache}), -2)
+  else:
+    ?db.getKeyRc(rvid, {})
+
 proc computeKeyImpl(
     db: AristoDbRef,
     rvid: RootedVertexID,
     batch: var WriteBatch,
-    bloom: ptr BasicBloomFilter = nil,
+    vtxl: (VertexRef, int),
+    skipLayers: static bool,
 ): Result[(HashKey, int), AristoError] =
-  # The bloom filter available used only when creating the key cache from an
-  # empty state
-  if bloom == nil or bloom[].query(uint64(rvid.vid)):
-    db.getKeyRc(rvid).isErrOr:
-      # Value cached either in layers or database
-      return ok value
-
-  let (vtx, vl) = ?db.getVtxRc(rvid, {GetVtxFlag.PeekCache})
-  # Top-most level of all the verticies this hash compution depends on
-  var level = vl
+  # Top-most level of all the vertices this hash computation depends on
+  var (vtx, level) = vtxl
 
   # TODO this is the same code as when serializing NodeRef, without the NodeRef
   var writer = initRlpWriter()
@@ -240,8 +154,16 @@ proc computeKeyImpl(
           stoID = vtx.lData.stoID
           skey =
             if stoID.isValid:
-              let (skey, sl) =
-                ?db.computeKeyImpl((stoID.vid, stoID.vid), batch, bloom)
+              let
+                keyvtxl = ?db.getKey((stoID.vid, stoID.vid), skipLayers)
+                (skey, sl) =
+                  if keyvtxl[0][0].isValid:
+                    (keyvtxl[0][0], keyvtxl[1])
+                  else:
+                    let vtxl = (keyvtxl[0][1], keyvtxl[1])
+                    ?db.computeKeyImpl(
+                      (stoID.vid, stoID.vid), batch, vtxl, skipLayers = skipLayers
+                    )
               level = maxLevel(level, sl)
               skey
             else:
@@ -257,12 +179,26 @@ proc computeKeyImpl(
         # TODO avoid memory allocation when encoding storage data
         rlp.encode(vtx.lData.stoData)
   of Branch:
+    # For branches, we need to load the vertices before recursing into them
+    # to exploit their on-disk order
+    var keyvtxs: array[16, ((HashKey, VertexRef), int)]
+    for n, subvid in vtx.pairs:
+      keyvtxs[n] = ?db.getKey((rvid.root, subvid), skipLayers)
+
     template writeBranch(w: var RlpWriter): HashKey =
-      w.encodeBranch:
-        let vid = vtx.bVid[n]
-        if vid.isValid:
+      w.encodeBranch(vtx):
+        if subvid.isValid:
           batch.enter(n)
-          let (bkey, bl) = ?db.computeKeyImpl((rvid.root, vid), batch, bloom)
+          let (bkey, bl) =
+            if keyvtxs[n][0][0].isValid:
+              (keyvtxs[n][0][0], keyvtxs[n][1])
+            else:
+              ?db.computeKeyImpl(
+                (rvid.root, subvid),
+                batch,
+                (keyvtxs[n][0][1], keyvtxs[n][1]),
+                skipLayers = skipLayers,
+              )
           batch.leave(n)
 
           level = maxLevel(level, bl)
@@ -289,10 +225,19 @@ proc computeKeyImpl(
   ok (key, level)
 
 proc computeKeyImpl(
-    db: AristoDbRef, rvid: RootedVertexID, bloom: ptr BasicBloomFilter
+    db: AristoDbRef, rvid: RootedVertexID, skipLayers: static bool
 ): Result[HashKey, AristoError] =
+  let (keyvtx, level) =
+    when skipLayers:
+      (?db.getKeyUbe(rvid, {GetVtxFlag.PeekCache}), -2)
+    else:
+      ?db.getKeyRc(rvid, {})
+
+  if keyvtx[0].isValid:
+    return ok(keyvtx[0])
+
   var batch: WriteBatch
-  let res = computeKeyImpl(db, rvid, batch, bloom)
+  let res = computeKeyImpl(db, rvid, batch, (keyvtx[1], level), skipLayers = skipLayers)
   if res.isOk:
     ?batch.flush(db)
 
@@ -313,208 +258,12 @@ proc computeKey*(
   ## state/hash, it must be converted to a `Hash32` (using (`.to(Hash32)`) as
   ## in `db.computeKey(rvid).value.to(Hash32)` which always results in a
   ## 32 byte value.
- computeKeyImpl(db, rvid, nil) - -proc computeLeafKeysImpl( - T: type, db: AristoDbRef, root: VertexID -): Result[void, AristoError] = - # Key computation function that works by iterating over the entries in the - # database (instead of traversing trie using point lookups) - due to how - # rocksdb is organised, this cache-friendly traversal order turns out to be - # more efficient even if we "touch" a lot of irrelevant entries. - # Computation works bottom-up starting with the leaves and proceeding with - # branches whose children were computed in the previous round one "layer" - # at a time until the the number of successfully computed nodes grows low. - # TODO progress indicator - - block: - if db.getKeyUbe((root, root)).isOk(): - return ok() # Fast path for when the root is in the database already - - # Smoke check to see if we can find lots of branch nodes with keys already - var branches, found: int - for (rvid, vtx) in T.walkVtxBe(db, {Branch}): - branches += 1 - - if db.getKeyUbe(rvid).isOk: - found += 1 - - # 10% found on the initial sample.. good enough? Some more randomness - # here would maybe make sense - if branches > 1000: - if found * 10 > branches: - return ok() - break - - - info "Writing key cache (this may take a while)" - - var batch: WriteBatch - - # Bloom filter keeping track of keys we're added to the database already so - # as to avoid expensive speculative lookups - var bloom = BasicBloomFilter.init() - defer: - bloom.release() - - var - # Reuse rlp writers to avoid superfluous memory allocations - writer = initRlpWriter() - writer2 = initRlpWriter() - writer3 = initRlpWriter() - level = 0 - leaves = 0 - - # Load leaves into bloom filter so we can quickly skip over branch nodes where - # we know the lookup will fail. - # At the time of writing, this is roughly 3/4 of the of the entries in the - # database on mainnet - the ratio roughly corresponds to the fill ratio of the - # deepest branch nodes as nodes close to the MPT root don't come in - # significant numbers - # Leaf keys are not computed to save space - instead, if they are needed they - # are computed from the leaf data. - for (rvid, vtx) in T.walkVtxBe(db, {Leaf}): - if vtx.lData.pType == AccountData and vtx.lData.stoID.isValid: - # Accounts whose key depends on the storage trie typically will not yet - # have their root node computed and several such contracts are - # significant in size, meaning that we might as well let their leaves - # be computed and then top up during regular trie traversal. - continue - - bloom.insert(uint64(rvid.vid)) - leaves += 1 - - # The leaves have been loaded into the bloom filter - we'll now proceed to - # branches expecting diminishing returns for each layer - not only beacuse - # there are fewer nodes closer to the root in the trie but also because leaves - # we skipped over lead larger and larger branch gaps and the advantage of - # iterating in disk order is lost - var lastRound = leaves - - level += 1 - - # 16*16 looks like "2 levels of MPT" but in reality, the branch nodes close - # to the leaves are sparse - on average about 4 nodes per branch on mainnet - - # meaning that we'll do 3-4 levels of branch depending on the network - var branches = 0 - while lastRound > (leaves div (16 * 16)): - info "Starting branch layer", keys = batch.count, lastRound, level - var round = 0 - branches = 0 - - for (rvid, vtx) in T.walkVtxBe(db, {Branch}): - branches += 1 - - if vtx.pfx.len > 0: - # TODO there shouldn't be many extension nodes - is it worth the lookup? 
- continue - - if level > 1: - # A hit on the bloom filter here means we **maybe** already computed a - # key for this branch node - we could verify this with a lookup but - # the generally low false positive rate makes this check more expensive - # than simply revisiting the node using trie traversal. - if bloom.query(uint64(rvid.vid)): - continue - - block branchKey: - for b in vtx.bVid: - if b.isValid and not bloom.query(uint64(b)): - # If any child is missing from the branch, we can't compute the key - # trivially - break branchKey - - writer.clear() - let key = writer.encodeBranch: - let vid = vtx.bVid[n] - if vid.isValid: - let bkeyOpt = - if level == 1: # No leaf keys in database - Result[HashKey, AristoError].err(GetKeyNotFound) - else: - db.getKeyUbe((rvid.root, vid)) - bkeyOpt.valueOr: - let bvtx = db.getVtxUbe((rvid.root, vid)).valueOr: - # Incomplete database? - break branchKey - - if bvtx == nil or ( - bvtx.vType == Leaf and bvtx.lData.pType == AccountData and - bvtx.lData.stoID.isValid - ): - # It's unlikely storage root key has been computed already, so - # skip - # TODO maybe worth revisting - a not insignificant number of - # contracts have only a leaf storage slot so for those we - # could trivially compute account storage root.. - break branchKey - case bvtx.vType - of Leaf: - writer2.clear() - - writer2.encodeLeaf(bvtx.pfx): - writer3.clear() - case bvtx.lData.pType - of AccountData: - writer3.append Account( - nonce: bvtx.lData.account.nonce, - balance: bvtx.lData.account.balance, - # Accounts with storage filtered out above - storageRoot: EMPTY_ROOT_HASH, - codeHash: bvtx.lData.account.codeHash, - ) - of StoData: - writer3.append(bvtx.lData.stoData) - writer3.finish() - of Branch: - break branchKey - else: - VOID_HASH_KEY - - ?batch.putVtx(db, rvid, vtx, key) - - if batch.count mod batchSize == 0: - ?batch.flush(db) - if batch.count mod (batchSize * 100) == 0: - info "Writing branches", keys = batch.count, round, level - else: - debug "Writing branches", keys = batch.count, round, level - - round += 1 - bloom.insert(uint64(rvid.vid)) - - lastRound = round - level += 1 - - ?batch.flush(db) - - info "Key cache base written", - keys = batch.count, lastRound, leaves, branches - - let rc = computeKeyImpl(db, (root, root), addr bloom) - if rc.isOk() or rc.error() == GetVtxNotFound: - # When there's no root vertex, the database is likely empty - ok() - else: - err(rc.error()) + computeKeyImpl(db, rvid, skipLayers = false) proc computeKeys*(db: AristoDbRef, root: VertexID): Result[void, AristoError] = - ## Computing the leaf keys is a pre-processing step for when hash cache is - ## empty. - ## - ## Computing it by traversing the trie can take days because of the mismatch - ## between trie traversal order and the on-disk VertexID-based sorting. - ## - ## This implementation speeds up the inital seeding of the cache by traversing - ## the full state in on-disk order and computing hashes bottom-up instead. 
- - case db.backend.kind - of BackendMemory: - MemBackendRef.computeLeafKeysImpl db, root - of BackendRocksDB, BackendRdbHosting: - RdbBackendRef.computeLeafKeysImpl db, root - of BackendVoid: - ok() + ## Ensure that key cache is topped up with the latest state root + discard db.computeKeyImpl((root, root), skipLayers = true) + ok() # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_debug.nim b/nimbus/db/aristo/aristo_debug.nim index 1b49c43f9..bec16559f 100644 --- a/nimbus/db/aristo/aristo_debug.nim +++ b/nimbus/db/aristo/aristo_debug.nim @@ -200,9 +200,9 @@ func ppVtx(nd: VertexRef, db: AristoDbRef, rvid: RootedVertexID): string = result &= nd.pfx.ppPathPfx & "," & nd.lData.ppPayload(db) of Branch: result &= nd.pfx.ppPathPfx & ":" - for n in 0..15: - if nd.bVid[n].isValid: - result &= nd.bVid[n].ppVid + for n in 0'u8..15'u8: + if nd.bVid(n).isValid: + result &= nd.bVid(n).ppVid if n < 15: result &= "," result &= ")" @@ -238,12 +238,12 @@ proc ppNode( else: result &= nd.vtx.lData.ppPayload(db) of Branch: - let keyOnly = nd.vtx.bVid.toSeq.filterIt(it.isValid).len == 0 + let keyOnly = nd.vtx.subVids.toSeq.filterIt(it.isValid).len == 0 result &= nd.vtx.pfx.ppPathPfx & ":" - for n in 0..15: - if nd.vtx.bVid[n].isValid: - let tag = db.ppKeyOk(nd.key[n],(rvid.root,nd.vtx.bVid[n])) - result &= nd.vtx.bVid[n].ppVid & tag + for n in 0'u8..15'u8: + if nd.vtx.bVid(n).isValid: + let tag = db.ppKeyOk(nd.key[n],(rvid.root,nd.vtx.bVid(n))) + result &= nd.vtx.bVid(n).ppVid & tag elif keyOnly and nd.key[n].isValid: result &= nd.key[n].ppKey(db) if n < 15: diff --git a/nimbus/db/aristo/aristo_delete.nim b/nimbus/db/aristo/aristo_delete.nim index 504609f20..86a4cd6ab 100644 --- a/nimbus/db/aristo/aristo_delete.nim +++ b/nimbus/db/aristo/aristo_delete.nim @@ -25,14 +25,14 @@ import # Private heplers # ------------------------------------------------------------------------------ -proc branchStillNeeded(vtx: VertexRef, removed: int): Result[int,void] = +proc branchStillNeeded(vtx: VertexRef, removed: int8): Result[int8,void] = ## Returns the nibble if there is only one reference left. - var nibble = -1 - for n in 0 .. 15: + var nibble = -1'i8 + for n in 0'i8 .. 
15'i8: if n == removed: continue - if vtx.bVid[n].isValid: + if vtx.bVid(uint8 n).isValid: if 0 <= nibble: return ok(-1) nibble = n @@ -84,7 +84,7 @@ proc deleteImpl( # Get child vertex (there must be one after a `Branch` node) let - vid = br.vtx.bVid[nbl] + vid = br.vtx.bVid(uint8 nbl) nxt = db.getVtx (hike.root, vid) if not nxt.isValid: return err(DelVidStaleVtx) @@ -103,7 +103,8 @@ proc deleteImpl( VertexRef( vType: Branch, pfx: br.vtx.pfx & NibblesBuf.nibble(nbl.byte) & nxt.pfx, - bVid: nxt.bVid) + startVid: nxt.startVid, + used: nxt.used) # Put the new vertex at the id of the obsolete branch db.layersPutVtx((hike.root, br.vid), vtx) @@ -115,7 +116,7 @@ proc deleteImpl( else: # Clear the removed leaf from the branch (that still contains other children) let brDup = br.vtx.dup - brDup.bVid[hike.legs[^2].nibble] = VertexID(0) + discard brDup.setUsed(uint8 hike.legs[^2].nibble, false) db.layersPutVtx((hike.root, br.vid), brDup) ok(nil) diff --git a/nimbus/db/aristo/aristo_delete/delete_subtree.nim b/nimbus/db/aristo/aristo_delete/delete_subtree.nim index cbbdb5694..038c01f8c 100644 --- a/nimbus/db/aristo/aristo_delete/delete_subtree.nim +++ b/nimbus/db/aristo/aristo_delete/delete_subtree.nim @@ -29,9 +29,8 @@ proc delSubTreeNow( return err(error) if vtx.vType == Branch: - for n in 0..15: - if vtx.bVid[n].isValid: - ? db.delSubTreeNow((rvid.root,vtx.bVid[n])) + for _, subvid in vtx.pairs(): + ? db.delSubTreeNow((rvid.root, subvid)) db.layersResVtx(rvid) @@ -53,11 +52,10 @@ proc delStoTreeNow( case vtx.vType of Branch: - for i in 0..15: - if vtx.bVid[i].isValid: - ? db.delStoTreeNow( - (rvid.root, vtx.bVid[i]), accPath, - stoPath & vtx.pfx & NibblesBuf.nibble(byte i)) + for n, subvid in vtx.pairs(): + ? db.delStoTreeNow( + (rvid.root, subvid), accPath, + stoPath & vtx.pfx & NibblesBuf.nibble(n)) of Leaf: let stoPath = Hash32((stoPath & vtx.pfx).getBytes()) diff --git a/nimbus/db/aristo/aristo_delta/delta_reverse.nim b/nimbus/db/aristo/aristo_delta/delta_reverse.nim index 308e54089..4db807f46 100644 --- a/nimbus/db/aristo/aristo_delta/delta_reverse.nim +++ b/nimbus/db/aristo/aristo_delta/delta_reverse.nim @@ -35,9 +35,9 @@ proc revSubTree( return err((rvid.vid,rc.error)) key = block: - let rc = db.getKeyUbe rvid + let rc = db.getKeyUbe(rvid, {}) if rc.isOk: - rc.value + rc.value[0] elif rc.error == GetKeyNotFound: VOID_HASH_KEY else: @@ -89,9 +89,9 @@ proc revFilter*( # Calculate reverse changes for the `kMap[]` structural table. for rvid in filter.kMap.keys: - let rc = db.getKeyUbe rvid + let rc = db.getKeyUbe(rvid, {}) if rc.isOk: - rev.kMap[rvid] = rc.value + rev.kMap[rvid] = rc.value[0] elif rc.error == GetKeyNotFound: rev.kMap[rvid] = VOID_HASH_KEY else: diff --git a/nimbus/db/aristo/aristo_desc/desc_backend.nim b/nimbus/db/aristo/aristo_desc/desc_backend.nim index 6a144cbe0..d206a6a9f 100644 --- a/nimbus/db/aristo/aristo_desc/desc_backend.nim +++ b/nimbus/db/aristo/aristo_desc/desc_backend.nim @@ -25,7 +25,7 @@ type ## `Aristo DB` data record. GetKeyFn* = - proc(rvid: RootedVertexID): Result[HashKey,AristoError] {.gcsafe, raises: [].} + proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[(HashKey, VertexRef),AristoError] {.gcsafe, raises: [].} ## Generic backend database retrieval function for a single ## `Aristo DB` hash lookup value. 
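A note on the `GetKeyFn` change above: returning `(HashKey, VertexRef)` lets one backend read serve both purposes. When no key is cached, the backend hands back the vertex it found at the same slot, and the caller derives the key from it instead of issuing a second lookup for the very same record. A minimal standalone sketch of that calling pattern, with stand-in types (`resolve` and `computeFromVertex` are illustrative names, not part of the module):

    # Sketch of the key-or-vertex fallback behind the new GetKeyFn signature.
    type
      HashKey = object
        len: int                  # stand-in: 0 means "no cached key"
      VertexRef = ref object      # stand-in for the real vertex type

    func isValid(k: HashKey): bool =
      k.len > 0

    proc computeFromVertex(vtx: VertexRef): HashKey =
      # placeholder for the real (recursive) Merkle key computation
      HashKey(len: 32)

    proc resolve(fetched: (HashKey, VertexRef)): HashKey =
      let (key, vtx) = fetched
      if key.isValid:
        key                       # fast path: key was stored in the database
      else:
        computeFromVertex(vtx)    # fallback: derive the key from the vertex

This is the pattern `aristo_compute` follows above: when `keyvtxs[n][0][0].isValid` fails, it recurses into `computeKeyImpl` with the vertex that came back from the same read.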
diff --git a/nimbus/db/aristo/aristo_desc/desc_identifiers.nim b/nimbus/db/aristo/aristo_desc/desc_identifiers.nim index 350e1711f..231526968 100644 --- a/nimbus/db/aristo/aristo_desc/desc_identifiers.nim +++ b/nimbus/db/aristo/aristo_desc/desc_identifiers.nim @@ -383,6 +383,12 @@ func hash*(a: HashKey): Hash = ## Table/KeyedQueue mixin hash(a.data) +func append*(w: var RlpWriter; key: HashKey) = + if 1 < key.len and key.len < 32: + w.appendRawBytes key.data + else: + w.append key.data + # ------------------------------------------------------------------------------ # Miscellaneous helpers # ------------------------------------------------------------------------------ diff --git a/nimbus/db/aristo/aristo_desc/desc_structural.nim b/nimbus/db/aristo/aristo_desc/desc_structural.nim index 216f4f368..f3e5f4ba9 100644 --- a/nimbus/db/aristo/aristo_desc/desc_structural.nim +++ b/nimbus/db/aristo/aristo_desc/desc_structural.nim @@ -73,7 +73,8 @@ type of Leaf: lData*: LeafPayload ## Reference to data payload of Branch: - bVid*: array[16,VertexID] ## Edge list with vertex IDs + startVid*: VertexID + used*: uint16 NodeRef* = ref object of RootRef ## Combined record for a *traditional* ``Merkle Patricia Tree` node merged @@ -135,6 +136,20 @@ type # Public helpers (misc) # ------------------------------------------------------------------------------ +func bVid*(vtx: VertexRef, nibble: uint8): VertexID = + if (vtx.used and (1'u16 shl nibble)) > 0: + VertexID(uint64(vtx.startVid) + nibble) + else: + default(VertexID) + +func setUsed*(vtx: VertexRef, nibble: uint8, used: static bool): VertexID = + vtx.used = + when used: + vtx.used or (1'u16 shl nibble) + else: + vtx.used and (not (1'u16 shl nibble)) + vtx.bVid(nibble) + func init*(T: type LayerRef): T = ## Constructor, returns empty layer T() @@ -176,18 +191,41 @@ proc `==`*(a, b: VertexRef): bool = if a.pfx != b.pfx or a.lData != b.lData: return false of Branch: - if a.pfx != b.pfx or a.bVid != b.bVid: + if a.pfx != b.pfx or a.startVid != b.startVid or a.used != b.used: return false true +iterator pairs*(vtx: VertexRef): tuple[nibble: uint8, vid: VertexID] = + ## Iterates over the sub-vids of a branch (does nothing for leaves) + case vtx.vType: + of Leaf: + discard + of Branch: + for n in 0'u8 .. 15'u8: + if (vtx.used and (1'u16 shl n)) > 0: + yield (n, VertexID(uint64(vtx.startVid) + n)) + +iterator allPairs*(vtx: VertexRef): tuple[nibble: uint8, vid: VertexID] = + ## Iterates over the sub-vids of a branch (does nothing for leaves) including + ## currently unset nodes + case vtx.vType: + of Leaf: + discard + of Branch: + for n in 0'u8 .. 15'u8: + if (vtx.used and (1'u16 shl n)) > 0: + yield (n, VertexID(uint64(vtx.startVid) + n)) + else: + yield (n, default(VertexID)) + proc `==`*(a, b: NodeRef): bool = ## Beware, potential deep comparison if a.vtx != b.vtx: return false case a.vtx.vType: of Branch: - for n in 0..15: - if a.vtx.bVid[n] != 0.VertexID or b.vtx.bVid[n] != 0.VertexID: + for n in 0'u8..15'u8: + if a.vtx.bVid(n) != 0.VertexID or b.vtx.bVid(n) != 0.VertexID: if a.key[n] != b.key[n]: return false else: @@ -228,7 +266,8 @@ func dup*(vtx: VertexRef): VertexRef = VertexRef( vType: Branch, pfx: vtx.pfx, - bVid: vtx.bVid) + startVid: vtx.startVid, + used: vtx.used) func dup*(node: NodeRef): NodeRef = ## Duplicate node. 
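To make the compact branch encoding introduced in `desc_structural.nim` above concrete: a branch now records one base id (`startVid`) plus a 16-bit occupancy bitmap (`used`), and child ids are derived rather than stored. A standalone worked sketch, mirroring the `bVid` helper above and reusing the values from the updated `test_blobify` vectors at the end of this patch:

    type VertexID = distinct uint64   # redeclared here so the sketch runs alone

    func bVid(startVid: VertexID, used: uint16, nibble: uint8): VertexID =
      # mirror of desc_structural.bVid: startVid + nibble when bit `nibble` is set
      if (used and (1'u16 shl nibble)) > 0:
        VertexID(uint64(startVid) + nibble)
      else:
        default(VertexID)

    when isMainModule:
      # the `extension` vector from test_blobify: startVid 0x55, used 0x12
      # 0x12 = 0b0001_0010, so only nibbles 1 and 4 have children
      doAssert uint64(bVid(VertexID(0x55), 0x12, 1)) == 0x56'u64
      doAssert uint64(bVid(VertexID(0x55), 0x12, 4)) == 0x59'u64
      doAssert uint64(bVid(VertexID(0x55), 0x12, 7)) == 0'u64

The precondition is that all 16 child slots of a branch are allocated as one contiguous id range when the branch is created, which is what the `vidFetch(16)` calls in `aristo_merge.nim` further down provide.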
diff --git a/nimbus/db/aristo/aristo_fetch.nim b/nimbus/db/aristo/aristo_fetch.nim
index 8d4354b21..3e9128230 100644
--- a/nimbus/db/aristo/aristo_fetch.nim
+++ b/nimbus/db/aristo/aristo_fetch.nim
@@ -83,7 +83,7 @@ proc retrieveMerkleHash(
  ): Result[Hash32,AristoError] =
   let key =
     db.computeKey((root, root)).valueOr:
-      if error == GetVtxNotFound:
+      if error in [GetVtxNotFound, GetKeyNotFound]:
         return ok(EMPTY_ROOT_HASH)
       return err(error)
 
diff --git a/nimbus/db/aristo/aristo_get.nim b/nimbus/db/aristo/aristo_get.nim
index 387f7c054..f50f63072 100644
--- a/nimbus/db/aristo/aristo_get.nim
+++ b/nimbus/db/aristo/aristo_get.nim
@@ -54,11 +54,12 @@ proc getVtxUbe*(
 proc getKeyUbe*(
     db: AristoDbRef;
     rvid: RootedVertexID;
-  ): Result[HashKey,AristoError] =
+    flags: set[GetVtxFlag];
+  ): Result[(HashKey, VertexRef),AristoError] =
   ## Get the Merkle hash/key from the unfiltered backend if available.
   let be = db.backend
   if not be.isNil:
-    return be.getKeyFn rvid
+    return be.getKeyFn(rvid, flags)
   err GetKeyNotFound
 
 # ------------------
@@ -87,14 +88,18 @@ proc getVtxBE*(
 proc getKeyBE*(
     db: AristoDbRef;
     rvid: RootedVertexID;
-  ): Result[(HashKey, int),AristoError] =
+    flags: set[GetVtxFlag];
+  ): Result[((HashKey, VertexRef), int),AristoError] =
   ## Get the merkle hash/key from the (filtered) backend if available.
   if not db.balancer.isNil:
     db.balancer.kMap.withValue(rvid, w):
       if w[].isValid:
-        return ok((w[], -1))
-    return err(GetKeyNotFound)
-  ok ((?db.getKeyUbe rvid), -2)
+        return ok(((w[], nil), -1))
+    db.balancer.sTab.withValue(rvid, s):
+      if s[].isValid:
+        return ok(((VOID_HASH_KEY, s[]), -1))
+    return err(GetKeyNotFound)
+  ok ((?db.getKeyUbe(rvid, flags)), -2)
 
 # ------------------
 
@@ -124,7 +129,8 @@ proc getVtx*(db: AristoDbRef; rvid: RootedVertexID, flags: set[GetVtxFlag] = {})
   ##
   db.getVtxRc(rvid).valueOr((VertexRef(nil), 0))[0]
 
-proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),AristoError] =
+proc getKeyRc*(
+    db: AristoDbRef; rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[((HashKey, VertexRef), int),AristoError] =
   ## Cascaded attempt to fetch a Merkle hash from the cache layers or the
-  ## backend. This function will never return a `VOID_HASH_KEY` but rather
-  ## some `GetKeyNotFound` or `GetKeyUpdateNeeded` error.
+  ## backend. Returns the cached key when one is available; otherwise, when a
+  ## valid vertex exists, returns `VOID_HASH_KEY` paired with that vertex so
+  ## the caller can compute the key, and `GetKeyNotFound` in all other cases.
@@ -135,27 +141,27 @@ proc getKeyRc*(db: AristoDbRef; rvid: RootedVertexID): Result[(HashKey, int),Ari
     # If there is a zero key value, the entry is either marked for being
     # updated or for deletion on the database. So check below.
     if key[0].isValid:
-      return ok key
+      return ok ((key[0], nil), key[1])
 
     # The zero key value does not refer to an update mark if there is no
     # valid vertex (either on the cache or the backend whatever comes first.)
     let vtx = db.layersGetVtx(rvid).valueOr:
-      # There was no vertex on the cache. So there must be one the backend (the
-      # reason for the key lable to exists, at all.)
-      return err(GetKeyUpdateNeeded)
+      # There was no vertex on the cache. So there must be one on the backend
+      # (the reason for the key label to exist at all.)
+      return err(GetKeyNotFound)
     if vtx[0].isValid:
-      return err(GetKeyUpdateNeeded)
+      return ok ((VOID_HASH_KEY, vtx[0]), vtx[1])
     else:
       # The vertex is to be deleted. So is the value key.
      return err(GetKeyNotFound)
 
-  db.getKeyBE rvid
+  db.getKeyBE(rvid, flags)
 
 proc getKey*(db: AristoDbRef; rvid: RootedVertexID): HashKey =
   ## Cascaded attempt to fetch a vertex from the cache layers or the backend.
   ## The function returns `nil` on error or failure.
## - (db.getKeyRc(rvid).valueOr((VOID_HASH_KEY, 0)))[0] + (db.getKeyRc(rvid, {}).valueOr(((VOID_HASH_KEY, nil), 0)))[0][0] # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_hike.nim b/nimbus/db/aristo/aristo_hike.nim index a238bab67..3c51a4ad1 100644 --- a/nimbus/db/aristo/aristo_hike.nim +++ b/nimbus/db/aristo/aristo_hike.nim @@ -91,8 +91,8 @@ proc step*( return err(HikeBranchTailEmpty) let - nibble = path[vtx.pfx.len].int8 - nextVid = vtx.bVid[nibble] + nibble = path[vtx.pfx.len] + nextVid = vtx.bVid(nibble) if not nextVid.isValid: return err(HikeBranchMissingEdge) diff --git a/nimbus/db/aristo/aristo_init/memory_db.nim b/nimbus/db/aristo/aristo_init/memory_db.nim index c3aa46d6e..e70484f60 100644 --- a/nimbus/db/aristo/aristo_init/memory_db.nim +++ b/nimbus/db/aristo/aristo_init/memory_db.nim @@ -97,13 +97,14 @@ proc getVtxFn(db: MemBackendRef): GetVtxFn = proc getKeyFn(db: MemBackendRef): GetKeyFn = result = - proc(rvid: RootedVertexID): Result[HashKey,AristoError] = + proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[(HashKey, VertexRef),AristoError] = let data = db.mdb.sTab.getOrDefault(rvid, EmptyBlob) if 0 < data.len: let key = data.deblobify(HashKey).valueOr: - return err(GetKeyNotFound) - if key.isValid: - return ok(key) + let vtx = data.deblobify(VertexRef).valueOr: + return err(GetKeyNotFound) + return ok((VOID_HASH_KEY, vtx)) + return ok((key, nil)) err(GetKeyNotFound) proc getTuvFn(db: MemBackendRef): GetTuvFn = @@ -150,14 +151,7 @@ proc putLstFn(db: MemBackendRef): PutLstFn = proc(hdl: PutHdlRef; lst: SavedState) = let hdl = hdl.getSession db if hdl.error.isNil: - let rc = lst.blobify # test - if rc.isOk: - hdl.lSst = Opt.some(lst) - else: - hdl.error = TypedPutHdlErrRef( - pfx: AdmPfx, - aid: AdmTabIdLst, - code: rc.error) + hdl.lSst = Opt.some(lst) proc putEndFn(db: MemBackendRef): PutEndFn = result = diff --git a/nimbus/db/aristo/aristo_init/rocks_db.nim b/nimbus/db/aristo/aristo_init/rocks_db.nim index a25b05936..36baa6734 100644 --- a/nimbus/db/aristo/aristo_init/rocks_db.nim +++ b/nimbus/db/aristo/aristo_init/rocks_db.nim @@ -90,15 +90,15 @@ proc getVtxFn(db: RdbBackendRef): GetVtxFn = proc getKeyFn(db: RdbBackendRef): GetKeyFn = result = - proc(rvid: RootedVertexID): Result[HashKey,AristoError] = + proc(rvid: RootedVertexID, flags: set[GetVtxFlag]): Result[(HashKey, VertexRef),AristoError] = # Fetch serialised data record - let key = db.rdb.getKey(rvid).valueOr: + let key = db.rdb.getKey(rvid, flags).valueOr: when extraTraceMessages: trace logTxt "getKeyFn: failed", rvid, error=error[0], info=error[1] return err(error[0]) - if key.isValid: + if (key[0].isValid or key[1].isValid): return ok(key) err(GetKeyNotFound) @@ -173,12 +173,7 @@ proc putLstFn(db: RdbBackendRef): PutLstFn = proc(hdl: PutHdlRef; lst: SavedState) = let hdl = hdl.getSession db if hdl.error.isNil: - let data = lst.blobify.valueOr: - hdl.error = TypedPutHdlErrRef( - pfx: AdmPfx, - aid: AdmTabIdLst, - code: error) - return + let data = lst.blobify db.rdb.putAdm(AdmTabIdLst, data).isOkOr: hdl.error = TypedPutHdlErrRef( pfx: AdmPfx, diff --git a/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim b/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim index aa0914d42..d7df888c8 100644 --- a/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim +++ b/nimbus/db/aristo/aristo_init/rocks_db/rdb_get.nim @@ -22,25 +22,21 @@ import ./rdb_desc, std/concurrency/atomics -const - extraTraceMessages = false - ## Enable additional logging noise 
+const extraTraceMessages = false ## Enable additional logging noise when extraTraceMessages: - import - chronicles + import chronicles logScope: topics = "aristo-rocksdb" when defined(metrics): - import - metrics - + import metrics + type RdbVtxLruCounter = ref object of Counter RdbKeyLruCounter = ref object of Counter - + var rdbVtxLruStatsMetric {.used.} = RdbVtxLruCounter.newCollector( "aristo_rdb_vtx_lru_total", @@ -50,10 +46,10 @@ when defined(metrics): rdbKeyLruStatsMetric {.used.} = RdbKeyLruCounter.newCollector( "aristo_rdb_key_lru_total", "HashKey LRU lookup", labels = ["state", "hit"] ) - + method collect*(collector: RdbVtxLruCounter, output: MetricHandler) = let timestamp = collector.now() - + # We don't care about synchronization between each type of metric or between # the metrics thread and others since small differences like this don't matter for state in RdbStateType: @@ -66,10 +62,10 @@ when defined(metrics): labelValues = [$state, $vtype, $ord(hit)], timestamp = timestamp, ) - + method collect*(collector: RdbKeyLruCounter, output: MetricHandler) = let timestamp = collector.now() - + for state in RdbStateType: for hit in [false, true]: output( @@ -84,16 +80,16 @@ when defined(metrics): # Public functions # ------------------------------------------------------------------------------ -proc getAdm*(rdb: RdbInst; xid: AdminTabID): Result[seq[byte],(AristoError,string)] = +proc getAdm*(rdb: RdbInst, xid: AdminTabID): Result[seq[byte], (AristoError, string)] = var res: seq[byte] let onData = proc(data: openArray[byte]) = res = @data let gotData = rdb.admCol.get(xid.toOpenArray, onData).valueOr: - const errSym = RdbBeDriverGetAdmError - when extraTraceMessages: - trace logTxt "getAdm", xid, error=errSym, info=error - return err((errSym,error)) + const errSym = RdbBeDriverGetAdmError + when extraTraceMessages: + trace logTxt "getAdm", xid, error = errSym, info = error + return err((errSym, error)) # Correct result if needed if not gotData: @@ -101,43 +97,64 @@ proc getAdm*(rdb: RdbInst; xid: AdminTabID): Result[seq[byte],(AristoError,strin ok move(res) proc getKey*( - rdb: var RdbInst; - rvid: RootedVertexID; - ): Result[HashKey,(AristoError,string)] = - # Try LRU cache first - var rc = rdb.rdKeyLru.get(rvid.vid) - if rc.isOk: - rdbKeyLruStats[rvid.to(RdbStateType)].inc(true) - return ok(move(rc.value)) + rdb: var RdbInst, rvid: RootedVertexID, flags: set[GetVtxFlag] +): Result[(HashKey, VertexRef), (AristoError, string)] = + block: + # Try LRU cache first + let rc = + if GetVtxFlag.PeekCache in flags: + rdb.rdKeyLru.peek(rvid.vid) + else: + rdb.rdKeyLru.get(rvid.vid) - rdbKeyLruStats[rvid.to(RdbStateType)].inc(false) + if rc.isOk: + rdbKeyLruStats[rvid.to(RdbStateType)].inc(true) + return ok((rc.value, nil)) + + rdbKeyLruStats[rvid.to(RdbStateType)].inc(false) + + block: + # We don't store keys for leaves, no need to hit the database + let rc = rdb.rdVtxLru.peek(rvid.vid) + if rc.isOk(): + if rc.value().vType == Leaf: + return ok((VOID_HASH_KEY, rc.value())) # Otherwise fetch from backend database # A threadvar is used to avoid allocating an environment for onData - var res{.threadvar.}: Opt[HashKey] + var res {.threadvar.}: Opt[HashKey] + var vtx {.threadvar.}: Result[VertexRef, AristoError] + let onData = proc(data: openArray[byte]) = res = data.deblobify(HashKey) + if res.isSome(): + reset(vtx) + else: + vtx = data.deblobify(VertexRef) let gotData = rdb.vtxCol.get(rvid.blobify().data(), onData).valueOr: - const errSym = RdbBeDriverGetKeyError - when extraTraceMessages: - 
trace logTxt "getKey", rvid, error=errSym, info=error - return err((errSym,error)) + const errSym = RdbBeDriverGetKeyError + when extraTraceMessages: + trace logTxt "getKey", rvid, error = errSym, info = error + return err((errSym, error)) - # Correct result if needed - if not gotData or res.isNone(): - res.ok(VOID_HASH_KEY) + if not gotData: + return ok((VOID_HASH_KEY, nil)) - # Update cache and return - rdb.rdKeyLru.put(rvid.vid, res.value()) + # Update cache and return - in peek mode, avoid evicting cache items + if res.isSome() and + (GetVtxFlag.PeekCache notin flags or rdb.rdKeyLru.len < rdb.rdKeyLru.capacity): + rdb.rdKeyLru.put(rvid.vid, res.value()) - ok res.value() + if vtx.isOk() and + (GetVtxFlag.PeekCache notin flags or rdb.rdVtxLru.len < rdb.rdVtxLru.capacity): + rdb.rdVtxLru.put(rvid.vid, vtx.value()) + + ok (res.valueOr(VOID_HASH_KEY), vtx.valueOr(nil)) proc getVtx*( - rdb: var RdbInst; - rvid: RootedVertexID; - flags: set[GetVtxFlag]; - ): Result[VertexRef,(AristoError,string)] = + rdb: var RdbInst, rvid: RootedVertexID, flags: set[GetVtxFlag] +): Result[VertexRef, (AristoError, string)] = # Try LRU cache first var rc = if GetVtxFlag.PeekCache in flags: @@ -151,15 +168,15 @@ proc getVtx*( # Otherwise fetch from backend database # A threadvar is used to avoid allocating an environment for onData - var res {.threadvar.}: Result[VertexRef,AristoError] + var res {.threadvar.}: Result[VertexRef, AristoError] let onData = proc(data: openArray[byte]) = res = data.deblobify(VertexRef) let gotData = rdb.vtxCol.get(rvid.blobify().data(), onData).valueOr: const errSym = RdbBeDriverGetVtxError when extraTraceMessages: - trace logTxt "getVtx", vid, error=errSym, info=error - return err((errSym,error)) + trace logTxt "getVtx", vid, error = errSym, info = error + return err((errSym, error)) if not gotData: # As a hack, we count missing data as leaf nodes diff --git a/nimbus/db/aristo/aristo_merge.nim b/nimbus/db/aristo/aristo_merge.nim index 5a80fc049..89f2c3b0f 100644 --- a/nimbus/db/aristo/aristo_merge.nim +++ b/nimbus/db/aristo/aristo_merge.nim @@ -96,15 +96,13 @@ proc mergePayloadImpl( else: # Turn leaf into a branch (or extension) then insert the two leaves # into the branch - let branch = VertexRef(vType: Branch, pfx: path.slice(0, n)) + let branch = VertexRef(vType: Branch, pfx: path.slice(0, n), startVid: db.vidFetch(16)) let other = block: # Copy of existing leaf node, now one level deeper - let local = db.vidFetch() - branch.bVid[vtx.pfx[n]] = local + let local = branch.setUsed(vtx.pfx[n], true) db.layersPutLeaf((root, local), vtx.pfx.slice(n + 1), vtx.lData) let leafVtx = block: # Newly inserted leaf node - let local = db.vidFetch() - branch.bVid[path[n]] = local + let local = branch.setUsed(path[n], true) db.layersPutLeaf((root, local), path.slice(n + 1), payload) # Put the branch at the vid where the leaf was @@ -121,7 +119,7 @@ proc mergePayloadImpl( # The existing branch is a prefix of the new entry let nibble = path[vtx.pfx.len] - next = vtx.bVid[nibble] + next = vtx.bVid(nibble) if next.isValid: cur = next @@ -135,32 +133,30 @@ proc mergePayloadImpl( else: # There's no vertex at the branch point - insert the payload as a new # leaf and update the existing branch - let - local = db.vidFetch() - leafVtx = db.layersPutLeaf((root, local), path.slice(n + 1), payload) - brDup = vtx.dup() - brDup.bVid[nibble] = local + let brDup = vtx.dup() + let local = brDup.setUsed(nibble, true) db.layersPutVtx((root, cur), brDup) + let + leafVtx = db.layersPutLeaf((root, local), path.slice(n 
+ 1), payload) + resetKeys() return ok((leafVtx, nil, nil)) else: # Partial path match - we need to split the existing branch at # the point of divergence, inserting a new branch - let branch = VertexRef(vType: Branch, pfx: path.slice(0, n)) + let branch = VertexRef(vType: Branch, pfx: path.slice(0, n), startVid: db.vidFetch(16)) block: # Copy the existing vertex and add it to the new branch - let local = db.vidFetch() - branch.bVid[vtx.pfx[n]] = local + let local = branch.setUsed(vtx.pfx[n], true) db.layersPutVtx( (root, local), - VertexRef(vType: Branch, pfx: vtx.pfx.slice(n + 1), bVid: vtx.bVid), + VertexRef(vType: Branch, pfx: vtx.pfx.slice(n + 1), startVid: vtx.startVid, used: vtx.used), ) let leafVtx = block: # add the new entry - let local = db.vidFetch() - branch.bVid[path[n]] = local + let local = branch.setUsed(path[n], true) db.layersPutLeaf((root, local), path.slice(n + 1), payload) db.layersPutVtx((root, cur), branch) diff --git a/nimbus/db/aristo/aristo_nearby.nim b/nimbus/db/aristo/aristo_nearby.nim index cfc850e52..323ead85c 100644 --- a/nimbus/db/aristo/aristo_nearby.nim +++ b/nimbus/db/aristo/aristo_nearby.nim @@ -57,7 +57,7 @@ proc branchNibbleMin*(vtx: VertexRef; minInx: int8): int8 = ## greater or equal the argument `nibble`. if vtx.vType == Branch: for n in minInx .. 15: - if vtx.bVid[n].isValid: + if vtx.bVid(uint8 n).isValid: return n -1 @@ -66,7 +66,7 @@ proc branchNibbleMax*(vtx: VertexRef; maxInx: int8): int8 = ## less or equal the argument `nibble`. if vtx.vType == Branch: for n in maxInx.countdown 0: - if vtx.bVid[n].isValid: + if vtx.bVid(uint8 n).isValid: return n -1 @@ -112,7 +112,7 @@ proc complete( else: leg.nibble = vtx.branchNibbleMax 15 if 0 <= leg.nibble: - vid = vtx.bVid[leg.nibble] + vid = vtx.bVid(uint8 leg.nibble) vtx = db.getVtx (hike.root, vid) if vtx.isValid: uHike.legs.add leg @@ -225,7 +225,7 @@ proc finalise( if 0 <= top.nibble and top.nibble == top.wp.vtx.branchBorderNibble: # Check the following up vertex let - vid = top.wp.vtx.bVid[top.nibble] + vid = top.wp.vtx.bVid(uint8 top.nibble) vtx = db.getVtx (hike.root, vid) if not vtx.isValid: return err((vid,NearbyDanglingLink)) @@ -298,7 +298,7 @@ proc nearbyNext( # Look ahead checking next vertex if start: - let vid = top.wp.vtx.bVid[top.nibble] + let vid = top.wp.vtx.bVid(uint8 top.nibble) if not vid.isValid: return err((top.wp.vid,NearbyDanglingLink)) # error @@ -322,7 +322,7 @@ proc nearbyNext( if 0 <= n: uHike.legs[^1].nibble = n return uHike.complete( - step.wp.vtx.bVid[n], db, hikeLenMax, doLeast=moveRight) + step.wp.vtx.bVid(uint8 n), db, hikeLenMax, doLeast=moveRight) if start: # Retry without look ahead @@ -550,7 +550,7 @@ proc rightMissing*( if top.wp.vtx.vType != Branch or top.nibble < 0: return err(NearbyBranchError) - let vid = top.wp.vtx.bVid[top.nibble] + let vid = top.wp.vtx.bVid(uint8 top.nibble) if not vid.isValid: return err(NearbyDanglingLink) # error diff --git a/nimbus/db/aristo/aristo_part.nim b/nimbus/db/aristo/aristo_part.nim index 202bec2a6..21bea627d 100644 --- a/nimbus/db/aristo/aristo_part.nim +++ b/nimbus/db/aristo/aristo_part.nim @@ -242,7 +242,8 @@ proc partPut*( of Leaf: node.vtx.lData = vtx.lData of Branch: - node.vtx.bVid = vtx.bVid + node.vtx.startVid = vtx.startVid + node.vtx.used = vtx.used ps.addCore(root, key) # register core node ps.pureExt.del key # core node can't be an extension continue @@ -266,7 +267,7 @@ proc partPut*( for n in 0 .. 15: let bKey = node.key[n] if bKey.isValid: - node.vtx.bVid[n] = (? 
ps.getRvid(root, bKey))[0].vid + doAssert false, "TODO node.vtx.bVid[n] = (? ps.getRvid(root, bKey))[0].vid" ps.addCore(root, key) # register core node ps.pureExt.del key # core node can't be an extension @@ -444,7 +445,7 @@ proc partWithExtEnd*(ps: PartStateRef): Result[void,AristoError] = return err(PartExtVtxHasVanished) if vtx.vType != Branch or vtx.pfx != ext.xPfx or - vtx.bVid != array[16,VertexID].default: + vtx.used != uint16.default: restore() return err(PartExtVtxWasModified) rollback.add (rvid,ext) diff --git a/nimbus/db/aristo/aristo_part/part_chain_rlp.nim b/nimbus/db/aristo/aristo_part/part_chain_rlp.nim index 4bd4acb05..c0b70571d 100644 --- a/nimbus/db/aristo/aristo_part/part_chain_rlp.nim +++ b/nimbus/db/aristo/aristo_part/part_chain_rlp.nim @@ -62,10 +62,10 @@ proc chainRlpNodes*( let nibble = path[nChewOff] rest = path.slice(nChewOff+1) - if not vtx.bVid[nibble].isValid: + if not vtx.bVid(nibble).isValid: return err(PartChnBranchVoidEdge) # Recursion! - db.chainRlpNodes((rvid.root,vtx.bVid[nibble]), rest, chain) + db.chainRlpNodes((rvid.root,vtx.bVid(nibble)), rest, chain) proc trackRlpNodes*( diff --git a/nimbus/db/aristo/aristo_part/part_ctx.nim b/nimbus/db/aristo/aristo_part/part_ctx.nim index a5c70da07..d05166ab5 100644 --- a/nimbus/db/aristo/aristo_part/part_ctx.nim +++ b/nimbus/db/aristo/aristo_part/part_ctx.nim @@ -29,7 +29,7 @@ proc newCtx(ps: PartStateRef; hike: Hike): Result[PartStateCtx,AristoError] = let wp = hike.legs[^1].wp nibble = hike.legs[^1].nibble - fromVid = wp.vtx.bVid[nibble] + fromVid = wp.vtx.bVid(uint8 nibble) if not ps.isPerimeter(fromVid) or ps.isExtension(fromVid): return err(PartCtxNotAvailable) @@ -43,7 +43,7 @@ proc newCtx(ps: PartStateRef; hike: Hike): Result[PartStateCtx,AristoError] = fromVid: fromVid) # Update database so that is space for adding a new sub-tree here - vtx2.bVid[nibble] = VertexID(0) + discard vtx2.setUsed(uint8 nibble, false) ps.db.layersPutVtx(psc.location,vtx2) ok psc @@ -97,12 +97,12 @@ proc ctxAcceptChange(psc: PartStateCtx): Result[bool,AristoError] = ps = psc.ps db = ps.db (vtx,_) = ? db.getVtxRc psc.location - toVid = vtx.bVid[psc.nibble] + toVid = vtx.bVid(uint8 psc.nibble) if not toVid.isValid: # Nothing changed, so restore let vtx2 = vtx.dup - vtx2.bVid[psc.nibble] = psc.fromVid + doAssert false, "TODO vtx2.bVid[psc.nibble] = psc.fromVid" db.layersPutVtx(psc.location, vtx2) ok(false) diff --git a/nimbus/db/aristo/aristo_serialise.nim b/nimbus/db/aristo/aristo_serialise.nim index a89d6ccc9..0092b7ea5 100644 --- a/nimbus/db/aristo/aristo_serialise.nim +++ b/nimbus/db/aristo/aristo_serialise.nim @@ -13,7 +13,7 @@ import eth/[common, rlp], results, - "."/[aristo_constants, aristo_desc, aristo_get] + "."/[aristo_constants, aristo_desc, aristo_compute] type ResolveVidFn = proc( @@ -55,14 +55,6 @@ proc serialise( # Public RLP transcoder mixins # ------------------------------------------------------------------------------ -func append*(w: var RlpWriter; key: HashKey) = - if 1 < key.len and key.len < 32: - w.appendRawBytes key.data - else: - w.append key.data - -# --------------------- - proc to*(node: NodeRef; T: type seq[seq[byte]]): T = ## Convert the argument pait `w` to a single or a double item list item of ## `` type entries. Only in case of a combined extension @@ -150,7 +142,7 @@ proc serialise*( ## of account type, otherwise pass the data as is. 
## proc getKey(vid: VertexID): Result[HashKey,AristoError] = - ok (?db.getKeyRc((root, vid)))[0] + ok (?db.computeKey((root, vid))) pyl.serialise getKey diff --git a/nimbus/db/aristo/aristo_tx.nim b/nimbus/db/aristo/aristo_tx.nim index 908ecd121..3909db414 100644 --- a/nimbus/db/aristo/aristo_tx.nim +++ b/nimbus/db/aristo/aristo_tx.nim @@ -159,8 +159,8 @@ proc findTx*( # Try `(vid,key)` on unfiltered backend block: - let beKey = db.getKeyUbe(rvid).valueOr: VOID_HASH_KEY - if beKey == key: + let beKey = db.getKeyUbe(rvid, {}).valueOr: (VOID_HASH_KEY, nil) + if beKey[0] == key: return ok(-2) err(TxNotFound) diff --git a/nimbus/db/aristo/aristo_utils.nim b/nimbus/db/aristo/aristo_utils.nim index b30f72826..d6ffaf134 100644 --- a/nimbus/db/aristo/aristo_utils.nim +++ b/nimbus/db/aristo/aristo_utils.nim @@ -54,12 +54,10 @@ proc toNode*( of Branch: let node = NodeRef(vtx: vtx.dup()) - for n in 0 .. 15: - let vid = vtx.bVid[n] - if vid.isValid: - let key = db.computeKey((root, vid)).valueOr: - return err(@[vid]) - node.key[n] = key + for n, subvid in vtx.pairs(): + let key = db.computeKey((root, subvid)).valueOr: + return err(@[subvid]) + node.key[n] = key return ok node iterator subVids*(vtx: VertexRef): VertexID = @@ -71,9 +69,8 @@ iterator subVids*(vtx: VertexRef): VertexID = if stoID.isValid: yield stoID.vid of Branch: - for vid in vtx.bVid: - if vid.isValid: - yield vid + for _, subvid in vtx.pairs(): + yield subvid iterator subVidKeys*(node: NodeRef): (VertexID,HashKey) = ## Simolar to `subVids()` but for nodes @@ -84,10 +81,8 @@ iterator subVidKeys*(node: NodeRef): (VertexID,HashKey) = if stoID.isValid: yield (stoID.vid, node.key[0]) of Branch: - for n in 0 .. 15: - let vid = node.vtx.bVid[n] - if vid.isValid: - yield (vid,node.key[n]) + for n, subvid in node.vtx.pairs(): + yield (subvid,node.key[n]) # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_vid.nim b/nimbus/db/aristo/aristo_vid.nim index 820c7341d..e8afb5691 100644 --- a/nimbus/db/aristo/aristo_vid.nim +++ b/nimbus/db/aristo/aristo_vid.nim @@ -20,14 +20,15 @@ import # Public functions # ------------------------------------------------------------------------------ -proc vidFetch*(db: AristoDbRef): VertexID = +proc vidFetch*(db: AristoDbRef, n = 1): VertexID = ## Fetch next vertex ID. 
## if db.top.vTop == 0: db.top.vTop = VertexID(LEAST_FREE_VID) - else: - db.top.vTop.inc - db.top.vTop + var ret = db.top.vTop + ret.inc + db.top.vTop.inc(n) + ret # ------------------------------------------------------------------------------ # End diff --git a/nimbus/db/aristo/aristo_walk/memory_only.nim b/nimbus/db/aristo/aristo_walk/memory_only.nim index 166177ff9..62918cea8 100644 --- a/nimbus/db/aristo/aristo_walk/memory_only.nim +++ b/nimbus/db/aristo/aristo_walk/memory_only.nim @@ -19,7 +19,8 @@ import export memory_db, - memory_only + memory_only, + aristo_desc # ------------------------------------------------------------------------------ # Public iterators (all in one) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 291022851..09b8a8c45 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,41 +1,39 @@ -alabaster==0.7.16 -attrs==23.2.0 -Babel==2.15.0 -cattrs==23.2.3 -certifi==2024.07.04 -charset-normalizer==3.3.2 -contourpy==1.2.1 +# +# This file is autogenerated by pip-compile with Python 3.13 +# by the following command: +# +# pip-compile +# +contourpy==1.3.1 + # via matplotlib cycler==0.12.1 -docutils==0.20.1 -esbonio==0.16.4 -fonttools==4.53.0 -idna==3.7 -imagesize==1.4.1 -Jinja2==3.1.4 -kiwisolver==1.4.5 -lsprotocol==2023.0.1 -MarkupSafe==2.1.5 -matplotlib==3.9.0 -numpy==1.26.4 -packaging==24.0 -pandas==2.2.2 -pillow==10.3.0 -platformdirs==4.2.1 -pygls==1.3.1 -Pygments==2.18.0 -pyparsing==3.1.2 -pyspellchecker==0.8.1 + # via matplotlib +fonttools==4.55.0 + # via matplotlib +kiwisolver==1.4.7 + # via matplotlib +matplotlib==3.9.2 + # via -r requirements.in +numpy==2.1.3 + # via + # contourpy + # matplotlib + # pandas +packaging==24.2 + # via matplotlib +pandas==2.2.3 + # via -r requirements.in +pillow==11.0.0 + # via matplotlib +pyparsing==3.2.0 + # via matplotlib python-dateutil==2.9.0.post0 -pytz==2024.1 -requests==2.32.2 + # via + # matplotlib + # pandas +pytz==2024.2 + # via pandas six==1.16.0 -snowballstemmer==2.2.0 -Sphinx==7.3.7 -sphinxcontrib-applehelp==1.0.8 -sphinxcontrib-devhelp==1.0.6 -sphinxcontrib-htmlhelp==2.0.5 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.7 -sphinxcontrib-serializinghtml==1.1.10 -tzdata==2024.1 -urllib3==2.2.2 + # via python-dateutil +tzdata==2024.2 + # via pandas diff --git a/tests/test_aristo/test_balancer.nim b/tests/test_aristo/test_balancer.nim index 8ca6f2e77..193774a64 100644 --- a/tests/test_aristo/test_balancer.nim +++ b/tests/test_aristo/test_balancer.nim @@ -204,15 +204,15 @@ proc isDbEq(a, b: LayerRef; db: AristoDbRef; noisy = true): bool = if aKey.isValid and bKey.isValid: return false # The valid one must match the backend data - let rc = db.getKeyUbe vid + let rc = db.getKeyUbe(vid, {}) if rc.isErr: return false let key = if aKey.isValid: aKey else: bKey - if key != rc.value: + if key != rc.value[0]: return false elif not vid.isValid and not bMap.hasKey vid: - let rc = db.getKeyUbe vid + let rc = db.getKeyUbe(vid, {}) if rc.isOk: return false # Exists on backend but missing on `bMap[]` elif rc.error != GetKeyNotFound: diff --git a/tests/test_aristo/test_blobify.nim b/tests/test_aristo/test_blobify.nim index d812a1e1b..d70761443 100644 --- a/tests/test_aristo/test_blobify.nim +++ b/tests/test_aristo/test_blobify.nim @@ -10,60 +10,33 @@ {.used.} -import unittest2, ../../nimbus/db/aristo/aristo_blobify +import unittest2, std/sequtils, ../../nimbus/db/aristo/aristo_blobify suite "Aristo blobify": test "VertexRef roundtrip": let - leafAccount = VertexRef(vType: Leaf, lData: 
LeafPayload(pType: AccountData)) - leafStoData = - VertexRef(vType: Leaf, lData: LeafPayload(pType: StoData, stoData: 42.u256)) - branch = VertexRef( - vType: Branch, - bVid: [ - VertexID(0), - VertexID(1), - VertexID(0), - VertexID(0), - VertexID(4), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - ], + leafAccount = VertexRef( + vType: Leaf, + pfx: NibblesBuf.nibble(1), + lData: LeafPayload( + pType: AccountData, account: AristoAccount(nonce: 100, balance: 123.u256) + ), ) + leafStoData = VertexRef( + vType: Leaf, + pfx: NibblesBuf.nibble(3), + lData: LeafPayload(pType: StoData, stoData: 42.u256), + ) + branch = VertexRef(vType: Branch, startVid: VertexID(0x334452), used: 0x43'u16) extension = VertexRef( vType: Branch, pfx: NibblesBuf.nibble(2), - bVid: [ - VertexID(0), - VertexID(0), - VertexID(2), - VertexID(0), - VertexID(0), - VertexID(5), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - VertexID(0), - ], + startVid: VertexID(0x55), + used: 0x12'u16, ) - key = HashKey.fromBytes(rlp.encode([10'u64]))[] + key = HashKey.fromBytes(repeat(0x34'u8, 32))[] check: deblobify(blobify(leafAccount, key), VertexRef)[] == leafAccount diff --git a/tests/test_aristo/test_portal_proof.nim b/tests/test_aristo/test_portal_proof.nim index 020bf5b85..66591a60b 100644 --- a/tests/test_aristo/test_portal_proof.nim +++ b/tests/test_aristo/test_portal_proof.nim @@ -92,10 +92,10 @@ proc payloadAsBlob(pyl: LeafPayload; ps: PartStateRef): seq[byte] = of AccountData: let key = block: if pyl.stoID.isValid: - let rc = ps.db.getKeyRc (VertexID(1),pyl.stoID.vid) + let rc = ps.db.getKeyRc((VertexID(1),pyl.stoID.vid), {}) if rc.isErr: raiseAssert info & ": getKey => " & $rc.error - rc.value[0] + rc.value[0][0] else: VOID_HASH_KEY
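For reference when reading the `aristo_blobify.nim` hunks at the top of this patch, the serialised record layout after this change is as follows, with the last byte acting as a descriptor:

    Leaf:              payload ++ hexPrefix ++ byte(0b01 shl 6 or psLen)
    Branch, no key:    startVid ++ used(2, BE) ++ hexPrefix ++ byte(0b00 shl 6 or psLen)
    Branch, 32-b key:  key(32) ++ startVid ++ used(2, BE) ++ hexPrefix ++ byte(0b10 shl 6 or psLen)

A minimal standalone decoder for the descriptor byte, following the `deblobify` logic above (`describe` is an illustrative name, not part of the module):

    func describe(record: openArray[byte]): tuple[isLeaf, hasKey: bool, psLen: int] =
      let bits = record[^1] shr 6
      (
        isLeaf: (bits and 0b01'u8) > 0,          # low flag selects Leaf
        hasKey: (bits and 0b10'u8) > 0,          # high flag: 32-byte key up front
        psLen: int(record[^1] and 0b0011_1111)   # hex-prefix segment byte length
      )

    when isMainModule:
      # a three-byte record ending in 0b0100_0001: a Leaf with a 1-byte path segment
      let d = describe([0x3a'u8, 0x20, 0b0100_0001])
      doAssert d.isLeaf and not d.hasKey and d.psLen == 1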